diff --git a/dev-support/ranger-docker/.env b/dev-support/ranger-docker/.env index e6de538a23..dd985f3f76 100644 --- a/dev-support/ranger-docker/.env +++ b/dev-support/ranger-docker/.env @@ -11,46 +11,66 @@ RANGER_BASE_IMAGE=apache/ranger-base RANGER_BASE_VERSION=20250707-1-8 # Java version used to build Apache Ranger is present as suffix: -8, valid values for suffix: -8, -11, -17 RANGER_BASE_BUILD_VERSION=20250707-1-8 +RANGER_VERSION=3.0.0-SNAPSHOT -# third party image versions -MARIADB_VERSION=10.7.3 -POSTGRES_VERSION=12 -ORACLE_VERSION=23.6 -SQLSERVER_VERSION=2019-latest -ENABLE_DB_MOUNT=true -ZK_VERSION=3.9.2 -SOLR_VERSION=8.11.3 - -# service versions +# Hadoop Configuration HADOOP_VERSION=3.3.6 -HBASE_VERSION=2.6.0 +HDFS_PLUGIN_VERSION=3.0.0-SNAPSHOT +YARN_PLUGIN_VERSION=3.0.0-SNAPSHOT + +# Hive Configuration +HIVE_HADOOP_VERSION=3.3.6 HIVE_VERSION=4.0.1 -HIVE_HADOOP_VERSION=3.1.1 +HIVE_PLUGIN_VERSION=3.0.0-SNAPSHOT + +# Tez Configuration +TEZ_VERSION=0.10.4 + +# HBase Configuration +HBASE_VERSION=2.6.0 +HBASE_PLUGIN_VERSION=3.0.0-SNAPSHOT + +# Kafka Configuration KAFKA_VERSION=2.8.2 +KAFKA_PLUGIN_VERSION=3.0.0-SNAPSHOT + +# Knox Configuration KNOX_VERSION=2.0.0 -TRINO_VERSION=377 -OZONE_VERSION=1.4.0 -OZONE_RUNNER_VERSION=20230615-1 -OZONE_RUNNER_IMAGE=apache/ozone-runner -OZONE_OPTS= +KNOX_PLUGIN_VERSION=3.0.0-SNAPSHOT -# versions of ranger services -RANGER_VERSION=3.0.0-SNAPSHOT +# KMS Configuration KMS_VERSION=3.0.0-SNAPSHOT + +# Usersync Configuration USERSYNC_VERSION=3.0.0-SNAPSHOT + +# Tagsync Configuration TAGSYNC_VERSION=3.0.0-SNAPSHOT -# plugin versions -HDFS_PLUGIN_VERSION=3.0.0-SNAPSHOT -YARN_PLUGIN_VERSION=3.0.0-SNAPSHOT -HIVE_PLUGIN_VERSION=3.0.0-SNAPSHOT -HBASE_PLUGIN_VERSION=3.0.0-SNAPSHOT -KAFKA_PLUGIN_VERSION=3.0.0-SNAPSHOT -KNOX_PLUGIN_VERSION=3.0.0-SNAPSHOT -TRINO_PLUGIN_VERSION=3.0.0-SNAPSHOT +# Solr Configuration +SOLR_VERSION=8.11.2 + +# Zookeeper Configuration +ZK_VERSION=3.8.4 + +# Database Versions +POSTGRES_VERSION=13.16 
+MARIADB_VERSION=10.6 +ORACLE_VERSION=21.3.0-xe +SQLSERVER_VERSION=2022-latest + +# Ozone Configuration +OZONE_VERSION=1.4.0 OZONE_PLUGIN_VERSION=3.0.0-SNAPSHOT +OZONE_RUNNER_IMAGE=apache/ozone-runner +OZONE_RUNNER_VERSION=20240625 + +# Trino Configuration +TRINO_VERSION=435 +TRINO_PLUGIN_VERSION=3.0.0-SNAPSHOT -# To enable debug logs +# Debug Configuration DEBUG_ADMIN=false DEBUG_USERSYNC=false DEBUG_TAGSYNC=false +ENABLE_FILE_SYNC_SOURCE=false diff --git a/dev-support/ranger-docker/Dockerfile.ranger-hadoop b/dev-support/ranger-docker/Dockerfile.ranger-hadoop index 5fc455e4b5..8ae23dbc4d 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-hadoop +++ b/dev-support/ranger-docker/Dockerfile.ranger-hadoop @@ -21,12 +21,14 @@ FROM ${RANGER_BASE_IMAGE}:${RANGER_BASE_VERSION} ARG HADOOP_VERSION ARG HDFS_PLUGIN_VERSION ARG YARN_PLUGIN_VERSION +ARG TEZ_VERSION COPY ./dist/version /home/ranger/dist/ COPY ./dist/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin.tar.gz /home/ranger/dist/ COPY ./dist/ranger-${YARN_PLUGIN_VERSION}-yarn-plugin.tar.gz /home/ranger/dist/ COPY ./downloads/hadoop-${HADOOP_VERSION}.tar.gz /home/ranger/dist/ +COPY ./downloads/apache-tez-${TEZ_VERSION}-bin.tar.gz /home/ranger/dist/ COPY ./scripts/ranger-hadoop-setup.sh /home/ranger/scripts/ COPY ./scripts/ranger-hadoop.sh /home/ranger/scripts/ @@ -37,6 +39,8 @@ COPY ./scripts/ranger-yarn-plugin-install.properties /home/ranger/scripts/ RUN tar xvfz /home/ranger/dist/hadoop-${HADOOP_VERSION}.tar.gz --directory=/opt/ && \ ln -s /opt/hadoop-${HADOOP_VERSION} /opt/hadoop && \ rm -f /home/ranger/dist/hadoop-${HADOOP_VERSION}.tar.gz && \ + tar xvfz /home/ranger/dist/apache-tez-${TEZ_VERSION}-bin.tar.gz --directory=/opt/ && \ + ln -s /opt/apache-tez-${TEZ_VERSION}-bin /opt/tez && \ tar xvfz /home/ranger/dist/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin.tar.gz --directory=/opt/ranger && \ ln -s /opt/ranger/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin /opt/ranger/ranger-hdfs-plugin && \ rm -f 
/home/ranger/dist/ranger-${HDFS_PLUGIN_VERSION}-hdfs-plugin.tar.gz && \ diff --git a/dev-support/ranger-docker/Dockerfile.ranger-hive b/dev-support/ranger-docker/Dockerfile.ranger-hive index 7fddfc0019..6e8c4f2c9f 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-hive +++ b/dev-support/ranger-docker/Dockerfile.ranger-hive @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -ARG RANGER_DB_TYPE +# Only the ARGs needed for this build stage ARG RANGER_BASE_IMAGE ARG RANGER_BASE_VERSION @@ -22,6 +22,7 @@ FROM ${RANGER_BASE_IMAGE}:${RANGER_BASE_VERSION} ARG HIVE_VERSION ARG HIVE_HADOOP_VERSION ARG HIVE_PLUGIN_VERSION +ARG TEZ_VERSION ARG RANGER_DB_TYPE @@ -29,6 +30,7 @@ COPY ./dist/version /home/ranger/dist/ COPY ./dist/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin.tar.gz /home/ranger/dist/ COPY ./downloads/apache-hive-${HIVE_VERSION}-bin.tar.gz /home/ranger/dist/ COPY ./downloads/hadoop-${HIVE_HADOOP_VERSION}.tar.gz /home/ranger/dist/ +COPY ./downloads/apache-tez-${TEZ_VERSION}-bin.tar.gz /home/ranger/dist/ COPY ./downloads/postgresql-42.2.16.jre7.jar /home/ranger/dist/ COPY ./downloads/mysql-connector-java-8.0.28.jar /home/ranger/dist/ COPY ./downloads/ojdbc8.jar /home/ranger/dist/ @@ -38,7 +40,7 @@ COPY ./scripts/ranger-hive.sh /home/ranger/scripts/ COPY ./scripts/ranger-hive-plugin-install.properties /home/ranger/scripts/ COPY ./scripts/hive-site-${RANGER_DB_TYPE}.xml /home/ranger/scripts/hive-site.xml -RUN tar xvfz /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz --directory=/opt/ && \ +RUN cd /opt && tar xzf /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz && \ ln -s /opt/apache-hive-${HIVE_VERSION}-bin /opt/hive && \ rm -f /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz && \ mv /home/ranger/dist/postgresql-42.2.16.jre7.jar /opt/hive/lib/ && \ @@ -47,6 +49,9 @@ RUN tar xvfz 
/home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz --director tar xvfz /home/ranger/dist/hadoop-${HIVE_HADOOP_VERSION}.tar.gz --directory=/opt/ && \ ln -s /opt/hadoop-${HIVE_HADOOP_VERSION} /opt/hadoop && \ rm -f /home/ranger/dist/hadoop-${HIVE_HADOOP_VERSION}.tar.gz && \ + tar xvfz /home/ranger/dist/apache-tez-${TEZ_VERSION}-bin.tar.gz --directory=/opt/ && \ + ln -s /opt/apache-tez-${TEZ_VERSION}-bin /opt/tez && \ + rm -f /home/ranger/dist/apache-tez-${TEZ_VERSION}-bin.tar.gz && \ tar xvfz /home/ranger/dist/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin.tar.gz --directory=/opt/ranger && \ ln -s /opt/ranger/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin /opt/ranger/ranger-hive-plugin && \ rm -f /home/ranger/dist/ranger-${HIVE_PLUGIN_VERSION}-hive-plugin.tar.gz && \ @@ -55,7 +60,8 @@ RUN tar xvfz /home/ranger/dist/apache-hive-${HIVE_VERSION}-bin.tar.gz --director ENV HIVE_HOME=/opt/hive ENV HADOOP_HOME=/opt/hadoop -ENV PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/hive/bin:/opt/hadoop/bin - +ENV TEZ_HOME=/opt/tez +ENV TEZ_CONF_DIR=/opt/tez/conf +ENV PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/hive/bin:/opt/hadoop/bin:/opt/tez/bin ENTRYPOINT [ "/home/ranger/scripts/ranger-hive.sh" ] diff --git a/dev-support/ranger-docker/Dockerfile.ranger-kafka b/dev-support/ranger-docker/Dockerfile.ranger-kafka index 48c5789a7b..922aeafa07 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-kafka +++ b/dev-support/ranger-docker/Dockerfile.ranger-kafka @@ -39,6 +39,7 @@ RUN tar xvfz /home/ranger/dist/kafka_2.12-${KAFKA_VERSION}.tgz --directory=/opt/ cp -f /home/ranger/scripts/ranger-kafka-plugin-install.properties /opt/ranger/ranger-kafka-plugin/install.properties && \ chmod 744 ${RANGER_SCRIPTS}/ranger-kafka-setup.sh ${RANGER_SCRIPTS}/ranger-kafka.sh + ENV KAFKA_HOME=/opt/kafka ENV PATH=/usr/java/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/kafka/bin diff --git 
a/dev-support/ranger-docker/Dockerfile.ranger-kms b/dev-support/ranger-docker/Dockerfile.ranger-kms index 55401ef60e..604fe69222 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-kms +++ b/dev-support/ranger-docker/Dockerfile.ranger-kms @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + ARG RANGER_DB_TYPE ARG RANGER_BASE_IMAGE ARG RANGER_BASE_VERSION @@ -38,7 +39,7 @@ RUN tar xvfz /home/ranger/dist/ranger-${KMS_VERSION}-kms.tar.gz --directory=${RA ln -s /etc/init.d/ranger-kms /etc/rc3.d/S88ranger-kms && \ ln -s /etc/init.d/ranger-kms /etc/rc3.d/K90ranger-kms && \ ln -s ${RANGER_HOME}/kms/ranger-kms-services.sh /usr/bin/ranger-kms-services.sh && \ - chown -R rangerkms:ranger ${RANGER_HOME}/kms/ ${RANGER_SCRIPTS}/ /var/run/ranger_kms/ /var/log/ranger/ && \ + chown -R rangerkms:ranger ${RANGER_HOME}/kms/ ${RANGER_SCRIPTS}/ /var/run/ranger_kms/ /var/log/ranger/ /etc/ranger && \ chmod 744 ${RANGER_SCRIPTS}/ranger-kms.sh FROM ranger-kms AS ranger_postgres diff --git a/dev-support/ranger-docker/Dockerfile.ranger-usersync b/dev-support/ranger-docker/Dockerfile.ranger-usersync index 9b164cad06..af1d15c3d3 100644 --- a/dev-support/ranger-docker/Dockerfile.ranger-usersync +++ b/dev-support/ranger-docker/Dockerfile.ranger-usersync @@ -31,10 +31,9 @@ RUN tar xvfz /home/ranger/dist/ranger-${USERSYNC_VERSION}-usersync.tar.gz --dire ln -s ${RANGER_HOME}/ranger-${USERSYNC_VERSION}-usersync ${RANGER_HOME}/usersync && \ rm -f /home/ranger/dist/ranger-${USERSYNC_VERSION}-usersync.tar.gz && \ cp -f ${RANGER_SCRIPTS}/ranger-usersync-install.properties ${RANGER_HOME}/usersync/install.properties && \ - mkdir -p /var/run/ranger /var/log/ranger/usersync /etc/ranger && \ - mkdir /etc/init.d || true && \ - mkdir /etc/rc2.d || true && \ - mkdir /etc/rc3.d || true && \ + mkdir -p /var/run/ranger /var/log/ranger/usersync && \ + mkdir -p 
/etc/ranger && \ + mkdir -p /etc/init.d /etc/rc2.d /etc/rc3.d && \ touch /etc/init.d/ranger-usersync && \ ln -s /etc/init.d/ranger-usersync /etc/rc2.d/S99ranger-usersync && \ ln -s /etc/init.d/ranger-usersync /etc/rc2.d/K00ranger-usersync && \ diff --git a/dev-support/ranger-docker/config/my.cnf b/dev-support/ranger-docker/config/my.cnf index 77e958eaca..bd7d646d5c 100644 --- a/dev-support/ranger-docker/config/my.cnf +++ b/dev-support/ranger-docker/config/my.cnf @@ -19,5 +19,7 @@ collation-server = utf8_unicode_ci init-connect = 'SET NAMES utf8' character_set_server = utf8 +# Enable hostname resolution for user authentication (fixes ranger-db user issue) +skip-name-resolve = 0 # transaction-isolation = REPEATABLE-READ # transaction-isolation = READ-COMMITTED diff --git a/dev-support/ranger-docker/docker-compose.ranger-db.yml b/dev-support/ranger-docker/docker-compose.ranger-db.yml index 3688a2a6df..3a819afb18 100644 --- a/dev-support/ranger-docker/docker-compose.ranger-db.yml +++ b/dev-support/ranger-docker/docker-compose.ranger-db.yml @@ -25,7 +25,7 @@ services: args: - MARIADB_VERSION=${MARIADB_VERSION} image: ranger-mysql - command: --default-authentication-plugin=mysql_native_password + command: --default-authentication-plugin=mysql_native_password --skip-name-resolve=0 container_name: ranger-mysql hostname: ranger-db.example.com ports: diff --git a/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml b/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml index dacbbf0ccc..37fe6b5d83 100644 --- a/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml +++ b/dev-support/ranger-docker/docker-compose.ranger-hadoop.yml @@ -9,6 +9,7 @@ services: - HADOOP_VERSION=${HADOOP_VERSION} - HDFS_PLUGIN_VERSION=${HDFS_PLUGIN_VERSION} - YARN_PLUGIN_VERSION=${YARN_PLUGIN_VERSION} + - TEZ_VERSION=${TEZ_VERSION} image: ranger-hadoop container_name: ranger-hadoop hostname: ranger-hadoop.example.com @@ -32,6 +33,7 @@ services: - HADOOP_VERSION - HDFS_PLUGIN_VERSION - 
YARN_PLUGIN_VERSION + - TEZ_VERSION networks: ranger: diff --git a/dev-support/ranger-docker/docker-compose.ranger-hive.yml b/dev-support/ranger-docker/docker-compose.ranger-hive.yml index 5815a472d6..6084630825 100644 --- a/dev-support/ranger-docker/docker-compose.ranger-hive.yml +++ b/dev-support/ranger-docker/docker-compose.ranger-hive.yml @@ -9,6 +9,7 @@ services: - HIVE_HADOOP_VERSION=${HIVE_HADOOP_VERSION} - HIVE_VERSION=${HIVE_VERSION} - HIVE_PLUGIN_VERSION=${HIVE_PLUGIN_VERSION} + - TEZ_VERSION=${TEZ_VERSION} - RANGER_DB_TYPE=${RANGER_DB_TYPE} image: ranger-hive container_name: ranger-hive @@ -19,6 +20,7 @@ services: - ranger ports: - "10000:10000" + - "9083:9083" depends_on: ranger: condition: service_started @@ -30,6 +32,7 @@ services: - HIVE_HADOOP_VERSION - HIVE_VERSION - HIVE_PLUGIN_VERSION + - TEZ_VERSION - RANGER_DB_TYPE networks: diff --git a/dev-support/ranger-docker/download-archives.sh b/dev-support/ranger-docker/download-archives.sh index 0bfca41ade..1ce4240e4d 100755 --- a/dev-support/ranger-docker/download-archives.sh +++ b/dev-support/ranger-docker/download-archives.sh @@ -53,6 +53,7 @@ then downloadIfNotPresent hbase-${HBASE_VERSION}-bin.tar.gz https://archive.apache.org/dist/hbase/${HBASE_VERSION} downloadIfNotPresent apache-hive-${HIVE_VERSION}-bin.tar.gz https://archive.apache.org/dist/hive/hive-${HIVE_VERSION} downloadIfNotPresent hadoop-${HIVE_HADOOP_VERSION}.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-${HIVE_HADOOP_VERSION} + downloadIfNotPresent apache-tez-${TEZ_VERSION}-bin.tar.gz https://archive.apache.org/dist/tez/${TEZ_VERSION} downloadIfNotPresent kafka_2.12-${KAFKA_VERSION}.tgz https://archive.apache.org/dist/kafka/${KAFKA_VERSION} downloadIfNotPresent knox-${KNOX_VERSION}.tar.gz https://archive.apache.org/dist/knox/${KNOX_VERSION} downloadIfNotPresent ozone-${OZONE_VERSION}.tar.gz https://archive.apache.org/dist/ozone/${OZONE_VERSION} @@ -72,6 +73,7 @@ else then downloadIfNotPresent 
apache-hive-${HIVE_VERSION}-bin.tar.gz https://archive.apache.org/dist/hive/hive-${HIVE_VERSION} downloadIfNotPresent hadoop-${HIVE_HADOOP_VERSION}.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-${HIVE_HADOOP_VERSION} + downloadIfNotPresent apache-tez-${TEZ_VERSION}-bin.tar.gz https://archive.apache.org/dist/tez/${TEZ_VERSION} elif [[ $arg == 'kafka' ]] then downloadIfNotPresent kafka_2.12-${KAFKA_VERSION}.tgz https://archive.apache.org/dist/kafka/${KAFKA_VERSION} diff --git a/dev-support/ranger-docker/scripts/hive-site-mysql.xml b/dev-support/ranger-docker/scripts/hive-site-mysql.xml index 118cdbbca9..ccb095329b 100644 --- a/dev-support/ranger-docker/scripts/hive-site-mysql.xml +++ b/dev-support/ranger-docker/scripts/hive-site-mysql.xml @@ -49,4 +49,72 @@ hive.zookeeper.client.port 2181 + + + + hive.execution.engine + tez + Execution engine to use for Hive queries + + + + hive.tez.container.size + 1024 + By default Tez will spawn containers of the size of a mapper + + + + hive.tez.java.opts + -Xmx768m + Java command line options for Tez + + + + hive.tez.log.level + INFO + Log level for Tez + + + + hive.prewarm.enabled + false + Enables container prewarm for Tez + + + + hive.prewarm.numcontainers + 3 + Controls the number of containers to prewarm for Tez + + + + hive.tez.auto.reducer.parallelism + true + Turn on Tez' auto reducer parallelism feature + + + + hive.tez.min.reducer.per.query + 2 + Controls the minimum number of reducers for Tez + + + + hive.tez.max.reducer.per.query + 999 + Controls the maximum number of reducers for Tez + + + + + hive.tez.exec.print.summary + true + Whether to print a summary of the plan execution + + + + tez.queue.name + default + YARN queue name for Tez jobs + diff --git a/dev-support/ranger-docker/scripts/hive-site-oracle.xml b/dev-support/ranger-docker/scripts/hive-site-oracle.xml index 2b8cc5e08f..53cde12799 100644 --- a/dev-support/ranger-docker/scripts/hive-site-oracle.xml +++ 
b/dev-support/ranger-docker/scripts/hive-site-oracle.xml @@ -49,4 +49,72 @@ hive.zookeeper.client.port 2181 + + + + hive.execution.engine + tez + Execution engine to use for Hive queries + + + + hive.tez.container.size + 1024 + By default Tez will spawn containers of the size of a mapper + + + + hive.tez.java.opts + -Xmx768m + Java command line options for Tez + + + + hive.tez.log.level + INFO + Log level for Tez + + + + hive.prewarm.enabled + false + Enables container prewarm for Tez + + + + hive.prewarm.numcontainers + 3 + Controls the number of containers to prewarm for Tez + + + + hive.tez.auto.reducer.parallelism + true + Turn on Tez' auto reducer parallelism feature + + + + hive.tez.min.reducer.per.query + 2 + Controls the minimum number of reducers for Tez + + + + hive.tez.max.reducer.per.query + 999 + Controls the maximum number of reducers for Tez + + + + + hive.tez.exec.print.summary + true + Whether to print a summary of the plan execution + + + + tez.queue.name + default + YARN queue name for Tez jobs + diff --git a/dev-support/ranger-docker/scripts/hive-site-postgres.xml b/dev-support/ranger-docker/scripts/hive-site-postgres.xml index 55343a3234..f139af5d42 100644 --- a/dev-support/ranger-docker/scripts/hive-site-postgres.xml +++ b/dev-support/ranger-docker/scripts/hive-site-postgres.xml @@ -49,4 +49,72 @@ hive.zookeeper.client.port 2181 + + + + hive.execution.engine + tez + Execution engine to use for Hive queries + + + + hive.tez.container.size + 1024 + By default Tez will spawn containers of the size of a mapper + + + + hive.tez.java.opts + -Xmx768m + Java command line options for Tez + + + + hive.tez.log.level + INFO + Log level for Tez + + + + hive.prewarm.enabled + false + Enables container prewarm for Tez + + + + hive.prewarm.numcontainers + 3 + Controls the number of containers to prewarm for Tez + + + + hive.tez.auto.reducer.parallelism + true + Turn on Tez' auto reducer parallelism feature + + + + hive.tez.min.reducer.per.query + 2 + 
Controls the minimum number of reducers for Tez + + + + hive.tez.max.reducer.per.query + 999 + Controls the maximum number of reducers for Tez + + + + + hive.tez.exec.print.summary + true + Whether to print a summary of the plan execution + + + + tez.queue.name + default + YARN queue name for Tez jobs + diff --git a/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml b/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml index 5bed21c2d8..2298f4b644 100644 --- a/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml +++ b/dev-support/ranger-docker/scripts/hive-site-sqlserver.xml @@ -47,4 +47,72 @@ hive.zookeeper.client.port 2181 + + + + hive.execution.engine + tez + Execution engine to use for Hive queries + + + + hive.tez.container.size + 1024 + By default Tez will spawn containers of the size of a mapper + + + + hive.tez.java.opts + -Xmx768m + Java command line options for Tez + + + + hive.tez.log.level + INFO + Log level for Tez + + + + hive.prewarm.enabled + false + Enables container prewarm for Tez + + + + hive.prewarm.numcontainers + 3 + Controls the number of containers to prewarm for Tez + + + + hive.tez.auto.reducer.parallelism + true + Turn on Tez' auto reducer parallelism feature + + + + hive.tez.min.reducer.per.query + 2 + Controls the minimum number of reducers for Tez + + + + hive.tez.max.reducer.per.query + 999 + Controls the maximum number of reducers for Tez + + + + + hive.tez.exec.print.summary + true + Whether to print a summary of the plan execution + + + + tez.queue.name + default + YARN queue name for Tez jobs + diff --git a/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh b/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh index 10f04acd9f..e08b4ac859 100755 --- a/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh +++ b/dev-support/ranger-docker/scripts/ranger-hadoop-setup.sh @@ -52,16 +52,81 @@ cat < ${HADOOP_HOME}/etc/hadoop/yarn-site.xml yarn.nodemanager.aux-services mapreduce_shuffle + + 
yarn.nodemanager.aux-services.mapreduce_shuffle.class + org.apache.hadoop.mapred.ShuffleHandler + yarn.nodemanager.env-whitelist JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME + + yarn.resourcemanager.hostname + ranger-hadoop + + + yarn.nodemanager.resource.memory-mb + 4096 + + + yarn.scheduler.maximum-allocation-mb + 4096 + + + yarn.scheduler.minimum-allocation-mb + 256 + + + yarn.nodemanager.vmem-check-enabled + false + + + yarn.log-aggregation-enable + true + + + yarn.timeline-service.enabled + true + + + yarn.timeline-service.hostname + ranger-hadoop + + + yarn.timeline-service.http-cross-origin.enabled + true + + + yarn.resourcemanager.system-metrics-publisher.enabled + true + EOF mkdir -p /opt/hadoop/logs chown -R hdfs:hadoop /opt/hadoop/ chmod g+w /opt/hadoop/logs +# user logs directory permissions for NodeManager health +mkdir -p ${HADOOP_HOME}/logs/userlogs +chown -R yarn:hadoop ${HADOOP_HOME}/logs/userlogs +chmod -R 777 ${HADOOP_HOME}/logs/userlogs + +# Install Tez JARs for YARN NodeManager +echo "Installing Tez JARs for YARN NodeManager..." +if [ -d "/opt/tez" ]; then + echo "Copying Tez JARs to YARN lib directory..." 
+ cp /opt/tez/lib/*.jar /opt/hadoop/share/hadoop/yarn/lib/ 2>/dev/null + cp /opt/tez/*.jar /opt/hadoop/share/hadoop/yarn/lib/ 2>/dev/null + + # Set up Tez environment + export TEZ_HOME=/opt/tez + export TEZ_CONF_DIR=${TEZ_HOME}/conf + mkdir -p ${TEZ_CONF_DIR} + + echo "Tez JARs installed successfully for YARN NodeManager" +else + echo "WARNING: Tez directory not found at /opt/tez" +fi cd ${RANGER_HOME}/ranger-hdfs-plugin ./enable-hdfs-plugin.sh diff --git a/dev-support/ranger-docker/scripts/ranger-hadoop.sh b/dev-support/ranger-docker/scripts/ranger-hadoop.sh index 164c25addb..043d16aea2 100755 --- a/dev-support/ranger-docker/scripts/ranger-hadoop.sh +++ b/dev-support/ranger-docker/scripts/ranger-hadoop.sh @@ -18,6 +18,39 @@ CREATE_HDFS_DIR=false +# Always ensure SSH daemon is running (required for Hadoop services) +echo "Starting SSH daemon..." +# Create SSH privilege separation directory if it doesn't exist +mkdir -p /run/sshd +/usr/sbin/sshd + +if [ -f /home/hdfs/.ssh/id_rsa ]; then + echo "Waiting for SSH daemon to be ready..." + SSH_READY=false + for i in {1..30}; do + if su -c "ssh -o ConnectTimeout=2 -o StrictHostKeyChecking=no localhost exit" hdfs 2>/dev/null; then + echo "SSH daemon is ready for hdfs service..." + SSH_READY=true + break + fi + echo "Waiting for SSH daemon... ($i/30)" + sleep 2 + done + + if [ "$SSH_READY" = false ]; then + echo "WARNING: SSH daemon did not become ready within 60 seconds, Hadoop Services may fail to start properly...." + echo "Attempting to restart SSH daemon..." + pkill sshd 2>/dev/null || true + # Ensure SSH privilege separation directory exists + mkdir -p /run/sshd + /usr/sbin/sshd + sleep 3 + fi +else + echo "SSH keys not yet generated, skipping SSH connectivity test" + sleep 2 +fi + if [ ! 
-e ${HADOOP_HOME}/.setupDone ] then su -c "ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa" hdfs @@ -29,7 +62,6 @@ then su -c "chmod 0600 ~/.ssh/authorized_keys" yarn ssh-keygen -A - /usr/sbin/sshd -D & # pdsh is unavailable with microdnf in rhel based image. echo "ssh" > /etc/pdsh/rcmd_default diff --git a/dev-support/ranger-docker/scripts/ranger-hive-setup.sh b/dev-support/ranger-docker/scripts/ranger-hive-setup.sh index c0e7ee4063..cb4d7e8f92 100755 --- a/dev-support/ranger-docker/scripts/ranger-hive-setup.sh +++ b/dev-support/ranger-docker/scripts/ranger-hive-setup.sh @@ -35,6 +35,134 @@ EOF cp ${RANGER_SCRIPTS}/hive-site.xml ${HIVE_HOME}/conf/hive-site.xml cp ${RANGER_SCRIPTS}/hive-site.xml ${HIVE_HOME}/conf/hiveserver2-site.xml + +# Configure Tez +mkdir -p ${TEZ_HOME}/conf + +# Create Tez configuration directory for Hadoop +mkdir -p ${HADOOP_HOME}/etc/hadoop + +# Create mapred-site.xml for YARN integration +cat < ${HADOOP_HOME}/etc/hadoop/mapred-site.xml + + + mapreduce.framework.name + yarn + + + mapreduce.application.classpath + \$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + + yarn.app.mapreduce.am.env + HADOOP_MAPRED_HOME=/opt/hadoop + + + mapreduce.map.env + HADOOP_MAPRED_HOME=/opt/hadoop + + + mapreduce.reduce.env + HADOOP_MAPRED_HOME=/opt/hadoop + + +EOF + +# Create yarn-site.xml for YARN ResourceManager connection +cat < ${HADOOP_HOME}/etc/hadoop/yarn-site.xml + + + yarn.resourcemanager.hostname + ranger-hadoop + + + yarn.resourcemanager.address + ranger-hadoop:8032 + + +EOF + +# Fix tez-site.xml to use absolute HDFS path (critical for Tez to find libraries) +cat < ${TEZ_HOME}/conf/tez-site.xml + + + + tez.lib.uris + hdfs://ranger-hadoop:9000/apps/tez/apache-tez-${TEZ_VERSION}-bin.tar.gz + Comma-delimited list of the location of the Tez libraries which will be localized for DAGs. 
+ + + tez.use.cluster.hadoop-libs + true + Use Hadoop libraries provided by cluster instead of those packaged with Tez + + + tez.am.resource.memory.mb + 1024 + The amount of memory to be used by the AppMaster + + + tez.am.java.opts + -Xmx768m + Java opts for the Tez AppMaster process + + + tez.task.resource.memory.mb + 1024 + The amount of memory to be used by tasks + + + tez.task.launch.cmd-opts + -Xmx768m + Java opts for tasks + + + tez.staging-dir + /tmp/hive + The staging directory for Tez applications in HDFS. + + +EOF + +# Copy Tez JARs to Hive lib directory +cp ${TEZ_HOME}/lib/tez-*.jar ${HIVE_HOME}/lib/ +cp ${TEZ_HOME}/tez-*.jar ${HIVE_HOME}/lib/ + +# Copy all Hadoop configurations to Hive conf directory so Hive can find them +cp ${HADOOP_HOME}/etc/hadoop/core-site.xml ${HIVE_HOME}/conf/ +cp ${HADOOP_HOME}/etc/hadoop/mapred-site.xml ${HIVE_HOME}/conf/ +cp ${HADOOP_HOME}/etc/hadoop/yarn-site.xml ${HIVE_HOME}/conf/ +cp ${TEZ_HOME}/conf/tez-site.xml ${HIVE_HOME}/conf/ + +# Upload Tez libraries to HDFS +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /apps/tez" hdfs + +# Recreate Tez tarball if it doesn't exist (it gets removed during Docker build) +if [ ! -f "/opt/apache-tez-${TEZ_VERSION}-bin.tar.gz" ]; then + echo "Recreating Tez tarball for HDFS upload..." 
+ cd /opt + tar czf apache-tez-${TEZ_VERSION}-bin.tar.gz apache-tez-${TEZ_VERSION}-bin/ +fi + +su -c "${HADOOP_HOME}/bin/hdfs dfs -put /opt/apache-tez-${TEZ_VERSION}-bin.tar.gz /apps/tez/" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 755 /apps/tez" hdfs + +# Create HDFS user directory for hive +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /user/hive" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 777 /user/hive" hdfs + +# Create HDFS /tmp/hive directory for Tez staging +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /tmp/hive" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod -R 777 /tmp/hive" hdfs + +# Fix /tmp directory permissions for Ranger (critical for INSERT operations) +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod 777 /tmp" hdfs + +# Create /user/root directory for YARN job execution +su -c "${HADOOP_HOME}/bin/hdfs dfs -mkdir -p /user/root" hdfs +su -c "${HADOOP_HOME}/bin/hdfs dfs -chmod 777 /user/root" hdfs + +# Initialize Hive schema su -c "${HIVE_HOME}/bin/schematool -dbType ${RANGER_DB_TYPE} -initSchema" hive mkdir -p /opt/hive/logs diff --git a/dev-support/ranger-docker/scripts/ranger-hive.sh b/dev-support/ranger-docker/scripts/ranger-hive.sh index 6e8dc4f847..249403faed 100755 --- a/dev-support/ranger-docker/scripts/ranger-hive.sh +++ b/dev-support/ranger-docker/scripts/ranger-hive.sh @@ -17,7 +17,45 @@ # limitations under the License. if [ "${OS_NAME}" = "UBUNTU" ]; then + echo "Starting SSH service (Ubuntu)..." service ssh start +else + echo "Starting SSH daemon (RHEL/CentOS)..." + # Create SSH privilege separation directory if it doesn't exist + mkdir -p /run/sshd + /usr/sbin/sshd +fi + +# Wait for SSH daemon to be fully ready before proceeding +if [ -f /home/hdfs/.ssh/id_rsa ]; then + echo "Waiting for SSH daemon to be ready..." + SSH_READY=false + for i in {1..30}; do + if su -c "ssh -o ConnectTimeout=2 -o StrictHostKeyChecking=no localhost exit" hdfs 2>/dev/null; then + echo "SSH daemon is ready for hdfs service..." 
+ SSH_READY=true + break + fi + echo "Waiting for SSH daemon... ($i/30)" + sleep 2 + done + + if [ "$SSH_READY" = false ]; then + echo "WARNING: SSH daemon did not become ready within 60 seconds, Hive Services may fail to start properly...." + echo "Attempting to restart SSH daemon..." + pkill sshd 2>/dev/null || true + if [ "${OS_NAME}" = "UBUNTU" ]; then + service ssh start + else + # Ensure SSH privilege separation directory exists + mkdir -p /run/sshd + /usr/sbin/sshd + fi + sleep 3 + fi +else + echo "SSH keys not yet generated, skipping SSH connectivity test" + sleep 2 fi if [ ! -e ${HIVE_HOME}/.setupDone ] @@ -28,13 +66,16 @@ then if [ "${OS_NAME}" = "RHEL" ]; then ssh-keygen -A - /usr/sbin/sshd fi su -c "ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa" yarn su -c "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys" yarn su -c "chmod 0600 ~/.ssh/authorized_keys" yarn + su -c "ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa" hive + su -c "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys" hive + su -c "chmod 0600 ~/.ssh/authorized_keys" hive + # pdsh is unavailable with microdnf in rhel based image. echo "ssh" > /etc/pdsh/rcmd_default @@ -50,14 +91,49 @@ fi cd "${HIVE_HOME}" || exit # Start Hive MetaStore +echo "Starting Hive MetaStore..." su -c "nohup ${HIVE_HOME}/bin/hive --service metastore > metastore.log 2>&1 &" hive # Start HiveServer2 +echo "Starting HiveServer2..." su -c "nohup ${HIVE_HOME}/bin/hiveserver2 > hive-server2.log 2>&1 &" hive +# Wait for services to initialize +echo "Waiting for Hive services to initialize..." sleep 10 -HIVE_SERVER2_PID=`ps -ef | grep -v grep | grep -i "org.apache.hive.service.server.HiveServer2" | awk '{print $2}'` +# Verify Hive services are running and ready +echo "Verifying Hive services are ready for beeline connections..." 
+METASTORE_PID=`ps -ef | grep -v grep | grep -i "org.apache.hadoop.hive.metastore.HiveMetaStore" | awk '{print $2}'` +HIVE_SERVER2_PID=`ps -ef | grep -v grep | grep -i "org.apache.hive.service.server.HiveServer2" | awk '{print $2}'` + +if [ -n "$METASTORE_PID" ]; then + echo "Hive MetaStore is running (PID: $METASTORE_PID)" +else + echo "WARNING: Hive MetaStore process not found!" +fi + +if [ -n "$HIVE_SERVER2_PID" ]; then + echo "HiveServer2 is running (PID: $HIVE_SERVER2_PID)" +else + echo "WARNING: HiveServer2 process not found!" +fi + +# Additional verification: Check if HiveServer2 is listening on port 10000 +echo "Checking if HiveServer2 is listening on port 10000..." +for i in {1..30}; do + if timeout 2 bash -c "echo > /dev/tcp/localhost/10000" 2>/dev/null; then + echo "HiveServer2 is ready and listening on port 10000...." + break + fi + if [ $i -eq 30 ]; then + echo "WARNING: HiveServer2 is not listening on port 10000 after 60 seconds" + echo "Beeline connections may fail. Check metastore.log and hive-server2.log for errors." + else + echo "Waiting for HiveServer2 to listen on port 10000... ($i/30)" + sleep 2 + fi +done # prevent the container from exiting if [ -z "$HIVE_SERVER2_PID" ] diff --git a/dev-support/ranger-docker/scripts/tez-site.xml b/dev-support/ranger-docker/scripts/tez-site.xml new file mode 100644 index 0000000000..80faf26e80 --- /dev/null +++ b/dev-support/ranger-docker/scripts/tez-site.xml @@ -0,0 +1,93 @@ + + + + + tez.lib.uris + ${fs.defaultFS}/apps/tez/apache-tez-${TEZ_VERSION}-bin.tar.gz + Comma-delimited list of the location of the Tez libraries which will be localized for DAGs. 
+ + + + tez.use.cluster.hadoop-libs + true + Use Hadoop libraries provided by cluster instead of those packaged with Tez + + + + tez.am.resource.memory.mb + 1024 + The amount of memory to be used by the AppMaster + + + + tez.am.java.opts + -Xmx768m + Java opts for the Tez AppMaster process + + + + tez.task.resource.memory.mb + 1024 + The amount of memory to be used by tasks + + + + tez.task.launch.cmd-opts + -Xmx768m + Java opts for tasks + + + + tez.runtime.io.sort.mb + 256 + The size of the sort buffer when output needs to be sorted + + + + tez.runtime.unordered.output.buffer.size-mb + 100 + The size of the buffer when output does not require to be sorted + + + + tez.session.am.dag.submit.timeout.secs + 300 + Time to wait (in seconds) for AM to submit a DAG before timing out + + + + tez.am.container.reuse.enabled + true + Whether to reuse containers for tasks belonging to the same session + + + + tez.am.container.reuse.rack-fallback.enabled + true + Whether to fallback to rack local containers when node local containers are not available + + + + tez.am.container.reuse.non-local-fallback.enabled + false + Whether to fallback to non-local containers when rack local containers are not available + +