From 9f8f5445d7d2932c620e4d7af8f5d1389b58d993 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 27 Mar 2025 11:59:24 +0100 Subject: [PATCH 1/4] reduce image size --- hive/Dockerfile | 58 +++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index d3f5f61ae..037100e5d 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -13,12 +13,20 @@ FROM stackable/image/java-devel AS hive-builder ARG PRODUCT ARG HADOOP ARG JMX_EXPORTER +ARG AWS_JAVA_SDK_BUNDLE +ARG AZURE_STORAGE +ARG AZURE_KEYVAULT_CORE ARG STACKABLE_USER_UID # Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) # This can be used to speed up builds when disk space is of no concern. ARG DELETE_CACHES="true" +# It is useful to see which version of Hadoop is used at a glance +# Therefore the use of the full name here +# TODO: Do we really need all of Hadoop in here? +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP} + COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable /stackable USER ${STACKABLE_USER_UID} @@ -58,6 +66,18 @@ rm -rf /stackable/apache-hive-${PRODUCT}-src curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar +# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards +# This way the build will fail should one of the files not be available anymore in a later Hadoop version! + +# Add S3 Support for Hive (support for s3a://) +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ + +# Add Azure ABFS support (support for abfs://) +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ + # We're removing these to make the intermediate layer smaller # This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available # and we are sometimes running into errors because we're out of space. @@ -67,6 +87,9 @@ if [ "${DELETE_CACHES}" = "true" ] ; then rm -rf /stackable/.npm/* rm -rf /stackable/.cache/* fi + +# change groups +chmod -R g=u /stackable EOF @@ -75,9 +98,6 @@ FROM stackable/image/java-base AS final ARG PRODUCT ARG HADOOP ARG RELEASE -ARG AWS_JAVA_SDK_BUNDLE -ARG AZURE_STORAGE -ARG AZURE_KEYVAULT_CORE ARG STACKABLE_USER_UID @@ -106,44 +126,26 @@ LABEL io.k8s.display-name="${NAME}" WORKDIR /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hadoop-${HADOOP} /stackable/hadoop-${HADOOP} +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/jmx /stackable/jmx -# It is useful to see which version of Hadoop is used at a glance -# Therefore the use of the full name here -# TODO: Do we really need all of Hadoop in here? -COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP} +COPY hive/licenses /licenses RUN < /stackable/package_manifest.txt +chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt rm -rf /var/cache/yum ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/hive-metastore +chown -h ${STACKABLE_USER_UID}:0 /stackable/hive-metastore ln -s /stackable/hadoop-${HADOOP} /stackable/hadoop - -# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards -# This way the build will fail should one of the files not be available anymore in a later Hadoop version! - -# Add S3 Support for Hive (support for s3a://) -cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/hive-metastore/lib/ -cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/hive-metastore/lib/ - -# Add Azure ABFS support (support for abfs://) -cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/hive-metastore/lib/ -cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/hive-metastore/lib/ -cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/hive-metastore/lib/ - -# All files and folders owned by root group to support running as arbitrary users. -# This is best practice as all container users will belong to the root group (0). -chown -R ${STACKABLE_USER_UID}:0 /stackable -chmod -R g=u /stackable +chown -h ${STACKABLE_USER_UID}:0 /stackable/hadoop EOF -COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/jmx /stackable/jmx -COPY hive/licenses /licenses - # ---------------------------------------- -# Attention: We are changing the group of all files in /stackable directly above +# Attention: # If you do any file based actions (copying / creating etc.) below this comment you # absolutely need to make sure that the correct permissions are applied! # chown ${STACKABLE_USER_UID}:0 From 8e3b39c07311dc7f296cb512724ab52e2ebb46a1 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 27 Mar 2025 12:02:03 +0100 Subject: [PATCH 2/4] adapted changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2897fdb86..e38ad6e91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Fixed + +- hive: reduce docker image size by removing the recursive chown/chmods in the final image ([#1040]). + +[#1040]: https://github.com/stackabletech/docker-images/pull/1040 + ## [25.3.0] - 2025-03-21 ### Added From b32979fd64c2241cb2a97b0f215f39a53b005e9f Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 4 Apr 2025 10:46:36 +0200 Subject: [PATCH 3/4] add permissions check --- hive/Dockerfile | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/hive/Dockerfile b/hive/Dockerfile index 037100e5d..7594d69b4 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -89,7 +89,7 @@ if [ "${DELETE_CACHES}" = "true" ] ; then fi # change groups -chmod -R g=u /stackable +chmod --recursive g=u /stackable EOF @@ -145,12 +145,21 @@ chown -h ${STACKABLE_USER_UID}:0 /stackable/hadoop EOF # ---------------------------------------- -# Attention: -# If you do any file based actions (copying / creating etc.) below this comment you -# absolutely need to make sure that the correct permissions are applied! -# chown ${STACKABLE_USER_UID}:0 +# Checks +# This section is to run final checks to ensure the created final images +# adhere to several minimal requirements like: +# - check file permissions and ownerships # ---------------------------------------- +# Check that permissions and ownership in /stackable are set correctly +# This will fail and stop the build if any mismatches are found. +RUN < Date: Fri, 4 Apr 2025 11:35:16 +0200 Subject: [PATCH 4/4] consolidation --- hive/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hive/Dockerfile b/hive/Dockerfile index 7594d69b4..acc54fab5 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -136,12 +136,18 @@ microdnf update microdnf clean all rpm -qa --qf "%{NAME}-%{VERSION}-%{RELEASE}\n" | sort > /stackable/package_manifest.txt chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt +chmod g=u /stackable/package_manifest.txt rm -rf /var/cache/yum ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/hive-metastore chown -h ${STACKABLE_USER_UID}:0 /stackable/hive-metastore +chmod g=u /stackable/hive-metastore ln -s /stackable/hadoop-${HADOOP} /stackable/hadoop chown -h ${STACKABLE_USER_UID}:0 /stackable/hadoop +chmod g=u /stackable/hadoop + +# fix missing permissions +chmod --recursive g=u /stackable/jmx EOF # ---------------------------------------- @@ -159,6 +165,7 @@ EOF # ---------------------------------------- # Attention: Do not perform any file based actions (copying/creating etc.) below this comment because the permissions would not be checked. +# ---------------------------------------- USER ${STACKABLE_USER_UID}