Skip to content

Commit a1446e4

Browse files
committed
new spark-k8s/hbase-connectors image
1 parent f80f07c commit a1446e4

6 files changed

Lines changed: 132 additions & 92 deletions

File tree

spark-k8s/Dockerfile.3

Lines changed: 1 addition & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -33,96 +33,7 @@ EOF
3333

3434
# hbase-connectors-builder: Build the Spark HBase connector and copy
3535
# required JARs into /stackable/spark/jars
36-
FROM local-image/java-devel AS hbase-connectors-builder
37-
38-
ARG PRODUCT_VERSION
39-
ARG RELEASE_VERSION
40-
ARG HADOOP_HADOOP_VERSION
41-
# Reassign the arg to `HADOOP_VERSION` for better readability.
42-
ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION}
43-
ARG HBASE_VERSION
44-
ARG HBASE_CONNECTOR_VERSION
45-
ARG STACKABLE_USER_UID
46-
47-
WORKDIR /stackable
48-
49-
# Copy the pom.xml file from the patched Spark source code to read the
50-
# versions used by Spark. The pom.xml defines child modules which are
51-
# not required and not copied, therefore mvn must be called with the
52-
# parameter --non-recursive.
53-
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
54-
/stackable/src/spark-k8s/patchable-work/worktree/${PRODUCT_VERSION}/pom.xml \
55-
spark/
56-
57-
# Patch the hbase-connectors source code
58-
WORKDIR /stackable
59-
60-
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
61-
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR_VERSION}
62-
63-
RUN <<EOF
64-
65-
cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR_VERSION})/spark"
66-
67-
NEW_VERSION="${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}"
68-
69-
mvn versions:set -DnewVersion=$NEW_VERSION
70-
71-
# Create snapshot of the source code including custom patches
72-
tar -czf /stackable/hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz .
73-
74-
# Building the hbase-connectors with JDK 17 is not yet supported, see
75-
# https://github.com/apache/hbase-connectors/pull/132.
76-
# As there are no JDK profiles, access to the non-public elements must
77-
# be enabled with --add-opens, see https://openjdk.org/jeps/403 and
78-
# https://openjdk.org/jeps/261#Breaking-encapsulation.
79-
export JDK_JAVA_OPTIONS="\
80-
--add-opens java.base/java.lang=ALL-UNNAMED \
81-
--add-opens java.base/java.util=ALL-UNNAMED"
82-
83-
# Get the Scala version used by Spark
84-
SCALA_VERSION=$(grep "scala.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
85-
86-
# Get the Scala binary version used by Spark
87-
SCALA_BINARY_VERSION=$(grep "scala.binary.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
88-
89-
# Build the Spark HBase connector
90-
# Skip the tests because the MiniHBaseCluster does not get ready for
91-
# whatever reason:
92-
# Caused by: java.lang.RuntimeException: Master not active after 30000ms
93-
# at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
94-
# at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
95-
# at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
96-
# at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
97-
mvn \
98-
--batch-mode \
99-
--no-transfer-progress \
100-
--define spark.version="${PRODUCT_VERSION}" \
101-
--define scala.version="${SCALA_VERSION}" \
102-
--define scala.binary.version="${SCALA_BINARY_VERSION}" \
103-
--define hadoop-three.version="${HADOOP_VERSION}" \
104-
--define hbase.version="${HBASE_VERSION}" \
105-
--define skipTests \
106-
--define maven.test.skip=true \
107-
clean package
108-
109-
mkdir -p /stackable/spark/jars
110-
ln -s "$(pwd)/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar" /stackable/spark/jars/hbase-spark-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}.jar
111-
112-
cd /stackable/spark/jars
113-
114-
# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
115-
# which is required by the connector.
116-
# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
117-
# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
118-
# classpath as long as they have the same version.
119-
mvn --non-recursive --file /stackable/spark/pom.xml \
120-
dependency:copy \
121-
-Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
122-
-DoutputDirectory=./jars
123-
chmod g=u /stackable/hbase-connector-${HBASE_CONNECTOR_VERSION}-stackable${RELEASE_VERSION}-src.tar.gz .
124-
EOF
125-
36+
FROM local-image/spark-k8s/hbase-connectors AS hbase-connectors-builder
12637

12738
# spark-builder: Build Spark into /stackable/spark-${PRODUCT_VERSION}/dist,
12839
# download additional JARs and perform checks

spark-k8s/boil-config.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ containerfile = "Dockerfile.3"
66
java-base = "17"
77
java-devel = "17"
88
hbase = "2.6.3"
9+
"spark-k8s/hbase-connectors" = "1.0.1_3"
910

1011
[versions."3.5.7".build-arguments]
1112
python-version = "3.11"
@@ -17,7 +18,7 @@ stax2-api-version = "4.2.1" # needs to match the jackson version h
1718
woodstox-core-version = "6.5.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
1819
jmx-exporter-version = "1.3.0"
1920
tini-version = "0.19.0"
20-
hbase-connector-version = "1.0.1"
21+
hbase-connector-version = "1.0.1_3"
2122

2223
[versions."4.1.1"]
2324
containerfile = "Dockerfile.4"
@@ -38,4 +39,3 @@ stax2-api-version = "4.2.2" # needs to match the jackson version h
3839
woodstox-core-version = "7.1.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies
3940
jmx-exporter-version = "1.3.0"
4041
tini-version = "0.19.0"
41-
# hbase-connector-version = "1.0.1" # Not yet available for Spark 4.x https://github.com/apache/hbase-connectors/pull/130
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
2+
# check=error=true
3+
4+
# The purpose of this stage is to gather jars and environment variables needed in the final stage.
5+
# These are collected in the /stackable/spark directory.
6+
FROM local-image/java-devel AS spark-source-builder
7+
8+
ARG RELEASE_VERSION
9+
ARG SPARK_VERSION
10+
ARG STACKABLE_USER_UID
11+
12+
WORKDIR /stackable
13+
14+
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/patches/patchable.toml /stackable/src/spark-k8s/stackable/patches/patchable.toml
15+
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/patches/${SPARK_VERSION} /stackable/src/spark-k8s/stackable/patches/${SPARK_VERSION}
16+
17+
RUN <<EOF
18+
cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s ${SPARK_VERSION})"
19+
20+
# NEW_VERSION="${SPARK_VERSION}-stackable${RELEASE_VERSION}"
21+
22+
# mvn versions:set -DnewVersion=$NEW_VERSION
23+
24+
mkdir -p /stackable/spark/jars
25+
26+
# Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
27+
# which is required by the connector.
28+
# Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
29+
# log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
30+
# classpath as long as they have the same version.
31+
mvn --non-recursive --file pom.xml \
32+
dependency:copy \
33+
-Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
34+
-DoutputDirectory=/stackable/spark/jars
35+
36+
37+
# Create an environment file with the properties needed to build the connector
38+
# Get the Scala version used by Spark
39+
SCALA_VERSION=$(grep "scala.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
40+
41+
# Get the Scala binary version used by Spark
42+
SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
43+
44+
echo "SCALA_VERSION=${SCALA_VERSION}" > /stackable/spark/env
45+
echo "SCALA_BINARY_VERSION=${SCALA_BINARY_VERSION}" >> /stackable/spark/env
46+
echo "SPARK_VERSION=${SPARK_VERSION}" >> /stackable/spark/env
47+
EOF
48+
49+
# final: Build the Spark HBase connector and copy
50+
# required JARs into /stackable/spark/jars
51+
FROM local-image/java-devel AS final
52+
53+
ARG PRODUCT_VERSION
54+
ARG RELEASE_VERSION
55+
ARG HADOOP_VERSION
56+
ARG HBASE_VERSION
57+
ARG STACKABLE_USER_UID
58+
59+
# Patch the hbase-connectors source code
60+
WORKDIR /stackable
61+
62+
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/patchable.toml /stackable/src/spark-k8s/hbase-connectors/stackable/patches/patchable.toml
63+
COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${PRODUCT_VERSION} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${PRODUCT_VERSION}
64+
65+
# Copy jars and env from spark-source-builder
66+
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
67+
/stackable/spark/jars \
68+
spark/jars
69+
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
70+
/stackable/spark/env \
71+
spark/env
72+
73+
RUN <<EOF
74+
75+
cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${PRODUCT_VERSION})"/spark
76+
77+
NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"
78+
79+
mvn versions:set -DnewVersion=$NEW_VERSION
80+
81+
# Create snapshot of the source code including custom patches
82+
tar -czf /stackable/hbase-connector-${NEW_VERSION}-src.tar.gz .
83+
84+
# Building the hbase-connectors with JDK 17 is not yet supported, see
85+
# https://github.com/apache/hbase-connectors/pull/132.
86+
# As there are no JDK profiles, access to the non-public elements must
87+
# be enabled with --add-opens, see https://openjdk.org/jeps/403 and
88+
# https://openjdk.org/jeps/261#Breaking-encapsulation.
89+
export JDK_JAVA_OPTIONS="\
90+
--add-opens java.base/java.lang=ALL-UNNAMED \
91+
--add-opens java.base/java.util=ALL-UNNAMED"
92+
93+
source /stackable/spark/env
94+
95+
# Build the Spark HBase connector
96+
# Skip the tests because the MiniHBaseCluster does not get ready for
97+
# whatever reason:
98+
# Caused by: java.lang.RuntimeException: Master not active after 30000ms
99+
# at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
100+
# at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
101+
# at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
102+
# at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
103+
mvn \
104+
--batch-mode \
105+
--no-transfer-progress \
106+
--define spark.version="${SPARK_VERSION}" \
107+
--define scala.version="${SCALA_VERSION}" \
108+
--define scala.binary.version="${SCALA_BINARY_VERSION}" \
109+
--define hadoop-three.version="${HADOOP_VERSION}" \
110+
--define hbase.version="${HBASE_VERSION}" \
111+
--define skipTests \
112+
--define maven.test.skip=true \
113+
clean package
114+
115+
116+
cp "$(pwd)/hbase-spark/target/hbase-spark-${NEW_VERSION}.jar" /stackable/spark/jars/hbase-spark-${NEW_VERSION}.jar
117+
118+
chmod g=u /stackable/hbase-connector-${NEW_VERSION}-src.tar.gz .
119+
EOF
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[versions."1.0.1_3"]
2+
containerfile = "Dockerfile"
3+
4+
[versions."1.0.1_3".local-images]
5+
java-devel = "17"
6+
7+
[versions."1.0.1_3".build-arguments]
8+
spark-version = "3.5.7"
9+
hadoop-version = "3.4.2"
10+
hbase-version = "2.6.3"

spark-k8s/hbase-connectors/stackable/patches/1.0.1/0001-Fix-protobuf-on-aarch64.patch renamed to spark-k8s/hbase-connectors/stackable/patches/1.0.1_3/0001-Fix-protobuf-on-aarch64.patch

File renamed without changes.

spark-k8s/hbase-connectors/stackable/patches/1.0.1/patchable.toml renamed to spark-k8s/hbase-connectors/stackable/patches/1.0.1_3/patchable.toml

File renamed without changes.

0 commit comments

Comments
 (0)