201 changes: 201 additions & 0 deletions .github/workflows/build_spark_with_hopsfs.yaml
@@ -0,0 +1,201 @@
name: Build Spark with hopsfs
on:
  workflow_call:
    inputs:
      ref:
        description: 'The ref to checkout for the spark repo, default is branch-3.5'
        required: false
        type: string
        default: 'branch-3.5'
      jira_tag:
        description: 'The tag to use for the jira release, default is the version from version.txt'
        required: false
        type: string
        default: 'NOJIRA'
      runner:
        description: 'The type of runner to use, default is ghrunner-ee8'
        required: false
        type: string
        default: 'ghrunner-ee8'
      build:
        description: 'Whether to build spark or not, default is true. If this is false then the workflow will only prepare the versioning-related outputs.'
        required: false
        type: boolean
        default: true
    secrets:
      NEXUS_HARBOR_PASSWORD:
        required: true
    outputs:
      pom_version_no_jira:
        value: ${{ jobs.build-spark.outputs.pom_version_no_jira }}
        description: 'The pom version without the jira tag'
      pom_version:
        value: ${{ jobs.build-spark.outputs.pom_version }}
        description: 'The pom version with the jira tag'
      commit_hash:
        value: ${{ jobs.build-spark.outputs.commit_hash }}
        description: 'The commit hash of the spark repo'
      jira_tag:
        value: ${{ jobs.build-spark.outputs.jira_tag }}
        description: 'The jira tag used for the build'
      spark_tar_name:
        value: ${{ jobs.build-spark.outputs.spark_tar_name }}
        description: 'The name of the spark tar file'
      spark_tar_url:
        value: ${{ jobs.build-spark.outputs.spark_tar_url }}
        description: 'The url of the spark tar file'
      hopsfs_version:
        value: ${{ jobs.build-spark.outputs.hopsfs_version }}
        description: 'The version of hopsfs used in the build'
  workflow_dispatch:
    inputs:
      ref:
        description: 'The ref to checkout for the spark repo, default is branch-3.5'
        required: false
        type: string
        default: 'branch-3.5'
      jira_tag:
        description: 'The tag to use for the jira release, default is the version from version.txt'
        required: false
        type: string
        default: 'NOJIRA'
      runner:
        description: 'The type of runner to use, default is ghrunner-ee8'
        required: false
        type: string
        default: 'ghrunner-ee8'
      build:
        description: 'Whether to build spark or not, default is true. If this is false then the workflow will only prepare the versioning-related outputs.'
        required: false
        type: boolean
        default: true
  # pull_request:
concurrency:
  group: build-spark-${{ github.workflow }}-${{ github.job }}-${{ inputs.jira_tag || 'NOJIRA' }}
  cancel-in-progress: true

# Fallback defaults avoid errors on pull_request runs, where these workflow inputs are not defined
env:
  SPARK_REF: ${{ inputs.ref || 'branch-3.5' }}
  JIRA_TAG: ${{ inputs.jira_tag || 'NOJIRA' }}

jobs:
  build-spark:
    runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
    outputs:
      pom_version_no_jira: ${{ steps.prep_version.outputs.pom_version_no_jira }}
      pom_version: ${{ steps.prep_version.outputs.pom_version }}
      commit_hash: ${{ steps.prep_version.outputs.commit_hash }}
      jira_tag: ${{ env.JIRA_TAG }}
      spark_tar_name: ${{ steps.prep_version.outputs.spark_tar_name }}
      spark_tar_url: ${{ steps.prep_version.outputs.spark_tar_url }}
      hopsfs_version: ${{ steps.prep_version.outputs.hopsfs_version }}
    steps:
      - name: Checkout spark repo
        uses: actions/checkout@v4
        with:
          repository: logicalclocks/spark
          ref: ${{ env.SPARK_REF }}
          path: ${{ github.workspace }}/spark

      - name: To build or not to build
        id: to_build_or_not_to_build
        shell: bash
        env:
          BUILD_SPARK: ${{ (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ','), 'build-spark')) || inputs.build }}
        run: |
          if [[ "${{ env.BUILD_SPARK }}" != "true" ]]; then
            echo "# :recycle: Not building Spark" >> $GITHUB_STEP_SUMMARY
            if [[ "${{ github.event_name }}" == "pull_request" ]]; then
              echo "This is a pull request and the 'build-spark' label is not present." >> $GITHUB_STEP_SUMMARY
              echo "pull_request_labels=${{ join(github.event.pull_request.labels.*.name, ', ') }}" >> $GITHUB_STEP_SUMMARY
            elif [[ "${{ inputs.build || 'false' }}" != "true" ]]; then
              echo "The input 'build' is set to false." >> $GITHUB_STEP_SUMMARY
            fi
            echo "BUILD_SPARK=$BUILD_SPARK" >> $GITHUB_OUTPUT
          else
            echo "# :white_check_mark: Building Spark" >> $GITHUB_STEP_SUMMARY
            echo "BUILD_SPARK=$BUILD_SPARK" >> $GITHUB_OUTPUT
          fi

      - name: Prep step version
        shell: bash
        id: prep_version
        working-directory: ${{ github.workspace }}/spark
        run: |
          COMMIT_HASH=$(git rev-parse --short HEAD)
          POM_VERSION_NO_JIRA=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec)
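          # Tag every module version with the JIRA tag: <x>-SNAPSHOT becomes <x>-<JIRA_TAG>-SNAPSHOT
          # (e.g. with the default tag, 3.5.x-SNAPSHOT would become 3.5.x-NOJIRA-SNAPSHOT; version shown is illustrative).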
          find . -name "pom.xml" -exec sed -i "s|<version>${POM_VERSION_NO_JIRA}</version>|<version>${POM_VERSION_NO_JIRA%-SNAPSHOT}-${JIRA_TAG}-SNAPSHOT</version>|g" {} \;
          POM_VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec)
          SPARK_TAR_NAME=spark-${POM_VERSION}-bin-without-hadoop-with-hive.tgz
          SPARK_TAR_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${SPARK_TAR_NAME}"
          HOPSFS_VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${hadoop.version}' --non-recursive exec:exec)

          echo "POM_VERSION_NO_JIRA=${POM_VERSION_NO_JIRA}" >> $GITHUB_ENV
          echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_ENV
          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
          echo "SPARK_TAR_NAME=${SPARK_TAR_NAME}" >> $GITHUB_ENV
          echo "SPARK_TAR_URL=${SPARK_TAR_URL}" >> $GITHUB_ENV
          echo "HOPSFS_VERSION=${HOPSFS_VERSION}" >> $GITHUB_ENV

          echo "POM_VERSION_NO_JIRA=${POM_VERSION_NO_JIRA}" >> $GITHUB_STEP_SUMMARY
          echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_STEP_SUMMARY
          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_STEP_SUMMARY
          echo "SPARK_TAR_NAME=${SPARK_TAR_NAME}" >> $GITHUB_STEP_SUMMARY
          echo "SPARK_TAR_URL=${SPARK_TAR_URL}" >> $GITHUB_STEP_SUMMARY
          echo "HOPSFS_VERSION=${HOPSFS_VERSION}" >> $GITHUB_STEP_SUMMARY

          echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_OUTPUT
          echo "POM_VERSION_NO_JIRA=${POM_VERSION_NO_JIRA}" >> $GITHUB_OUTPUT
          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_OUTPUT
          echo "SPARK_TAR_NAME=${SPARK_TAR_NAME}" >> $GITHUB_OUTPUT
          echo "SPARK_TAR_URL=${SPARK_TAR_URL}" >> $GITHUB_OUTPUT
          echo "HOPSFS_VERSION=${HOPSFS_VERSION}" >> $GITHUB_OUTPUT

      - name: Set up .m2 settings.xml
        shell: bash
        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
        env:
          M2_HOME: ~/.m2
        run: |
          echo "M2_HOME var is $M2_HOME" >> $GITHUB_STEP_SUMMARY
          mkdir -p ~/.m2
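          # The HopsEE and HiveEE server ids below are expected to match the repository ids declared in the
          # Spark pom.xml (the HiveEE repository and the HopsEE distributionManagement entries touched in this PR),
          # so Maven can authenticate against those Nexus repositories.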
echo "<settings><servers>" > ~/.m2/settings.xml
echo "<server><id>HopsEE</id><username>${{ vars.NEXUS_HARBOR_USER }}</username><password>${{ secrets.NEXUS_HARBOR_PASSWORD }}</password><configuration></configuration></server>" >> ~/.m2/settings.xml
echo "<server><id>HiveEE</id><username>${{ vars.NEXUS_HARBOR_USER }}</username><password>${{ secrets.NEXUS_HARBOR_PASSWORD }}</password><configuration></configuration></server>" >> ~/.m2/settings.xml
echo "</servers></settings>" >> ~/.m2/settings.xml


      - name: Cache maven
        id: cache-maven
        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
        uses: actions/cache@v4
        with:
          path: |
            ~/.m2
            !~/.m2/settings.xml
          key: ${{ runner.os }}-maven-spark-${{ hashFiles('spark/**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-spark-


      - name: Build spark and spark-packaging
        shell: bash
        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
        working-directory: ${{ github.workspace }}/spark
        env:
          POM_VERSION: ${{ env.POM_VERSION }}
          M2_HOME: ~/.m2
        run: |
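          # Builds a Spark distribution tarball without bundled Hadoop (hadoop-provided) but with Hive support;
          # the -U added to dev/make-distribution.sh in this PR makes Maven check remote repositories for
          # updated snapshots and missing releases instead of relying on stale cached artifacts.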
          ./dev/make-distribution.sh --name without-hadoop-with-hive --tgz "-Pkubernetes,hadoop-provided,parquet-provided,hive,hadoop-cloud,bigtop-dist"

      - name: Upload spark-packaging artifact to Nexus
        shell: bash
        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
        working-directory: ${{ github.workspace }}/spark
        env:
          M2_HOME: ~/.m2
        run: |
          curl -u ${{ vars.NEXUS_HARBOR_USER }}:${{ secrets.NEXUS_HARBOR_PASSWORD }} --upload-file spark-$POM_VERSION-bin-without-hadoop-with-hive.tgz "${SPARK_TAR_URL}"
          export MAVEN_OPTS="${MAVEN_OPTS:--Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m}"
          ./build/mvn deploy -DskipTests -Dmaven.javadoc.skip=true -Dmaven.scaladoc.skip=true -Dmaven.source.skip -Dcyclonedx.skip=true -Pkubernetes,hadoop-provided,parquet-provided,hive,hadoop-cloud
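
This file is exposed through workflow_call, so other pipelines can reuse it. As a minimal, illustrative sketch (not part of this PR), a caller workflow could invoke it and consume its outputs roughly as follows; the repository and ref in "uses:", and the follow-up job, are assumptions:

name: Example caller (illustrative sketch)
on:
  workflow_dispatch:
jobs:
  spark:
    # Repository and ref are assumed for illustration; the path matches this PR's new workflow file.
    uses: logicalclocks/spark/.github/workflows/build_spark_with_hopsfs.yaml@branch-3.5
    with:
      ref: 'branch-3.5'
      jira_tag: 'NOJIRA'
      build: true
    secrets:
      NEXUS_HARBOR_PASSWORD: ${{ secrets.NEXUS_HARBOR_PASSWORD }}
  report:
    needs: spark
    runs-on: ubuntu-latest
    steps:
      - run: echo "Built ${{ needs.spark.outputs.spark_tar_name }} at ${{ needs.spark.outputs.spark_tar_url }} from commit ${{ needs.spark.outputs.commit_hash }}"
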
2 changes: 1 addition & 1 deletion dev/make-distribution.sh
@@ -166,7 +166,7 @@ export MAVEN_OPTS="${MAVEN_OPTS:--Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m}
 # Store the command as an array because $MVN variable might have spaces in it.
 # Normal quoting tricks don't work.
 # See: http://mywiki.wooledge.org/BashFAQ/050
-BUILD_COMMAND=("$MVN" clean package \
+BUILD_COMMAND=("$MVN" clean package -U \
     -DskipTests \
     -Dmaven.javadoc.skip=true \
     -Dmaven.scaladoc.skip=true \
24 changes: 20 additions & 4 deletions pom.xml
@@ -122,7 +122,7 @@
     <slf4j.version>2.0.17</slf4j.version>
     <log4j.version>2.24.3</log4j.version>
     <!-- make sure to update IsolatedClientLoader whenever this version is changed -->
-    <hadoop.version>3.2.0.16-EE-SNAPSHOT</hadoop.version>
+    <hadoop.version>3.2.0.17-EE-RC1</hadoop.version>
     <hadoop.group>io.hops</hadoop.group>
     <!-- SPARK-41247: When updating `protobuf.version`, also need to update `protoVersion` in `SparkBuild.scala` -->
     <protobuf.version>3.23.4</protobuf.version>
@@ -133,8 +133,8 @@
     <hive.group>io.hops.hive</hive.group>
     <hive.classifier>core</hive.classifier>
     <!-- Version used in Maven Hive dependency -->
-    <hive.version>3.0.0.13.5</hive.version>
-    <hive23.version>3.0.0.13.5</hive23.version>
+    <hive.version>3.0.0.13.10-IS-153-SNAPSHOT</hive.version>
+    <hive23.version>3.0.0.13.10-IS-153-SNAPSHOT</hive23.version>
     <!-- Version used for internal directory structure -->
     <hive.version.short>3.0</hive.version.short>
     <!-- note that this should be compatible with Kafka brokers version 0.10 and up -->
@@ -362,9 +362,20 @@
         <enabled>true</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>HiveEE</id>
+      <name>Hive Release Repository</name>
+      <url>https://nexus.hops.works/repository/hive-artifacts</url>
+      <releases>
+        <enabled>true</enabled>
+      </releases>
+      <snapshots>
+        <enabled>true</enabled>
+      </snapshots>
+    </repository>
     <repository>
       <id>Hops</id>
-      <name>Hops Repo</name>
+      <name>Hops Archiva Release Repository</name>
       <url>https://archiva.hops.works/repository/Hops/</url>
       <releases>
         <enabled>true</enabled>
@@ -4029,5 +4040,10 @@
       <name>Hops Release Repository</name>
       <url>https://nexus.hops.works/repository/hops-artifacts</url>
     </repository>
+    <snapshotRepository>
+      <id>HopsEE</id>
+      <name>Hops Repo</name>
+      <url>https://nexus.hops.works/repository/hops-artifacts/</url>
+    </snapshotRepository>
   </distributionManagement>
 </project>