+  # 1. Add the release to documentation.md
+  # NOTE: the heredoc header below was garbled in this patch; release_version
+  # comes from the shell, and the <li> entry format is assumed to match the
+  # existing version lists in documentation.md.
+  python3 - <<EOF
+import re
+
+release_version = "$RELEASE_VERSION"
+is_preview = bool(re.search(r'-preview\d*$', release_version))
+base_version = re.sub(r'-preview\d*$', '', release_version)
+newline = '  <li><a href="docs/{0}/">Spark {0}</a></li>'.format(release_version)
+preview_newline = stable_newline = newline
+
+inserted = False
+
+def parse_version(v):
+    return [int(p) for p in v.strip().split(".")]
+
+def vercmp(v1, v2):
+    a = parse_version(v1)
+    b = parse_version(v2)
+    return (a > b) - (a < b)
+
+with open("documentation.md") as f:
+    lines = f.readlines()
+
+with open("documentation.md", "w") as f:
+    if is_preview:
+        in_preview_section = False
+        for line in lines:
+            if '<p>Documentation for preview releases:</p>' in line:
+                in_preview_section = True
+                f.write(line)
+                continue
+
+            if in_preview_section and re.search(r'docs/\d+\.\d+\.\d+-preview\d*/', line):
+                existing_version = re.search(r'docs/(\d+\.\d+\.\d+-preview\d*)/', line).group(1)
+
+                if existing_version == release_version:
+                    inserted = True  # Already exists, don't add
+                elif not inserted:
+                    base_existing = re.sub(r'-preview\d*$', '', existing_version)
+                    preview_num_existing = int(re.search(r'preview(\d*)', existing_version).group(1) or "0")
+                    preview_num_new = int(re.search(r'preview(\d*)', release_version).group(1) or "0")
+
+                    # Entries are listed newest first: insert before the first
+                    # entry that is not newer than this release.
+                    if (vercmp(base_version, base_existing) > 0 or
+                            (vercmp(base_version, base_existing) == 0 and
+                             preview_num_new >= preview_num_existing)):
+                        f.write(preview_newline + "\n")
+                        inserted = True
+
+                f.write(line)
+                continue
+
+            # Assumes the preview list is closed by a </ul> tag (the tag was
+            # stripped from this patch).
+            if in_preview_section and "</ul>" in line and not inserted:
+                f.write(preview_newline + "\n")
+                inserted = True
+            f.write(line)
+    else:
+        for line in lines:
+            match = re.search(r'docs/(\d+\.\d+\.\d+)/', line)
+            if not inserted and match:
+                existing_version = match.group(1)
+                if vercmp(release_version, existing_version) == 0:
+                    inserted = True  # Already listed, don't duplicate
+                elif vercmp(release_version, existing_version) > 0:
+                    f.write(stable_newline + "\n")
+                    inserted = True
+            f.write(line)
+        if not inserted:
+            f.write(stable_newline + "\n")
+EOF
+
+  echo "Edited documentation.md"
+
+  # 2. Add download link to js/downloads.js
+  if [[ "$RELEASE_VERSION" =~ -preview[0-9]*$ ]]; then
+    echo "Skipping js/downloads.js for preview release: $RELEASE_VERSION"
+  else
+    RELEASE_DATE=$(TZ=America/Los_Angeles date +"%m/%d/%Y")
+    IFS='.' read -r rel_maj rel_min rel_patch <<< "$RELEASE_VERSION"
+    NEW_PACKAGES="packagesV14"
+    if [[ "$rel_maj" -ge 4 ]]; then
+      NEW_PACKAGES="packagesV15"
+    fi
+
+    # The heredoc header below was garbled in this patch; the new entry is
+    # assumed to mirror the existing addRelease(...) lines in js/downloads.js,
+    # using the RELEASE_DATE and NEW_PACKAGES computed above.
+    python3 - <<EOF
+import re
+
+release_version = "$RELEASE_VERSION"
+newline = 'addRelease("$RELEASE_VERSION", new Date("$RELEASE_DATE"), $NEW_PACKAGES, true);'
+
+def parse_version(v):
+    return [int(p) for p in v.strip().split(".")]
+
+def vercmp(v1, v2):
+    a = parse_version(v1)
+    b = parse_version(v2)
+    return (a > b) - (a < b)
+
+new_major, new_minor, new_patch = parse_version(release_version)
+
+inserted = replaced = False
+
+with open("js/downloads.js") as f:
+    lines = f.readlines()
+
+with open("js/downloads.js", "w") as f:
+    for line in lines:
+        m = re.search(r'addRelease\("(\d+\.\d+\.\d+)"', line)
+        if m:
+            existing_version = m.group(1)
+            cmp_result = vercmp(release_version, existing_version)
+            ex_major, ex_minor, ex_patch = parse_version(existing_version)
+
+            if cmp_result == 0:
+                # Same version: overwrite the existing entry in place.
+                f.write(newline + "\n")
+                replaced = True
+            elif not replaced and not inserted and ex_major == new_major and ex_minor == new_minor:
+                # Same major.minor: replace the previous maintenance release.
+                f.write(newline + "\n")
+                replaced = True
+            elif not replaced and not inserted and cmp_result > 0:
+                # Entries are newest first: insert before the first older one.
+                f.write(newline + "\n")
+                f.write(line)
+                inserted = True
+            else:
+                f.write(line)
+        else:
+            f.write(line)
+    if not replaced and not inserted:
+        f.write(newline + "\n")
+EOF
+
+    echo "Edited js/downloads.js"
+  fi
+
+  # 3. Add news post
+  RELEASE_DATE=$(TZ=America/Los_Angeles date +"%Y-%m-%d")
+  FILENAME="news/_posts/${RELEASE_DATE}-spark-${RELEASE_VERSION//./-}-released.md"
+  mkdir -p news/_posts
+
+  if [[ "$RELEASE_VERSION" =~ -preview[0-9]*$ ]]; then
+    BASE_VERSION="${RELEASE_VERSION%%-preview*}"
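+    # e.g. RELEASE_VERSION=4.0.0-preview1 -> BASE_VERSION=4.0.0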
+    # Heredoc header and link markup reconstructed; the front matter and URLs
+    # follow the pattern of earlier preview-release news posts.
+    cat > "$FILENAME" <<EOF
+---
+layout: post
+title: Preview release of Spark ${BASE_VERSION}
+categories:
+- News
+---
+To enable wide-scale community testing of the upcoming release, the Apache Spark community has posted a
+<a href="https://archive.apache.org/dist/spark/spark-${RELEASE_VERSION}/">Spark ${RELEASE_VERSION} release</a>.
+This preview is not a stable release in terms of either API or functionality, but it is meant to give the community early
+access to try the code that will become Spark ${BASE_VERSION}. If you would like to test the release,
+please download it, and send feedback using either the
+<a href="https://spark.apache.org/community.html">mailing lists</a> or
+<a href="https://issues.apache.org/jira/browse/SPARK">JIRA</a>.
+The documentation is available at the <a href="https://spark.apache.org/docs/${RELEASE_VERSION}/">link</a>.
+
+We'd like to thank our contributors and users for their contributions and early feedback to this release. This release would not have been possible without you.
+EOF
+
+  else
+    # Heredoc header and link markup reconstructed; the links follow the
+    # pattern of earlier release announcement posts.
+    cat > "$FILENAME" <<EOF
+---
+layout: post
+title: Spark ${RELEASE_VERSION} released
+categories:
+- News
+---
+We are happy to announce the availability of <a href="{{site.baseurl}}/releases/spark-release-${RELEASE_VERSION//./-}.html">Apache Spark ${RELEASE_VERSION}</a>! Visit the <a href="{{site.baseurl}}/releases/spark-release-${RELEASE_VERSION//./-}.html">release notes</a> to read about the new features, or <a href="{{site.baseurl}}/downloads.html">download</a> the release today.
+EOF
+  fi
+
+  echo "Created $FILENAME"
+
+  # 4. Add release notes with Python to extract the JIRA version ID
+  if [[ "$RELEASE_VERSION" =~ -preview[0-9]*$ ]]; then
+    echo "Skipping JIRA release notes for preview release: $RELEASE_VERSION"
+  else
+    RELEASE_DATE=$(TZ=America/Los_Angeles date +"%Y-%m-%d")
+    # 12315420 is the numeric id of the SPARK project in ASF JIRA.
+    JIRA_PROJECT_ID=12315420
+    # Returns a JSON array of version objects such as {"id": "...", "name": "3.5.7", ...}.
+    JIRA_URL="https://issues.apache.org/jira/rest/api/2/project/SPARK/versions"
+    JSON=$(curl -s "$JIRA_URL")
+
+    # The heredoc below was garbled in this patch; it extracts the id of the
+    # JIRA version whose name matches the release (field names per JIRA's REST API).
+    VERSION_ID=$(python3 - <<EOF
+import json
+
+versions = json.loads("""$JSON""")
+ids = [v["id"] for v in versions if v.get("name") == "$RELEASE_VERSION"]
+if ids:
+    print(ids[0])
+EOF
+)
+    if [[ -z "$VERSION_ID" ]]; then
+      echo "Could not find JIRA version ID for $RELEASE_VERSION" >&2
+    fi
+
+    JIRA_LINK="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=${JIRA_PROJECT_ID}&version=${VERSION_ID}"
+
+    IFS='.' read -r rel_maj rel_min rel_patch <<< "$RELEASE_VERSION"
+    if [[ "$rel_patch" -eq 0 ]]; then
+      ACKNOWLEDGE="patches and features to this release."
+      BODY="Apache Spark ${RELEASE_VERSION} is a new feature release. It introduces new functionality and improvements. We encourage users to try it and provide feedback."
+    else
+      ACKNOWLEDGE="patches to this release."
+      BODY="Apache Spark ${RELEASE_VERSION} is a maintenance release containing security and correctness fixes. This release is based on the branch-${rel_maj}.${rel_min} maintenance branch of Spark. We strongly recommend all ${rel_maj}.${rel_min} users to upgrade to this stable release."
+    fi
+
+    BODY+="
+
+You can find the list of resolved issues and detailed changes in the [JIRA release notes](${JIRA_LINK}).
+
+We would like to acknowledge all community members for contributing ${ACKNOWLEDGE}"
+
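+    # e.g. releases/_posts/2025-09-06-spark-release-3.5.7.md (date is the publish date)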
+    FILENAME="releases/_posts/${RELEASE_DATE}-spark-release-${RELEASE_VERSION}.md"
+    mkdir -p releases/_posts
+    # The heredoc below and the start of the symlink step were garbled in this
+    # patch; the front matter follows earlier release posts.
+    cat > "$FILENAME" <<EOF
+---
+layout: post
+title: Spark Release ${RELEASE_VERSION}
+categories: []
+tags: []
+status: publish
+type: post
+published: true
+---
+${BODY}
+EOF
+
+    echo "Created $FILENAME"
+  fi
+
+  # 5. Update the docs symlink
+  if [[ "$RELEASE_VERSION" =~ -preview[0-9]*$ ]]; then
+    # Preview docs get their own pointer (this path is an assumption; the
+    # original line was lost in this hunk).
+    LINK_PATH="site/docs/preview"
+    ln -sfn "$RELEASE_VERSION" "$LINK_PATH"
+    echo "Updated symlink $LINK_PATH -> $RELEASE_VERSION (preview release)"
+  else
+    LINK_PATH="site/docs/latest"
+
+    # Only move "latest" forward for new major.minor releases.
+    if [[ "$rel_patch" -eq 0 ]]; then
+      if [[ -L "$LINK_PATH" ]]; then
+        CURRENT_TARGET=$(readlink "$LINK_PATH")
+      else
+        CURRENT_TARGET=""
+      fi
+
+      if [[ "$CURRENT_TARGET" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+        IFS='.' read -r cur_maj cur_min cur_patch <<< "$CURRENT_TARGET"
+
+        if [[ "$rel_maj" -gt "$cur_maj" ]]; then
+          ln -sfn "$RELEASE_VERSION" "$LINK_PATH"
+          echo "Updated symlink $LINK_PATH -> $RELEASE_VERSION (major version increased)"
+        elif [[ "$rel_maj" -eq "$cur_maj" && "$rel_min" -gt "$cur_min" ]]; then
+          ln -sfn "$RELEASE_VERSION" "$LINK_PATH"
+          echo "Updated symlink $LINK_PATH -> $RELEASE_VERSION (minor version increased)"
+        else
+          echo "Symlink $LINK_PATH points to $CURRENT_TARGET with an equal or newer major.minor; no change"
+        fi
+      else
+        echo "No valid existing version target."
+      fi
+    else
+      echo "Patch release detected ($RELEASE_VERSION), not updating symlink"
+    fi
+  fi
+
+  git add .
+  git commit -m "Add release docs for Apache Spark $RELEASE_VERSION"
+  git push origin HEAD:asf-site
+  cd ..
+  echo "Release docs uploaded."
+  rm -rf spark-website
+
# Moves the docs from dev directory to release directory.
echo "Moving Spark docs to the release directory"
svn mv --username "$ASF_USERNAME" --password "$ASF_PASSWORD" -m"Apache Spark $RELEASE_VERSION" \
@@ -216,7 +529,6 @@ if [[ "$1" == "finalize" ]]; then
echo "Done."
- # TODO: Test it in the actual official release
# Remove old releases from the mirror
# Extract major.minor prefix
RELEASE_SERIES=$(echo "$RELEASE_VERSION" | cut -d. -f1-2)
@@ -615,13 +927,15 @@ if [[ "$1" == "publish-release" ]]; then
# Calculate deadline in Pacific Time (PST/PDT)
DEADLINE=$(TZ=America/Los_Angeles date -d "+4 days" "+%a, %d %b %Y %H:%M:%S %Z")
+ PYSPARK_VERSION=`echo "$RELEASE_VERSION" | sed -e "s/-/./" -e "s/preview/dev/"`
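+  # e.g. 4.0.0-preview1 -> 4.0.0.dev1, the PEP 440 form used by the pyspark tarball for previews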
JIRA_API_URL="https://issues.apache.org/jira/rest/api/2/project/SPARK/versions"
+ SPARK_VERSION_BASE=$(echo "$SPARK_VERSION" | sed 's/-preview[0-9]*//')
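+  # JIRA tracks preview builds under the base version, e.g. 4.0.0-preview1 -> 4.0.0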
JIRA_VERSION_ID=$(curl -s "$JIRA_API_URL" | \
# Split JSON objects by replacing '},{' with a newline-separated pattern
tr '}' '\n' | \
# Find the block containing the exact version name
- grep -F "\"name\":\"$SPARK_VERSION\"" -A 5 | \
+ grep -F "\"name\":\"$SPARK_VERSION_BASE\"" -A 5 | \
# Extract the line with "id"
grep '"id"' | \
# Extract the numeric id value (assuming "id":"123456")
@@ -693,7 +1007,7 @@ EOF
echo "reporting any regressions."
echo
echo "If you're working in PySpark you can set up a virtual env and install"
- echo "the current RC via \"pip install https://dist.apache.org/repos/dist/dev/spark/${GIT_REF}-bin/pyspark-${SPARK_VERSION}.tar.gz\""
+ echo "the current RC via \"pip install https://dist.apache.org/repos/dist/dev/spark/${GIT_REF}-bin/pyspark-${PYSPARK_VERSION}.tar.gz\""
echo "and see if anything important breaks."
echo "In the Java/Scala, you can add the staging repository to your project's resolvers and test"
echo "with the RC (make sure to clean up the artifact cache before/after so"
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index dbf0cb34c5353..b4034c171fd3a 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar
opentracing-api/0.33.0//opentracing-api-0.33.0.jar
opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar
opentracing-util/0.33.0//opentracing-util-0.33.0.jar
-orc-core/1.9.6/shaded-protobuf/orc-core-1.9.6-shaded-protobuf.jar
-orc-mapreduce/1.9.6/shaded-protobuf/orc-mapreduce-1.9.6-shaded-protobuf.jar
-orc-shims/1.9.6//orc-shims-1.9.6.jar
+orc-core/1.9.7/shaded-protobuf/orc-core-1.9.7-shaded-protobuf.jar
+orc-mapreduce/1.9.7/shaded-protobuf/orc-mapreduce-1.9.7-shaded-protobuf.jar
+orc-shims/1.9.7//orc-shims-1.9.7.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index f0b88666c040d..811b2dba3768e 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -43,7 +43,7 @@ RUN mkdir -p /usr/local/pypy/pypy3.8 && \
ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3.8 && \
ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3
-RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
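+# The default get-pip.py no longer supports Python 3.8; use the 3.8-specific bootstrap.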
+RUN curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py | pypy3
RUN $APT_INSTALL gnupg ca-certificates pandoc
RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index ef7c010e930a1..a182981dfa063 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -167,6 +167,7 @@ export MAVEN_OPTS="${MAVEN_OPTS:--Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m}
# Normal quoting tricks don't work.
# See: http://mywiki.wooledge.org/BashFAQ/050
BUILD_COMMAND=("$MVN" clean package \
+ -s ~/.m2/settings.xml \
-DskipTests \
-Dmaven.javadoc.skip=true \
-Dmaven.scaladoc.skip=true \
@@ -187,7 +188,13 @@ echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DI
echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
-cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
+# Fabio: copy jars from the spark-assembly-*-dist directory, which
+# contains the distribution prepared by the maven-assembly-plugin.
+# The maven-assembly-plugin has rules to remove the hadoop/hops dependencies
+# from the final distribution.
+# You need to build with the -Pbigtop-dist profile for this to work.
+cp "$SPARK_HOME"/assembly/target/spark-assembly_"$SCALA_VERSION"-"$VERSION"-dist/lib/* "$DISTDIR/jars/"
+cp "$SPARK_HOME"/assembly/target/spark-assembly_"$SCALA_VERSION"-"$VERSION"-dist/*.jar "$DISTDIR/jars/"
# Only create the yarn directory if the yarn artifacts were built.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 36cc7a4f994dc..636ed0ffb0cad 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -37,6 +37,9 @@ HADOOP_HIVE_PROFILES=(
hadoop-3-hive-2.3
)
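+# Read the exec-maven-plugin version from the pom instead of hardcoding it in the invocations below.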
+MVN_EXEC_PLUGIN_VERSION=$(build/mvn help:evaluate \
+ -Dexpression=exec-maven-plugin.version -q -DforceStdout | grep -E "[0-9]+\.[0-9]+\.[0-9]+")
+
# We'll switch the version to a temp. one, publish POMs using that new version, then switch back to
# the old version. We need to do this because the `dependency:build-classpath` task needs to
# resolve Spark's internal submodule dependencies.
@@ -47,7 +50,7 @@ OLD_VERSION=$($MVN -q \
-Dexec.executable="echo" \
-Dexec.args='${project.version}' \
--non-recursive \
- org.codehaus.mojo:exec-maven-plugin:1.6.0:exec | grep -E '[0-9]+\.[0-9]+\.[0-9]+')
+ org.codehaus.mojo:exec-maven-plugin:${MVN_EXEC_PLUGIN_VERSION}:exec | grep -E '[0-9]+\.[0-9]+\.[0-9]+')
# dependency:get for guava and jetty-io are workaround for SPARK-37302.
GUAVA_VERSION=$(build/mvn help:evaluate -Dexpression=guava.version -q -DforceStdout | grep -E "^[0-9.]+$")
build/mvn dependency:get -Dartifact=com.google.guava:guava:${GUAVA_VERSION} -q
@@ -61,7 +64,7 @@ SCALA_BINARY_VERSION=$($MVN -q \
-Dexec.executable="echo" \
-Dexec.args='${scala.binary.version}' \
--non-recursive \
- org.codehaus.mojo:exec-maven-plugin:1.6.0:exec | grep -E '[0-9]+\.[0-9]+')
+ org.codehaus.mojo:exec-maven-plugin:${MVN_EXEC_PLUGIN_VERSION}:exec | grep -E '[0-9]+\.[0-9]+')
if [[ "$SCALA_BINARY_VERSION" != "2.12" ]]; then
echo "Skip dependency testing on $SCALA_BINARY_VERSION"
exit 0
diff --git a/docs/_config.yml b/docs/_config.yml
index d19efdb99ba06..acfa53576a9ee 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -19,8 +19,8 @@ include:
# These allow the documentation to be updated with newer releases
# of Spark, Scala, and Mesos.
-SPARK_VERSION: 3.5.8-SNAPSHOT
-SPARK_VERSION_SHORT: 3.5.8
+SPARK_VERSION: 3.5.7-SNAPSHOT
+SPARK_VERSION_SHORT: 3.5.7
SCALA_BINARY_VERSION: "2.12"
SCALA_VERSION: "2.12.18"
MESOS_VERSION: 1.0.0
@@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: |
inputSelector: '#docsearch-input',
enhancedSearchInput: true,
algoliaOptions: {
- 'facetFilters': ["version:3.5.8"]
+ 'facetFilters': ["version:3.5.7"]
},
debug: false // Set debug to true if you want to inspect the dropdown
});
diff --git a/docs/_layouts/redirect.html b/docs/_layouts/redirect.html
index 72a0462fc6a30..6177f91b7d793 100644
--- a/docs/_layouts/redirect.html
+++ b/docs/_layouts/redirect.html
@@ -19,10 +19,11 @@
 <html lang="en-US">
 <meta charset="utf-8">
 <title>Redirecting&hellip;</title>
-<link rel="canonical" href="{{ page.redirect.to }}">
-<script>location="{{ page.redirect.to }}"</script>
-<meta http-equiv="refresh" content="0; url={{ page.redirect.to }}">
+{% assign redirect_url = page.redirect.to | replace_first: '/', '' | prepend: rel_path_to_root | append: '.html' %}
+<link rel="canonical" href="{{ redirect_url }}">
+<script>location="{{ redirect_url }}"</script>
+<meta http-equiv="refresh" content="0; url={{ redirect_url }}">
 <meta name="robots" content="noindex">
 <h1>Redirecting&hellip;</h1>
 <a href="{{ page.redirect.to }}">Click here if you are not redirected.</a>
 </html>