Skip to content

Commit ed8ed07

Browse files
committed
Downgraded to java 1.8.
Removed the plugin module. Added one more example. Updated README
1 parent 564fae0 commit ed8ed07

File tree

3,085 files changed

+257
-2941
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,085 files changed

+257
-2941
lines changed

README.md

+53-6
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,67 @@ A Spark bootstrap project written in Scala with gradle as build tool.
88
- [Gradle](https://gradle.org/)
99
- [Scala](https://www.scala-lang.org/)
1010

11+
## Libraries Included
12+
13+
- JavaVersion=1.8
14+
- sparkVersion=3.4.1
15+
- scalaVersion=2.12
16+
- deltaVersion=2.4.0
17+
1118
### Build
1219

1320
`./gradlew clean build`
1421

22+
### Test
23+
24+
./gradlew check
25+
26+
### Run Test Coverage
27+
28+
./gradlew reportTestScoverage
29+
30+
## Run
31+
32+
#### Run sparkSubmit task
33+
34+
- Runs `spark-submit` with the main class `dev.template.spark.RddCollect`
35+
36+
./gradlew sparkSubmit
37+
38+
#### Spark Submit commands in shell
39+
40+
> A local Spark standalone cluster must be up and running (master web UI: http://localhost:8080/)
41+
42+
spark-3.4.1-bin-hadoop3/bin/spark-submit \
43+
--verbose --class dev.template.spark.Main \
44+
--packages io.delta:delta-core_2.12:2.4.0 \
45+
--master spark://localhost:7077 \
46+
--driver-memory 1g \
47+
--executor-memory 1g \
48+
--executor-cores 2 \
49+
build/libs/spark-scala-gradle-bootstrap-2.12.0-all.jar
50+
51+
spark-3.4.1-bin-hadoop3/bin/spark-submit --class dev.template.spark.RddCollect \
52+
--master spark://localhost:7077 \
53+
build/libs/spark-scala-gradle-bootstrap-2.12.0-all.jar
54+
55+
spark-3.4.1-bin-hadoop3/bin/spark-submit --class dev.template.spark.CovidDataPartitioner \
56+
--packages io.delta:delta-core_2.12:2.4.0 \
57+
--master spark://localhost:7077 \
58+
--driver-memory 1g \
59+
--executor-memory 1g \
60+
--executor-cores 2 \
61+
build/libs/spark-scala-gradle-bootstrap-2.12.0-all.jar \
62+
src/main/resources/us-counties-recent.csv \
63+
/tmp/partitioned-covid-data
64+
1565
### Coverage
1666

1767
https://github.com/scoverage/gradle-scoverage
1868

1969
## Functional Test Examples
2070

21-
https://github.com/scoverage/gradle-scoverage/blob/master/build.gradle
22-
23-
## Libraries Included
24-
25-
- Spark - 3.4.1
71+
https://github.com/scoverage/gradle-scoverage/blob/master/build.gradle#L59C1-L59C52
2672

2773
## Useful Links
2874

@@ -34,11 +80,12 @@ https://github.com/scoverage/gradle-scoverage/blob/master/build.gradle
3480

3581
## Issues or Suggestions
3682

83+
https://github.com/mahen-github/spark-scala-gradle-bootstrap/issues
84+
3785
# Learn Spark
3886

3987
https://www.databricks.com/wp-content/uploads/2021/06/Ebook_8-Steps-V2.pdf
4088

4189
# References
4290

4391
https://github.com/spark-examples/spark-scala-examples
44-

build.gradle

+56-67
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,13 @@
1-
//buildscript {
2-
// repositories {
3-
// mavenCentral()
4-
// }
5-
//}
61
plugins {
7-
id 'scala'
8-
id 'application'
9-
id "com.github.spotbugs-base"
10-
id "com.diffplug.spotless"
11-
id 'net.nemerosa.versioning'
12-
id 'com.jfrog.artifactory'
13-
id 'org.scoverage'
14-
id 'template.spark.livyPlugin'
2+
id 'scala'
3+
id 'application'
4+
id "com.github.spotbugs-base"
5+
id "com.diffplug.spotless"
6+
id 'net.nemerosa.versioning'
7+
id 'com.jfrog.artifactory'
8+
id 'org.scoverage'
159
}
1610

17-
//apply plugin: LivyPlugin
1811
apply plugin: 'java'
1912
apply plugin: 'scala'
2013
apply plugin: 'idea'
@@ -31,85 +24,81 @@ sourceCompatibility = JavaVersion.VERSION_1_8
3124
targetCompatibility = JavaVersion.VERSION_1_8
3225

3326
configurations {
34-
provided
27+
provided
3528
}
3629

3730
sourceSets {
38-
main {
39-
compileClasspath += configurations.provided
40-
}
31+
main {
32+
compileClasspath += configurations.provided
33+
}
4134
}
4235

4336
application {
44-
mainClassName = 'dev.template.spark.Main'
37+
mainClassName = 'dev.template.spark.Main'
4538
}
4639

4740

4841
repositories {
49-
mavenCentral()
42+
mavenCentral()
5043
}
5144

5245
dependencies {
53-
implementation "org.scalameta:scalafmt-core_${scalaVersion}:${scalafmt}"
54-
implementation "org.apache.spark:spark-sql_${scalaVersion}:${sparkVersion}"
55-
implementation "org.apache.spark:spark-graphx_${scalaVersion}:${sparkVersion}"
56-
implementation "org.apache.spark:spark-launcher_${scalaVersion}:${sparkVersion}"
57-
implementation "org.apache.spark:spark-catalyst_${scalaVersion}:${sparkVersion}"
58-
implementation "org.apache.spark:spark-streaming_${scalaVersion}:${sparkVersion}"
59-
implementation "org.apache.spark:spark-core_${scalaVersion}:${sparkVersion}"
60-
implementation "commons-io:commons-io:${commonsIO}"
61-
62-
implementation "org.apache.hadoop:hadoop-aws:${hadoopAWS}"
63-
implementation "org.apache.spark:spark-hive_${scalaVersion}:${sparkVersion}"
64-
implementation "io.delta:delta-core_${scalaVersion}:${deltaVersion}"
65-
implementation "com.google.guava:guava:31.1-jre"
66-
compileOnly "org.scala-lang:scala-library:$scalaVersion"
67-
compileOnly "org.scala-lang:scala-compiler:${scalaVersion}"
68-
69-
testImplementation "org.scalatestplus:junit-4-13_${scalaVersion}:3.2.2.0"
70-
testImplementation "junit:junit:${junitVersion}"
71-
testRuntimeOnly "org.scala-lang.modules:scala-xml_${scalaVersion}:1.2.0"
72-
testImplementation 'org.mockito:mockito-core:5.3.1'
73-
74-
testImplementation "org.junit.jupiter:junit-jupiter-api:${jupiterApi}"
75-
testImplementation "org.scalatest:scalatest_${scalaVersion}:${scalaTests}"
46+
implementation "org.scalameta:scalafmt-core_${scalaVersion}:${scalafmt}"
47+
implementation "org.apache.spark:spark-sql_${scalaVersion}:${sparkVersion}"
48+
implementation "org.apache.spark:spark-graphx_${scalaVersion}:${sparkVersion}"
49+
implementation "org.apache.spark:spark-launcher_${scalaVersion}:${sparkVersion}"
50+
implementation "org.apache.spark:spark-catalyst_${scalaVersion}:${sparkVersion}"
51+
implementation "org.apache.spark:spark-streaming_${scalaVersion}:${sparkVersion}"
52+
implementation "org.apache.spark:spark-core_${scalaVersion}:${sparkVersion}"
53+
54+
implementation "org.apache.hadoop:hadoop-aws:${hadoopAWS}"
55+
implementation "org.apache.spark:spark-hive_${scalaVersion}:${sparkVersion}"
56+
implementation "io.delta:delta-core_${scalaVersion}:${deltaVersion}"
57+
compileOnly "org.scala-lang:scala-library:$scalaVersion"
58+
compileOnly "org.scala-lang:scala-compiler:${scalaVersion}"
59+
60+
testImplementation "org.scalatestplus:junit-4-13_${scalaVersion}:3.2.2.0"
61+
testImplementation "junit:junit:${junitVersion}"
62+
testRuntimeOnly "org.scala-lang.modules:scala-xml_${scalaVersion}:1.2.0"
63+
testImplementation 'org.mockito:mockito-core:5.3.1'
64+
65+
testImplementation "org.junit.jupiter:junit-jupiter-api:${jupiterApi}"
66+
testImplementation "org.scalatest:scalatest_${scalaVersion}:${scalaTests}"
7667
}
7768

7869

7970
jar {
80-
classifier 'all'
81-
manifest {
82-
attributes 'Implementation-Title': title,
83-
'Implementation-Version': archiveVersion,
84-
'Main-Class': mainClassFile
85-
}
86-
exclude 'META-INF/*.RSA', 'META-INF/*.SF', 'META-INF/*.DSA'
87-
from files(sourceSets.main.output.classesDirs)
88-
zip64 true
71+
classifier 'all'
72+
manifest {
73+
attributes 'Implementation-Title': title,
74+
'Implementation-Version': archiveVersion,
75+
'Main-Class': mainClassFile
76+
}
77+
exclude 'META-INF/*.RSA', 'META-INF/*.SF', 'META-INF/*.DSA'
78+
from files(sourceSets.main.output.classesDirs)
79+
zip64 true
8980
}
9081

9182

9283
tasks.register('scalaTest', JavaExec) {
93-
dependsOn['testClasses']
94-
mainClass.set("org.scalatest.tools.Runner")
95-
args = ['-R', 'build/classes/scala/test', '-o']
96-
classpath = sourceSets.test.runtimeClasspath
84+
dependsOn['testClasses']
85+
mainClass.set("org.scalatest.tools.Runner")
86+
args = ['-R', 'build/classes/scala/test', '-o']
87+
classpath = sourceSets.test.runtimeClasspath
9788
}
9889

9990
test.dependsOn scalaTest
10091

10192
idea {
102-
module {
103-
// IntelliJ does not know about the standard idiom of provided as used in managing
104-
// uber/shaded jar dependencies. Make it so!
105-
scopes.PROVIDED.plus += [configurations.provided]
106-
}
93+
module {
94+
// IntelliJ does not know about the standard idiom of provided as used in managing
95+
// uber/shaded jar dependencies. Make it so!
96+
scopes.PROVIDED.plus += [configurations.provided]
97+
}
10798
}
10899

109-
def runLivy = tasks.register("runLivy", template.spark.Publish) {
110-
def avscTree = project.fileTree(allAvscs)
111-
112-
targetFiles.from(avscTree)
113-
schemaFile.value(layout.projectDirectory.file("validator/avro-schema.json"))
100+
task sparkSubmit(type: Exec) {
101+
commandLine 'sh', '-c', "/Users/e1xx/spark-3.4.1-bin-hadoop3/bin/spark-submit " +
102+
"--class dev.template.spark.RddCollect" +
103+
" --master spark://localhost:7077 build/libs/spark-scala-gradle-bootstrap-2.12.0-all.jar"
114104
}
115-

gradle.properties

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
sparkVersion=3.4.1
22
scalaVersion=2.12
3-
hadoopAWS=3.3.3
3+
hadoopAWS=3.3.4
44
commonsIO=2.13.0
55
deltaVersion=2.4.0
66
#kafka
@@ -21,3 +21,5 @@ mainClassFile=dev.template.spark.Main
2121
title='spark-scala-gradle-bootstrap'
2222
group='template.spark'
2323
version='1.0.0.SNAPSHOT'
24+
#gradle run time config
25+
org.gradle.jvmargs=-Xmx2048m

gradle/spotless.gradle

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ spotless {
1414
endWithNewline()
1515
}
1616
java {
17-
googleJavaFormat('1.11.0')
17+
googleJavaFormat('1.7')
1818
}
1919
}
2020

settings.gradle

+27-32
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,37 @@
11
pluginManagement {
2-
repositories {
3-
mavenCentral()
4-
maven {
5-
url = uri("https://plugins.gradle.org/m2/")
6-
}
7-
}
8-
plugins {
9-
id 'com.jfrog.artifactory' version '4.29.0'
10-
// id 'ch.ergon.gradle.goodies.versioning' version '1.0.0'
11-
id 'com.github.johnrengelman.shadow' version "7.1.2"
12-
id 'com.github.spotbugs-base' version '5.1.3'
13-
id 'com.diffplug.spotless' version '6.10.0'
14-
id 'net.nemerosa.versioning' version '2.15.1'
15-
// id 'org.scoverage' version '4.0.0'
16-
id("org.scoverage") version "8.0.3"
17-
18-
includeBuild("plugin")
19-
}
2+
repositories {
3+
mavenCentral()
4+
maven {
5+
url = uri("https://plugins.gradle.org/m2/")
6+
}
7+
}
8+
plugins {
9+
id 'com.jfrog.artifactory' version '4.29.0'
10+
id 'com.github.johnrengelman.shadow' version "7.1.2"
11+
id 'com.github.spotbugs-base' version '5.1.3'
12+
id 'com.diffplug.spotless' version '6.10.0'
13+
id 'net.nemerosa.versioning' version '2.15.1'
14+
id 'org.scoverage' version '8.0.3'
15+
}
2016
}
2117

2218
dependencyResolutionManagement {
23-
repositories {
24-
mavenCentral()
25-
maven {
26-
url = uri("https://plugins.gradle.org/m2/")
27-
}
28-
}
19+
repositories {
20+
mavenCentral()
21+
maven {
22+
url = uri("https://plugins.gradle.org/m2/")
23+
}
24+
}
2925
}
3026

3127
buildscript {
32-
repositories {
33-
mavenCentral()
34-
maven {
35-
url = uri("https://plugins.gradle.org/m2/")
36-
}
37-
}
28+
repositories {
29+
mavenCentral()
30+
maven {
31+
url = uri("https://plugins.gradle.org/m2/")
32+
}
33+
}
3834
}
3935

4036
rootProject.name = 'spark-scala-gradle-bootstrap'
41-
//include 'plugin'
42-
assert gradle.gradleVersion =~ "7..+"
37+
assert gradle.gradleVersion =~ "7..+"
Binary file not shown.

0 commit comments

Comments
 (0)