From 9aeba6d0c1699553dbe2c0118127df6c5afc6d26 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sun, 20 Oct 2024 05:38:08 +0800 Subject: [PATCH 1/2] Add example documentation techniques related to running dynamic JVM classpaths (#3770) --- docs/modules/ROOT/nav.adoc | 1 + .../pages/extending/running-jvm-code.adoc | 33 ++++++++++ .../extending/jvmcode/1-subprocess/build.mill | 65 +++++++++++++++++++ .../jvmcode/1-subprocess/foo/generate.groovy | 4 ++ .../jvmcode/1-subprocess/foo/src/Foo.java | 20 ++++++ .../extending/jvmcode/2-inprocess/build.mill | 52 +++++++++++++++ .../jvmcode/2-inprocess/foo/generate.groovy | 4 ++ .../jvmcode/2-inprocess/foo/src/Foo.java | 20 ++++++ example/package.mill | 1 + main/util/src/mill/util/Jvm.scala | 11 ++++ 10 files changed, 211 insertions(+) create mode 100644 docs/modules/ROOT/pages/extending/running-jvm-code.adoc create mode 100644 example/extending/jvmcode/1-subprocess/build.mill create mode 100644 example/extending/jvmcode/1-subprocess/foo/generate.groovy create mode 100644 example/extending/jvmcode/1-subprocess/foo/src/Foo.java create mode 100644 example/extending/jvmcode/2-inprocess/build.mill create mode 100644 example/extending/jvmcode/2-inprocess/foo/generate.groovy create mode 100644 example/extending/jvmcode/2-inprocess/foo/src/Foo.java diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 6a85f69cef6..9db3f01d8c5 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -82,6 +82,7 @@ ** xref:contrib/twirllib.adoc[] ** xref:contrib/versionfile.adoc[] * xref:extending/thirdparty-plugins.adoc[] +* xref:extending/running-jvm-code.adoc[] * xref:extending/writing-plugins.adoc[] * xref:extending/meta-build.adoc[] diff --git a/docs/modules/ROOT/pages/extending/running-jvm-code.adoc b/docs/modules/ROOT/pages/extending/running-jvm-code.adoc new file mode 100644 index 00000000000..30655331322 --- /dev/null +++ b/docs/modules/ROOT/pages/extending/running-jvm-code.adoc @@ -0,0 +1,33 @@ += Running 
Dynamic JVM Code + +While xref:extending/import-ivy-plugins.adoc[import $ivy] is convenient, +it comes with limitations as the JVM library it imports is global to your build: + +1. The library has to be resolved and downloaded before any part of your build starts. + If your codebase is large and most parts of your build don't use that library, + needing to download the library when working on parts that don't need it can be wasteful. + +2. The library can only have one version across the entire build. This can be an issue if + you need to have multiple versions of the library used in different parts of your build. + For example, different parts of a large Groovy codebase may use different versions of the Groovy + interpreter, and so the Groovy interpreter cannot be included via `import $ivy` because the + different versions would collide. + +3. The library cannot be built as part of your main build. While it is possible to build + it as part of your xref:extending/meta-build.adoc[Meta-Build], that comes with additional + complexity and limitations. In a large codebase, you often end up building modules that + are shared between production deployments as well as local tooling: in such cases + `import $ivy` is not a good fit. + + +In scenarios where these limitations cause issues, Mill provides other ways to run arbitrary +JVM code apart from `import $ivy`. 
+ + +== Subprocesses + +include::partial$example/extending/jvmcode/1-subprocess.adoc[] + +== In-process Isolated Classloaders + +include::partial$example/extending/jvmcode/2-inprocess.adoc[] diff --git a/example/extending/jvmcode/1-subprocess/build.mill b/example/extending/jvmcode/1-subprocess/build.mill new file mode 100644 index 00000000000..3f2c83357b5 --- /dev/null +++ b/example/extending/jvmcode/1-subprocess/build.mill @@ -0,0 +1,65 @@ +// This example demonstrates how to resolve a third-party library from Maven Central, +// but instead of using xref:extending/import-ivy-plugins.adoc[import $ivy] (which loads the +// library as part of the main build) we use: +// +// * `defaultResolver().resolveDeps` to resolve the dependencies from Maven Central, +// converting `org:name:version` coordinates (and their transitive dependencies) to +// `PathRef`s referring to files on disk +// +// * `Jvm.runSubprocess`, which runs the given classpath files in a subprocess, starting +// from the specified `mainClass` +// +// While xref:fundamentals/bundled-libraries.adoc#_os_lib[OS-Lib]'s `os.call` and `os.spawn` APIs +// can be used to create any processes, JVM subprocesses are common enough and have enough +// idiosyncrasies (e.g. classpaths) that Mill provides helper methods specifically for them. 
+ +package build +import mill._, javalib._ +import mill.util.Jvm + +object foo extends JavaModule { + def groovyClasspath: Task[Agg[PathRef]] = Task{ + defaultResolver().resolveDeps(Agg(ivy"org.codehaus.groovy:groovy:3.0.9")) + } + + def groovyScript = Task.Source(millSourcePath / "generate.groovy") + + def groovyGeneratedResources = Task{ + Jvm.runSubprocess( + mainClass = "groovy.ui.GroovyMain", + classPath = groovyClasspath().map(_.path), + mainArgs = Seq( + groovyScript().path.toString, + "Groovy!", + (Task.dest / "groovy-generated.html").toString + ), + workingDir = Task.dest + ) + PathRef(Task.dest) + } + + def resources = super.resources() ++ Seq(groovyGeneratedResources()) +} + +// For this example, we use the https://groovy-lang.org/[Groovy] interpreter as our example +// third-party library. While often used as a `groovy` CLI command, Groovy is also available +// on Maven Central at the `org.codehaus.groovy:groovy:3.0.9` coordinates. This lets us pull +// it into our build as a classpath comprising ``PathRef``s to files on disk, and then run the +// Groovy JVM main method (in the class +// https://github.com/apache/groovy/blob/48c8720c04b2c15396a7b37f140e0954418f74d3/src/main/java/groovy/ui/GroovyMain.java#L113[groovy.ui.GroovyMain]) +// passing it our script file `generate.groovy` (wired into our build using an +// xref:fundamentals/tasks.adoc#_sources[Source Task] `groovyScript`) and arguments +// used to configure the generated file and tell the script where to write it to. `generate.groovy` +// generates a file on disk that we then wire into `def resources`, which is read at runtime +// by `foo.run` and printed to the terminal output as shown below: + +/** Usage + +> ./mill foo.run +Contents of groovy-generated.html is

Hello!

Groovy!

+*/ + +// As mentioned above, `defaultResolver().resolveDeps` and `Jvm.runSubprocess` are an +// alternative to `import $ivy`, providing you more flexibility to resolve dependencies +// on-demand as part of your task graph only when necessary, and keeping it isolated from +// the build in a subprocess preventing classpath collisions. \ No newline at end of file diff --git a/example/extending/jvmcode/1-subprocess/foo/generate.groovy b/example/extending/jvmcode/1-subprocess/foo/generate.groovy new file mode 100644 index 00000000000..e3813e24eb5 --- /dev/null +++ b/example/extending/jvmcode/1-subprocess/foo/generate.groovy @@ -0,0 +1,4 @@ +def htmlContent = "

Hello!

" + args[0] + "

" + +def outputFile = new File(args[1]) +outputFile.write(htmlContent) \ No newline at end of file diff --git a/example/extending/jvmcode/1-subprocess/foo/src/Foo.java b/example/extending/jvmcode/1-subprocess/foo/src/Foo.java new file mode 100644 index 00000000000..91a18ebb3b9 --- /dev/null +++ b/example/extending/jvmcode/1-subprocess/foo/src/Foo.java @@ -0,0 +1,20 @@ +package foo; + +import java.io.IOException; +import java.io.InputStream; + +public class Foo { + + // Read `groovy-generated.html` from classpath + public static String groovyGeneratedHtml() throws IOException { + // Get the resource as an InputStream + try (InputStream inputStream = Foo.class.getClassLoader().getResourceAsStream("groovy-generated.html")) { + return new String(inputStream.readAllBytes()); + } + } + + public static void main(String[] args) throws IOException{ + String appClasspathResourceText = Foo.groovyGeneratedHtml(); + System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText); + } +} diff --git a/example/extending/jvmcode/2-inprocess/build.mill b/example/extending/jvmcode/2-inprocess/build.mill new file mode 100644 index 00000000000..835f682e191 --- /dev/null +++ b/example/extending/jvmcode/2-inprocess/build.mill @@ -0,0 +1,52 @@ +// This example is similar to the earlier example running the Groovy interpreter in +// a subprocess, but instead of using `Jvm.runSubprocess` we use `Jvm.runInprocess` to +// load the Groovy interpreter classpath files into an in-memory in-process classloader. 
+ +package build +import mill._, javalib._ +import mill.util.Jvm + +object foo extends JavaModule { + def groovyClasspath: Task[Agg[PathRef]] = Task{ + defaultResolver().resolveDeps(Agg(ivy"org.codehaus.groovy:groovy:3.0.9")) + } + + def groovyScript = Task.Source(millSourcePath / "generate.groovy") + + def groovyGeneratedResources = Task{ + Jvm.runInprocess(classPath = groovyClasspath().map(_.path)){ classLoader => + classLoader + .loadClass("groovy.ui.GroovyMain") + .getMethod("main", classOf[Array[String]]) + .invoke( + null, + Array[String]( + groovyScript().path.toString, + "Groovy!", + (Task.dest / "groovy-generated.html").toString + ) + ) + } + + PathRef(Task.dest) + } + + def resources = super.resources() ++ Seq(groovyGeneratedResources()) +} + +// Note that unlike `Jvm.runSubprocess`, `Jvm.runInprocess` does not take a `workingDir` +// or `mainArgs`: it instead provides you an in-memory `classLoader` that contains the +// classpath you gave it. From there, you can use `.loadClass` and `.getMethod` to fish out +// the classes and methods you want, and `.invoke` to call them. + +/** Usage + +> ./mill foo.run +Contents of groovy-generated.html is

Hello!

Groovy!

+*/ + +// `Jvm.runInprocess` has significantly less overhead than `Jvm.runSubprocess`: both in terms +// of wall-clock time and in terms of memory footprint. However, it does have somewhat less +// isolation, as the code is running inside your JVM and cannot be configured to have a separate +// working directory, environment variables, and other process-global configs. Which one is +// better to use differs on a case-by-case basis. \ No newline at end of file diff --git a/example/extending/jvmcode/2-inprocess/foo/generate.groovy b/example/extending/jvmcode/2-inprocess/foo/generate.groovy new file mode 100644 index 00000000000..e3813e24eb5 --- /dev/null +++ b/example/extending/jvmcode/2-inprocess/foo/generate.groovy @@ -0,0 +1,4 @@ +def htmlContent = "

Hello!

" + args[0] + "

" + +def outputFile = new File(args[1]) +outputFile.write(htmlContent) \ No newline at end of file diff --git a/example/extending/jvmcode/2-inprocess/foo/src/Foo.java b/example/extending/jvmcode/2-inprocess/foo/src/Foo.java new file mode 100644 index 00000000000..91a18ebb3b9 --- /dev/null +++ b/example/extending/jvmcode/2-inprocess/foo/src/Foo.java @@ -0,0 +1,20 @@ +package foo; + +import java.io.IOException; +import java.io.InputStream; + +public class Foo { + + // Read `groovy-generated.html` from classpath + public static String groovyGeneratedHtml() throws IOException { + // Get the resource as an InputStream + try (InputStream inputStream = Foo.class.getClassLoader().getResourceAsStream("groovy-generated.html")) { + return new String(inputStream.readAllBytes()); + } + } + + public static void main(String[] args) throws IOException{ + String appClasspathResourceText = Foo.groovyGeneratedHtml(); + System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText); + } +} diff --git a/example/package.mill b/example/package.mill index 259d6ce977c..3bbb67d784e 100644 --- a/example/package.mill +++ b/example/package.mill @@ -81,6 +81,7 @@ object `package` extends RootModule with Module { object imports extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "imports")) object metabuild extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "metabuild")) object plugins extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "plugins")) + object jvmcode extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "jvmcode")) } trait ExampleCrossModuleKotlin extends ExampleCrossModuleJava { diff --git a/main/util/src/mill/util/Jvm.scala b/main/util/src/mill/util/Jvm.scala index aaff3514375..d759c4305a8 100644 --- a/main/util/src/mill/util/Jvm.scala +++ b/main/util/src/mill/util/Jvm.scala @@ -316,6 +316,17 @@ object Jvm extends CoursierSupport { method } + def runInprocess[T](classPath: Agg[os.Path])(body: ClassLoader => T)(implicit + 
ctx: mill.api.Ctx.Home + ): T = { + inprocess( + classPath, + classLoaderOverrideSbtTesting = false, + isolated = true, + closeContextClassLoaderWhenDone = true, + body + ) + } def inprocess[T]( classPath: Agg[os.Path], classLoaderOverrideSbtTesting: Boolean, From 2bc599d78a5bc08f4a1f1bf8aa94358e47e5cde7 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sun, 20 Oct 2024 05:51:18 +0800 Subject: [PATCH 2/2] Pull in Zinc 0.10.3 (#3773) Pulls in https://github.com/sbt/zinc/releases/tag/v1.10.3 and fixes https://github.com/com-lihaoyi/mill/issues/3748 --- build.mill | 2 +- .../src/CodeSigSubfolderTests.scala | 16 ++++------------ .../src/ZincBuildCompilationTests.scala | 4 +--- .../mill/scalalib/worker/ZincWorkerImpl.scala | 1 + 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/build.mill b/build.mill index 4c508827af3..eb09fc3c6ae 100644 --- a/build.mill +++ b/build.mill @@ -179,7 +179,7 @@ object Deps { val sourcecode = ivy"com.lihaoyi::sourcecode:0.3.1" val upickle = ivy"com.lihaoyi::upickle:3.3.1" val windowsAnsi = ivy"io.github.alexarchambault.windows-ansi:windows-ansi:0.0.5" - val zinc = ivy"org.scala-sbt::zinc:1.10.2" + val zinc = ivy"org.scala-sbt::zinc:1.10.3" // keep in sync with doc/antora/antory.yml val bsp4j = ivy"ch.epfl.scala:bsp4j:2.2.0-M2" val fansi = ivy"com.lihaoyi::fansi:0.5.0" diff --git a/integration/invalidation/codesig-subfolder/src/CodeSigSubfolderTests.scala b/integration/invalidation/codesig-subfolder/src/CodeSigSubfolderTests.scala index 2bc75a240bb..e9471be7dfc 100644 --- a/integration/invalidation/codesig-subfolder/src/CodeSigSubfolderTests.scala +++ b/integration/invalidation/codesig-subfolder/src/CodeSigSubfolderTests.scala @@ -46,9 +46,7 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite { // Changing stuff in subfolder/package.mill does not invalidate unrelated tasks in build.mill val cached3 = eval("foo") assert(cached3.out == "") - // This should only compile 1 source but it seems there's an upstream bug in 
Zinc - // https://github.com/sbt/zinc/issues/1461 - assert(cached3.err.contains("compiling 2 Scala sources")) + assert(cached3.err.contains("compiling 1 Scala source")) modifyFile( workspacePath / "subfolder/package.mill", @@ -56,9 +54,7 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite { ) val mangledHelperFoo = eval("foo") assert(mangledHelperFoo.out.linesIterator.toSeq == Seq("running foo2", "running helperFoo2")) - // This should only compile 1 source but it seems there's an upstream bug in Zinc - // https://github.com/sbt/zinc/issues/1461 - assert(mangledHelperFoo.err.contains("compiling 2 Scala sources")) + assert(mangledHelperFoo.err.contains("compiling 1 Scala source")) // Make sure changing `val`s, which only affects the Module constructor and // not the Task method itself, causes invalidation @@ -68,9 +64,7 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite { ) val mangledValFoo = eval("foo") assert(mangledValFoo.out.linesIterator.toSeq == Seq("running foo2", "running helperFoo2")) - // This should only compile 1 source but it seems there's an upstream bug in Zinc - // https://github.com/sbt/zinc/issues/1461 - assert(mangledValFoo.err.contains("compiling 2 Scala sources")) + assert(mangledValFoo.err.contains("compiling 1 Scala source")) // Even modifying `val`s that do not affect the task invalidates it, because // we only know that the constructor changed and don't do enough analysis to @@ -85,9 +79,7 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite { "running helperFoo2" )) - // This should only compile 1 source but it seems there's an upstream bug in Zinc - // https://github.com/sbt/zinc/issues/1461 - assert(mangledValFooUsedInBar.err.contains("compiling 2 Scala sources")) + assert(mangledValFooUsedInBar.err.contains("compiling 1 Scala source")) val cached4 = eval("foo") assert(cached4.out == "") diff --git a/integration/invalidation/zinc-build-compilation/src/ZincBuildCompilationTests.scala 
b/integration/invalidation/zinc-build-compilation/src/ZincBuildCompilationTests.scala index 06a7b98c34c..0814cf98c4f 100644 --- a/integration/invalidation/zinc-build-compilation/src/ZincBuildCompilationTests.scala +++ b/integration/invalidation/zinc-build-compilation/src/ZincBuildCompilationTests.scala @@ -31,9 +31,7 @@ object ZincBuildCompilationTests extends UtestIntegrationTestSuite { _.replace("running helperFoo", "running helperFoo2") ) val mangledHelperFoo = eval(("dummy")) - // This should only compile 1 source but it seems there's an upstream bug in Zinc - // https://github.com/sbt/zinc/issues/1461 - assert(mangledHelperFoo.err.contains("compiling 2 Scala source")) + assert(mangledHelperFoo.err.contains("compiling 1 Scala source")) } } diff --git a/scalalib/worker/src/mill/scalalib/worker/ZincWorkerImpl.scala b/scalalib/worker/src/mill/scalalib/worker/ZincWorkerImpl.scala index f8bb3ccdea5..b8493781c4e 100644 --- a/scalalib/worker/src/mill/scalalib/worker/ZincWorkerImpl.scala +++ b/scalalib/worker/src/mill/scalalib/worker/ZincWorkerImpl.scala @@ -478,6 +478,7 @@ class ZincWorkerImpl( ConsistentFileAnalysisStore.binary( file = path.toIO, mappers = ReadWriteMappers.getEmptyMappers(), + sort = true, // No need to utilize more that 8 cores to serialize a small file parallelism = math.min(Runtime.getRuntime.availableProcessors(), 8) )