Merge branch 'main' into issue-3550

com-lihaoyi · Oct 20, 2024 · 56b1ba8 · 56b1ba8
2 parents bfa6b1d + 2bc599d
commit 56b1ba8
Show file tree

Hide file tree

Showing 14 changed files with 218 additions and 16 deletions.
diff --git a/build.mill b/build.mill
@@ -179,7 +179,7 @@ object Deps {
   val sourcecode = ivy"com.lihaoyi::sourcecode:0.3.1"
   val upickle = ivy"com.lihaoyi::upickle:3.3.1"
   val windowsAnsi = ivy"io.github.alexarchambault.windows-ansi:windows-ansi:0.0.5"
-  val zinc = ivy"org.scala-sbt::zinc:1.10.2"
+  val zinc = ivy"org.scala-sbt::zinc:1.10.3"
   // keep in sync with doc/antora/antory.yml
   val bsp4j = ivy"ch.epfl.scala:bsp4j:2.2.0-M2"
   val fansi = ivy"com.lihaoyi::fansi:0.5.0"

diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
@@ -82,6 +82,7 @@
 ** xref:contrib/twirllib.adoc[]
 ** xref:contrib/versionfile.adoc[]
 * xref:extending/thirdparty-plugins.adoc[]
+* xref:extending/running-jvm-code.adoc[]
 * xref:extending/writing-plugins.adoc[]
 * xref:extending/meta-build.adoc[]
 

diff --git a/docs/modules/ROOT/pages/extending/running-jvm-code.adoc b/docs/modules/ROOT/pages/extending/running-jvm-code.adoc
@@ -0,0 +1,33 @@
+= Running Dynamic JVM Code
+
+While xref:extending/import-ivy-plugins.adoc[import $ivy] is convenient,
+it comes with limitations as the JVM library it imports is global to your build:
+
+1. The library has to be resolved and downloaded before any part of your build starts.
+   If your codebase is large and most parts of your build don't use that library,
+   needing to download the library when working on parts that don't need it can be wasteful.
+
+2. The library can only have one version across the entire build. This can be an issue if
+   you need to have multiple versions of the library used in different parts of your build.
+   e.g. different parts of a large Groovy codebase may use different versions of the Groovy
+   interpreter, and so the Groovy interpreter cannot be included via `import $ivy` because the
+   different versions would collide.
+
+3. The library cannot be built as part of your main build. While it is possible to build
+   it as part of your xref:extending/meta-build.adoc[Meta-Build], that comes with additional
+   complexity and limitations. In a large codebase, you often end up building modules that
+   are shared between production deployments as well as local tooling: in such cases
+   `import $ivy` is not a good fit.
+
+
+In scenarios where these limitations cause issues, Mill provides other ways to run arbitrary
+JVM code apart from `import $ivy`.
+
+
+== Subprocesses
+
+include::partial$example/extending/jvmcode/1-subprocess.adoc[]
+
+== In-process Isolated Classloaders
+
+include::partial$example/extending/jvmcode/2-inprocess.adoc[]
diff --git a/example/extending/jvmcode/1-subprocess/build.mill b/example/extending/jvmcode/1-subprocess/build.mill
@@ -0,0 +1,65 @@
+// This example demonstrates how to resolve a third-party library from Maven Central,
+// but instead of using xref:extending/import-ivy-plugins.adoc[import $ivy] (which loads the
+// library as part of the main build) we use:
+//
+// * `defaultResolver().resolveDeps` to resolve the dependencies from Maven Central,
+//   converting `org:name:version` coordinates (and their transitive dependencies) to
+//   `PathRef`s referring to files on disk
+//
+// * `Jvm.runSubprocess`, which runs the given classpath files in a subprocess, starting
+//   from specified `mainClass`
+//
+// While xref:fundamentals/bundled-libraries.adoc#_os_lib[OS-Lib]'s `os.call` and `os.spawn` APIs
+// can be used to create any processes, JVM subprocesses are common enough and have enough
+// idiosyncrasies (e.g. classpaths) that Mill provides helper methods specifically for them.
+
+package build
+import mill._, javalib._
+import mill.util.Jvm
+
+object foo extends JavaModule {
+  // Classpath of the Groovy interpreter, resolved from its Maven coordinates
+  def groovyClasspath: Task[Agg[PathRef]] = Task {
+    defaultResolver().resolveDeps(Agg(ivy"org.codehaus.groovy:groovy:3.0.9"))
+  }
+
+  // The Groovy script, tracked as a source file of the build
+  def groovyScript = Task.Source(millSourcePath / "generate.groovy")
+
+  // Runs the Groovy script in a JVM subprocess and returns the folder it wrote into
+  def groovyGeneratedResources = Task {
+    val outputDir = Task.dest
+    val interpreterJars = groovyClasspath().map(_.path)
+    // Script path first, followed by the two arguments the script itself reads
+    val scriptArgs = Seq(
+      groovyScript().path.toString,
+      "Groovy!",
+      (outputDir / "groovy-generated.html").toString
+    )
+
+    Jvm.runSubprocess(
+      mainClass = "groovy.ui.GroovyMain",
+      classPath = interpreterJars,
+      mainArgs = scriptArgs,
+      workingDir = outputDir
+    )
+    PathRef(outputDir)
+  }
+
+  // Adds the generated folder to the module's resources
+  def resources =
+    super.resources() ++ Seq(groovyGeneratedResources())
+}
+
+// For this example, we use the https://groovy-lang.org/[Groovy] interpreter as our example
+// third-party library. While often used as a `groovy` CLI command, Groovy is also available
+// on Maven Central at the `org.codehaus.groovy:groovy:3.0.9` coordinates. This lets us pull
+// it into our build as a classpath comprising ``PathRef``s to files on disk, and then run the
+// Groovy JVM main method (in the class
+// https://github.com/apache/groovy/blob/48c8720c04b2c15396a7b37f140e0954418f74d3/src/main/java/groovy/ui/GroovyMain.java#L113[groovy.ui.GroovyMain])
+// passing it our script file `generate.groovy` (wired into our build using an
+// xref:fundamentals/tasks.adoc#_sources[Source Task] `groovyScript`) and arguments
+// used to configure the generated file and tell the script where to write it to. `generate.groovy`
+// generates a file on disk that we then wire into `def resources`, which is read at runtime
+// by `foo.run` and printed to the terminal output as shown below:
+
+/** Usage
+
+> ./mill foo.run
+Contents of groovy-generated.html is <html><body><h1>Hello!</h1><p>Groovy!</p></body></html>
+*/
+
+// As mentioned above, `defaultResolver().resolveDeps` and `Jvm.runSubprocess` are an
+// alternative to `import $ivy`, providing you more flexibility to resolve dependencies
+// on-demand as part of your task graph only when necessary, and keeping it isolated from
+// the build in a subprocess, preventing classpath collisions.
diff --git a/example/extending/jvmcode/1-subprocess/foo/generate.groovy b/example/extending/jvmcode/1-subprocess/foo/generate.groovy
@@ -0,0 +1,4 @@
+// First script argument: the text placed inside the paragraph
+String paragraphText = args[0]
+// Second script argument: path of the file to create
+File target = new File(args[1])
+
+target.write("<html><body><h1>Hello!</h1><p>" + paragraphText + "</p></body></html>")
diff --git a/example/extending/jvmcode/1-subprocess/foo/src/Foo.java b/example/extending/jvmcode/1-subprocess/foo/src/Foo.java
@@ -0,0 +1,20 @@
+package foo;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class Foo {
+
+  /** Name of the classpath resource this class reads. */
+  private static final String RESOURCE_NAME = "groovy-generated.html";
+
+  /**
+   * Reads the {@code groovy-generated.html} resource from the classpath.
+   *
+   * @return the full text of the resource
+   * @throws IOException if the resource is not on the classpath or cannot be read
+   */
+  public static String groovyGeneratedHtml() throws IOException {
+    try (InputStream inputStream = Foo.class.getClassLoader().getResourceAsStream(RESOURCE_NAME)) {
+      // getResourceAsStream reports a missing resource by returning null rather than throwing
+      if (inputStream == null) {
+        throw new IOException("Classpath resource not found: " + RESOURCE_NAME);
+      }
+      return new String(inputStream.readAllBytes());
+    }
+  }
+
+  /** Prints the contents of the generated HTML resource to standard output. */
+  public static void main(String[] args) throws IOException {
+    String appClasspathResourceText = Foo.groovyGeneratedHtml();
+    System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText);
+  }
+}
diff --git a/example/extending/jvmcode/2-inprocess/build.mill b/example/extending/jvmcode/2-inprocess/build.mill
@@ -0,0 +1,52 @@
+// This example is similar to the earlier example running the Groovy interpreter in
+// a subprocess, but instead of using `Jvm.runSubprocess` we use `Jvm.runInprocess` to
+// load the Groovy interpreter classpath files into an in-memory in-process classloader.
+
+package build
+import mill._, javalib._
+import mill.util.Jvm
+
+object foo extends JavaModule {
+  // Classpath of the Groovy interpreter, resolved from its Maven coordinates
+  def groovyClasspath: Task[Agg[PathRef]] = Task {
+    defaultResolver().resolveDeps(Agg(ivy"org.codehaus.groovy:groovy:3.0.9"))
+  }
+
+  // The Groovy script, tracked as a source file of the build
+  def groovyScript = Task.Source(millSourcePath / "generate.groovy")
+
+  // Runs the Groovy script inside this JVM and returns the folder it wrote into
+  def groovyGeneratedResources = Task {
+    val outputDir = Task.dest
+    val interpreterJars = groovyClasspath().map(_.path)
+    // Script path first, followed by the two arguments the script itself reads
+    val scriptArgs = Array[String](
+      groovyScript().path.toString,
+      "Groovy!",
+      (outputDir / "groovy-generated.html").toString
+    )
+
+    Jvm.runInprocess(classPath = interpreterJars) { groovyLoader =>
+      val groovyMain = groovyLoader.loadClass("groovy.ui.GroovyMain")
+      val mainMethod = groovyMain.getMethod("main", classOf[Array[String]])
+      // `main` is static, so the receiver passed to `invoke` is null
+      mainMethod.invoke(null, scriptArgs)
+    }
+
+    PathRef(outputDir)
+  }
+
+  // Adds the generated folder to the module's resources
+  def resources =
+    super.resources() ++ Seq(groovyGeneratedResources())
+}
+
+// Note that unlike `Jvm.runSubprocess`, `Jvm.runInprocess` does not take a `workingDir`
+// or `mainArgs`: it instead provides you an in-memory `classLoader` that contains the
+// classpath you gave it. From there, you can use `.loadClass` and `.getMethod` to fish out
+// the classes and methods you want, and `.invoke` to call them.
+
+/** Usage
+
+> ./mill foo.run
+Contents of groovy-generated.html is <html><body><h1>Hello!</h1><p>Groovy!</p></body></html>
+*/
+
+// `Jvm.runInprocess` has significantly less overhead than `Jvm.runSubprocess`: both in terms
+// of wall-clock time and in terms of memory footprint. However, it does have somewhat less
+// isolation, as the code is running inside your JVM and cannot be configured to have a separate
+// working directory, environment variables, and other process-global configs. Which one is
+// better to use differs on a case-by-case basis.
diff --git a/example/extending/jvmcode/2-inprocess/foo/generate.groovy b/example/extending/jvmcode/2-inprocess/foo/generate.groovy
@@ -0,0 +1,4 @@
+// First script argument: the text placed inside the paragraph
+String paragraphText = args[0]
+// Second script argument: path of the file to create
+File target = new File(args[1])
+
+target.write("<html><body><h1>Hello!</h1><p>" + paragraphText + "</p></body></html>")
diff --git a/example/extending/jvmcode/2-inprocess/foo/src/Foo.java b/example/extending/jvmcode/2-inprocess/foo/src/Foo.java
@@ -0,0 +1,20 @@
+package foo;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class Foo {
+
+  /** Name of the classpath resource this class reads. */
+  private static final String RESOURCE_NAME = "groovy-generated.html";
+
+  /**
+   * Reads the {@code groovy-generated.html} resource from the classpath.
+   *
+   * @return the full text of the resource
+   * @throws IOException if the resource is not on the classpath or cannot be read
+   */
+  public static String groovyGeneratedHtml() throws IOException {
+    try (InputStream inputStream = Foo.class.getClassLoader().getResourceAsStream(RESOURCE_NAME)) {
+      // getResourceAsStream reports a missing resource by returning null rather than throwing
+      if (inputStream == null) {
+        throw new IOException("Classpath resource not found: " + RESOURCE_NAME);
+      }
+      return new String(inputStream.readAllBytes());
+    }
+  }
+
+  /** Prints the contents of the generated HTML resource to standard output. */
+  public static void main(String[] args) throws IOException {
+    String appClasspathResourceText = Foo.groovyGeneratedHtml();
+    System.out.println("Contents of groovy-generated.html is " + appClasspathResourceText);
+  }
+}
diff --git a/example/package.mill b/example/package.mill
@@ -81,6 +81,7 @@ object `package` extends RootModule with Module {
     object imports extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "imports"))
     object metabuild extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "metabuild"))
     object plugins extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "plugins"))
+    object jvmcode extends Cross[ExampleCrossModule](build.listIn(millSourcePath / "jvmcode"))
   }
 
   trait ExampleCrossModuleKotlin extends ExampleCrossModuleJava {

diff --git a/integration/invalidation/codesig-subfolder/src/CodeSigSubfolderTests.scala b/integration/invalidation/codesig-subfolder/src/CodeSigSubfolderTests.scala
@@ -46,19 +46,15 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite {
       // Changing stuff in subfolder/package.mill does not invalidate unrelated tasks in build.mill
       val cached3 = eval("foo")
       assert(cached3.out == "")
-      // This should only compile 1 source but it seems there's an upstream bug in Zinc
-      // https://github.com/sbt/zinc/issues/1461
-      assert(cached3.err.contains("compiling 2 Scala sources"))
+      assert(cached3.err.contains("compiling 1 Scala source"))
 
       modifyFile(
         workspacePath / "subfolder/package.mill",
         _.replace("running helperFoo", "running helperFoo2")
       )
       val mangledHelperFoo = eval("foo")
       assert(mangledHelperFoo.out.linesIterator.toSeq == Seq("running foo2", "running helperFoo2"))
-      // This should only compile 1 source but it seems there's an upstream bug in Zinc
-      // https://github.com/sbt/zinc/issues/1461
-      assert(mangledHelperFoo.err.contains("compiling 2 Scala sources"))
+      assert(mangledHelperFoo.err.contains("compiling 1 Scala source"))
 
       // Make sure changing `val`s, which only affects the Module constructor and
       // not the Task method itself, causes invalidation
@@ -68,9 +64,7 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite {
       )
       val mangledValFoo = eval("foo")
       assert(mangledValFoo.out.linesIterator.toSeq == Seq("running foo2", "running helperFoo2"))
-      // This should only compile 1 source but it seems there's an upstream bug in Zinc
-      // https://github.com/sbt/zinc/issues/1461
-      assert(mangledValFoo.err.contains("compiling 2 Scala sources"))
+      assert(mangledValFoo.err.contains("compiling 1 Scala source"))
 
       // Even modifying `val`s that do not affect the task invalidates it, because
       // we only know that the constructor changed and don't do enough analysis to
@@ -85,9 +79,7 @@ object CodeSigSubfolderTests extends UtestIntegrationTestSuite {
         "running helperFoo2"
       ))
 
-      // This should only compile 1 source but it seems there's an upstream bug in Zinc
-      // https://github.com/sbt/zinc/issues/1461
-      assert(mangledValFooUsedInBar.err.contains("compiling 2 Scala sources"))
+      assert(mangledValFooUsedInBar.err.contains("compiling 1 Scala source"))
 
       val cached4 = eval("foo")
       assert(cached4.out == "")

diff --git a/integration/invalidation/zinc-build-compilation/src/ZincBuildCompilationTests.scala b/integration/invalidation/zinc-build-compilation/src/ZincBuildCompilationTests.scala
@@ -31,9 +31,7 @@ object ZincBuildCompilationTests extends UtestIntegrationTestSuite {
         _.replace("running helperFoo", "running helperFoo2")
       )
       val mangledHelperFoo = eval(("dummy"))
-      // This should only compile 1 source but it seems there's an upstream bug in Zinc
-      // https://github.com/sbt/zinc/issues/1461
-      assert(mangledHelperFoo.err.contains("compiling 2 Scala source"))
+      assert(mangledHelperFoo.err.contains("compiling 1 Scala source"))
 
     }
   }

diff --git a/main/util/src/mill/util/Jvm.scala b/main/util/src/mill/util/Jvm.scala
@@ -316,6 +316,17 @@ object Jvm extends CoursierSupport {
     method
   }
 
+  /**
+   * Evaluates `body` by delegating to [[inprocess]] with `isolated = true`,
+   * `closeContextClassLoaderWhenDone = true` and
+   * `classLoaderOverrideSbtTesting = false`.
+   *
+   * @param classPath files handed to [[inprocess]] as the classpath
+   * @param body      function applied to the classloader supplied by [[inprocess]]
+   * @return whatever [[inprocess]] returns for `body`
+   */
+  def runInprocess[T](classPath: Agg[os.Path])(body: ClassLoader => T)(implicit
+      ctx: mill.api.Ctx.Home
+  ): T =
+    inprocess[T](
+      classPath = classPath,
+      classLoaderOverrideSbtTesting = false,
+      isolated = true,
+      closeContextClassLoaderWhenDone = true,
+      body
+    )
   def inprocess[T](
       classPath: Agg[os.Path],
       classLoaderOverrideSbtTesting: Boolean,

diff --git a/scalalib/worker/src/mill/scalalib/worker/ZincWorkerImpl.scala b/scalalib/worker/src/mill/scalalib/worker/ZincWorkerImpl.scala
@@ -478,6 +478,7 @@ class ZincWorkerImpl(
     ConsistentFileAnalysisStore.binary(
       file = path.toIO,
       mappers = ReadWriteMappers.getEmptyMappers(),
+      sort = true,
       // No need to utilize more that 8 cores to serialize a small file
       parallelism = math.min(Runtime.getRuntime.availableProcessors(), 8)
     )