Skip to content

Commit 0411434

Browse files
committed
Refactored the ARC/WARC input handlers and the Mimir output handler into plugins, rather than keeping them directly in gcp-impl. This means that the transitive dependencies required only by these particular components are now isolated in each plugin's classloader, reducing the chance of clashes when another plugin (for a third-party PR or input/output handler) requires a different version of the same library.
The ARC and Mimir plugins are automatically loaded by BatchRunner.main, so no changes should be required to existing batch definitions or shell scripts.
1 parent 60d9498 commit 0411434

21 files changed

+358
-189
lines changed

distribution/pom.xml

+2-8
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@
1919
<build>
2020
<plugins>
2121
<plugin>
22+
<groupId>org.apache.maven.plugins</groupId>
2223
<artifactId>maven-assembly-plugin</artifactId>
23-
<version>3.1.1</version>
24+
<version>3.6.0</version>
2425
<executions>
2526
<execution>
2627
<id>distro-assembly</id>
@@ -52,13 +53,6 @@
5253
<version>1.2.3</version>
5354
</dependency>
5455

55-
<dependency>
56-
<groupId>uk.ac.gate</groupId>
57-
<artifactId>gcp-cli</artifactId>
58-
<version>3.3-SNAPSHOT</version>
59-
<!-- easy way to separate this in the assembly descriptor -->
60-
<scope>provided</scope>
61-
</dependency>
6256
</dependencies>
6357

6458
<distributionManagement>

distribution/src/assembly/distro.xml

+28-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
1+
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.2.0"
22
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3-
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
3+
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.2.0 https://maven.apache.org/xsd/assembly-2.2.0.xsd">
44
<id>distro</id>
55
<formats>
66
<format>zip</format>
@@ -36,13 +36,32 @@
3636
<useProjectArtifact>false</useProjectArtifact>
3737
<unpack>false</unpack>
3838
</dependencySet>
39-
<dependencySet>
40-
<scope>provided</scope>
41-
<outputDirectory></outputDirectory>
42-
<useProjectArtifact>false</useProjectArtifact>
43-
<unpack>false</unpack>
44-
<outputFileNameMapping>${artifact.artifactId}.${artifact.extension}</outputFileNameMapping>
45-
</dependencySet>
4639
</dependencySets>
4740

41+
<moduleSets>
42+
<moduleSet>
43+
<useAllReactorProjects>true</useAllReactorProjects>
44+
<includes>
45+
<include>uk.ac.gate:gcp-cli</include>
46+
</includes>
47+
<binaries>
48+
<outputDirectory></outputDirectory>
49+
<unpack>false</unpack>
50+
<includeDependencies>false</includeDependencies>
51+
<outputFileNameMapping>${artifact.artifactId}.${artifact.extension}</outputFileNameMapping>
52+
</binaries>
53+
</moduleSet>
54+
<moduleSet>
55+
<useAllReactorProjects>true</useAllReactorProjects>
56+
<includes>
57+
<include>uk.ac.gate:gcp-plugin-*</include>
58+
</includes>
59+
<binaries>
60+
<attachmentClassifier>distro</attachmentClassifier>
61+
<includeDependencies>false</includeDependencies>
62+
<unpack>true</unpack>
63+
<outputDirectory>plugins</outputDirectory>
64+
</binaries>
65+
</moduleSet>
66+
</moduleSets>
4867
</assembly>

impl/pom.xml

-40
Original file line numberDiff line numberDiff line change
@@ -34,26 +34,6 @@
3434
<scope>compile</scope>
3535
</dependency>
3636

37-
<dependency>
38-
<groupId>commons-httpclient</groupId>
39-
<artifactId>commons-httpclient</artifactId>
40-
<version>3.0.1</version>
41-
<scope>compile</scope>
42-
</dependency>
43-
44-
<!-- Heritrix library for parsing ARC and WARC files -->
45-
<dependency>
46-
<groupId>org.netpreserve.commons</groupId>
47-
<artifactId>webarchive-commons</artifactId>
48-
<version>1.1.9</version>
49-
<scope>compile</scope>
50-
<exclusions>
51-
<exclusion>
52-
<groupId>*</groupId>
53-
<artifactId>*</artifactId>
54-
</exclusion>
55-
</exclusions>
56-
</dependency>
5737
<!--
5838
<dependency>
5939
<groupId>it.unimi.dsi</groupId>
@@ -79,26 +59,6 @@
7959
<scope>compile</scope>
8060
</dependency>
8161

82-
<!-- mimir-connector for the Mimir output handler -->
83-
<dependency>
84-
<groupId>uk.ac.gate.mimir</groupId>
85-
<artifactId>mimir-connector</artifactId>
86-
<version>6.2</version>
87-
<scope>compile</scope>
88-
</dependency>
89-
90-
<!--
91-
We should get this as a transitive of gate-core, but heritrix-commons also
92-
declares a dependency on an earlier version, and Maven prefers this as it
93-
is closer to the root of the dependency tree.
94-
-->
95-
<dependency>
96-
<groupId>commons-io</groupId>
97-
<artifactId>commons-io</artifactId>
98-
<version>2.7</version>
99-
<scope>compile</scope>
100-
</dependency>
101-
10262
<!-- commons-cli for command line parsing -->
10363
<dependency>
10464
<groupId>commons-cli</groupId>

impl/src/main/java/gate/cloud/batch/BatchRunner.java

+11
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,17 @@ public void uncaughtException(Thread t, Throwable e) {
717717

718718
Gate.init();
719719

720+
// load built-in plugins
721+
File builtInPluginsDir = new File(gcpHome, "plugins");
722+
if(builtInPluginsDir.isDirectory()) {
723+
File[] plugins = builtInPluginsDir.listFiles(File::isDirectory);
724+
if(plugins != null) {
725+
for(File pluginFile : plugins) {
726+
Gate.getCreoleRegister().registerPlugin(new Plugin.Directory(pluginFile.toURI().toURL()));
727+
}
728+
}
729+
}
730+
720731
// load any other plugins specified on the command line
721732
String[] pluginsToLoad = line.getOptionValues('p');
722733
if(pluginsToLoad != null) {

impl/src/main/java/gate/cloud/util/ByteArrayURLStreamHandler.java

+28-4
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,43 @@
1919
import java.util.List;
2020
import java.util.Map;
2121

22-
import org.apache.commons.httpclient.Header;
23-
2422
/**
2523
* This oddity is just a wrapper around a byte array and a URL, to
2624
* allow creation of GATE documents from a byte array with
2725
* application/pdf type. Donated by Ian.
2826
*/
2927
public class ByteArrayURLStreamHandler
3028
extends URLStreamHandler {
29+
30+
public static class Header {
31+
public Header(String name, String value) {
32+
this.name = name;
33+
this.value = value;
34+
}
35+
36+
private String name;
37+
private String value;
38+
39+
public String getName() {
40+
return name;
41+
}
42+
43+
public void setName(String name) {
44+
this.name = name;
45+
}
46+
47+
public String getValue() {
48+
return value;
49+
}
50+
51+
public void setValue(String value) {
52+
this.value = value;
53+
}
54+
}
3155

3256
private byte[] data;
3357
private Header[] headers;
34-
58+
3559
public ByteArrayURLStreamHandler(byte[] data) {
3660
this(data, null);
3761
}
@@ -78,7 +102,7 @@ public Map<String, List<String>> getHeaderFields() {
78102
} else if(values.size() == 1) {
79103
values = new ArrayList<String>(values);
80104
values.add(h.getValue());
81-
fields.put(h.getName(), values);
105+
fields.put(h.getName(), values);
82106
} else {
83107
values.add(h.getValue());
84108
}

impl/src/main/java/gate/cloud/util/Scratch.java

-126
This file was deleted.

0 commit comments

Comments
 (0)