Skip to content

Commit

Permalink
towards implementing config override for [preston stream]; related to #…
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Feb 7, 2025
1 parent 5b36286 commit f3c0436
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import bio.guoda.preston.store.KeyValueStoreFactoryImpl;
import bio.guoda.preston.store.ValidatingKeyValueStreamContentAddressedFactory;
import bio.guoda.preston.stream.ContentHashDereferencer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
Expand All @@ -23,6 +25,7 @@
import org.globalbioticinteractions.cache.Cache;
import org.globalbioticinteractions.cache.ContentProvenance;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetProxy;
import org.globalbioticinteractions.elton.util.ProgressCursor;
import org.globalbioticinteractions.elton.util.ProgressCursorFactory;
import org.slf4j.Logger;
Expand Down Expand Up @@ -92,6 +95,12 @@ public void setRecordType(String recordType) {
)
private String recordType = "interaction";


// Content id (hash URI) of a globi.json config used as a global override;
// stays null when the --config option is not provided.
@CommandLine.Option(names = {"--config"},
        description = "point to content id (hash) of globi.json config to apply global settings (e.g., custom interaction type mappings). Example: hash://sha256/02682fdd62a3e985dc06236662299f00ec5453c4e6f707d02efa93628f927649"
)
private URI configOverrideReesource = null;

@Override
public void doRun() {

Expand Down Expand Up @@ -157,17 +166,21 @@ private boolean isCacheEnabled() {
return !getDisableCache();
}

private boolean handleDataset(final Dataset dataset, boolean shouldWriteHeader, Cache cache) throws IOException {
private boolean handleDataset(final Dataset datasetProvided, boolean shouldWriteHeader, Cache cache) throws IOException {
boolean handled = false;
ImportLoggerFactory loggerFactory = new ImportLoggerFactoryImpl(
recordType,
dataset.getNamespace(),
datasetProvided.getNamespace(),
Arrays.asList(ReviewCommentType.values()),
getStdout()
);
try {
Dataset datasetApplied = hasConfigOverride()
? applyConfigOverride(datasetProvided, cache)
: datasetProvided;

StreamingDatasetsHandler namespaceHandler = new StreamingDatasetsHandler(
dataset,
datasetApplied,
getDataDir(),
getStderr(),
createInputStreamFactory(),
Expand All @@ -176,14 +189,14 @@ private boolean handleDataset(final Dataset dataset, boolean shouldWriteHeader,
getActivityContext(),
cache
);
namespaceHandler.onNamespace(dataset.getNamespace());
namespaceHandler.onNamespace(datasetProvided.getNamespace());
handled = true;
} catch (Exception e) {
String msg = "failed to add dataset associated with namespace [" + dataset.getNamespace() + "]";
String msg = "failed to add dataset associated with namespace [" + datasetProvided.getNamespace() + "]";
loggerFactory.createImportLogger().warn(new LogContext() {
@Override
public String toString() {
return "{ \"namespace\": \"" + dataset.getNamespace() + "\" }";
return "{ \"namespace\": \"" + datasetProvided.getNamespace() + "\" }";
}
}, msg);
LOG.error(msg, e);
Expand All @@ -194,6 +207,18 @@ public String toString() {

}

/**
 * Tells whether a config override resource has been set.
 *
 * @return true if the config override resource is non-null, false otherwise
 */
private boolean hasConfigOverride() {
return configOverrideReesource != null;
}

/**
 * Wraps the provided dataset in a {@link DatasetProxy} and sets the proxy's config
 * to the JSON tree parsed from the cached content of the config override resource.
 *
 * @param datasetProvided dataset to be wrapped
 * @param cache           cache from which the config override resource is retrieved
 * @return a proxy of the provided dataset carrying the override config
 * @throws IOException propagated from retrieving or parsing the override config
 */
private Dataset applyConfigOverride(Dataset datasetProvided, Cache cache) throws IOException {
    final Dataset proxy = new DatasetProxy(datasetProvided);
    // the override is set on the proxy; the provided dataset instance is only wrapped here
    proxy.setConfig(new ObjectMapper().readTree(cache.retrieve(configOverrideReesource)));
    return proxy;
}

@Override
public String getDescription() {
return DESCRIPTION;
Expand Down Expand Up @@ -223,6 +248,10 @@ public void setRemotes(List<URI> remotes) {
this.remotes = remotes;
}

/**
 * Sets the config override resource.
 *
 * @param configOverrideReesource URI of the config override resource
 */
public void setConfigOverrideReesource(URI configOverrideReesource) {
this.configOverrideReesource = configOverrideReesource;
}

public static class ImportLoggerFactoryImpl implements ImportLoggerFactory {
private final String recordType;
private final String namespace;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
Expand All @@ -33,10 +34,8 @@
import java.util.stream.Collectors;

import static junit.framework.TestCase.assertNotNull;
import static org.hamcrest.CoreMatchers.endsWith;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.hasItems;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.CoreMatchers.startsWith;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.number.OrderingComparison.greaterThan;
Expand Down Expand Up @@ -632,6 +631,48 @@ public void streamSomeInteractionsCustomNamespace() throws IOException {
assertHeaderAndMore(outputStream, headerInteractions());
}

@Test
public void streamSomeInteractionsCustomNamespaceGlobalOverrideInteractionTypeMapping() throws IOException {
    // arrange: capture streams and a command instance
    ByteArrayOutputStream capturedStdout = new ByteArrayOutputStream();
    ByteArrayOutputStream capturedStderr = new ByteArrayOutputStream();
    CmdStream cmd = new CmdStream();

    // arrange: data dir populated with the cache plus the global config and mapping resources
    File dataDir = folder.newFolder("tmpDir");
    dataDir.mkdirs();
    populateCache(dataDir);
    populateCacheWithResource(dataDir, "/global-globi-config.json");
    populateCacheWithResource(dataDir, "/global-interaction_type_mapping_default.csv");

    cmd.setRecordType("interaction");
    cmd.setDataDir(dataDir.getAbsolutePath());
    cmd.setStdout(new PrintStream(capturedStdout));
    cmd.setStderr(new PrintStream(capturedStderr));
    cmd.setConfigOverrideReesource(URI.create("hash://sha256/b1a25958aa62f50ffb231fed929d053a4fbd99a9d854ffc9284b338501716685"));

    // arrange: dataset config provided via stdin, including a resource mapping
    ObjectMapper mapper = new ObjectMapper();
    ObjectNode resourceMappings = mapper.createObjectNode();
    resourceMappings.put("classpath:/org/globalbioticinteractions/interaction_types_mapping.csv", "hash://sha256/ef045408607c6fb19d6bdf8145e7ce16a0e16bc8be45acbe31da33e1db0c9ea7");

    ObjectNode datasetConfig = mapper.createObjectNode()
            .put("namespace", "name/space")
            .put("format", "dwca")
            .put("url", "hash://sha256/aa12991df4efe1e392b2316c50d7cf17117cab7509dcc1918cd42c726bb4e36d")
            .put("citation", "some citation");
    datasetConfig.set("resources", resourceMappings);

    // act
    cmd.setStdin(IOUtils.toInputStream(datasetConfig.toString(), StandardCharsets.UTF_8));
    cmd.run();

    // assert: header first, followed by at least one record carrying the expected interaction type
    String emitted = new String(capturedStdout.toByteArray(), StandardCharsets.UTF_8);
    assertThat(emitted, startsWith(headerInteractions()));
    String[] emittedLines = emitted.split("\n");
    assertThat(emittedLines.length, Is.is(greaterThan(1)));
    assertThat(emittedLines[1], containsString("http://purl.obolibrary.org/obo/RO_0002321\tecologicallyRelatedTo"));
}

@Test
public void streamSomeNames() throws IOException {
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
Expand Down
5 changes: 5 additions & 0 deletions src/test/resources/global-globi-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"resources": {
"interaction_types_mapping.csv": "hash://sha256/a9199bcdbe6e9d31e29d115e95b56b4cdb7a1b8c062df20e837efbb200d8a6be"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
provided_interaction_type_label,provided_interaction_type_id,mapped_to_interaction_type_label,mapped_to_interaction_type_id
,,relatedTo,http://purl.obolibrary.org/obo/RO_0002321

0 comments on commit f3c0436

Please sign in to comment.