diff --git a/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java b/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java index 0e3b971..70d7c54 100644 --- a/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java +++ b/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java @@ -180,7 +180,7 @@ public int run(String[] args) configFile = args[1]; // For this example, only look at a single ARC files. - String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/1341690163490/1341782443295_1551.arc.gz"; + String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/1346823845675/1346871947461_4036.arc.gz"; // Switch to this if you'd like to look at all ARC files. May take many minutes just to read the file listing. //String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/*/*.arc.gz"; diff --git a/src/java/org/commoncrawl/examples/ExampleMetadataStats.java b/src/java/org/commoncrawl/examples/ExampleMetadataStats.java index bbfaebc..d90230a 100644 --- a/src/java/org/commoncrawl/examples/ExampleMetadataStats.java +++ b/src/java/org/commoncrawl/examples/ExampleMetadataStats.java @@ -208,7 +208,7 @@ public int run(String[] args) // If you would like to process all segments, comment this out and // uncomment the block of code below - String inputPath = baseInputPath + "/1341690154994/metadata-00062"; + String inputPath = baseInputPath + "/1346823845675/metadata-04379"; LOG.info("adding input path '" + inputPath + "'"); FileInputFormat.addInputPath(job, new Path(inputPath));