From b37b7feb0dd87ff91b0b0c0dd2271e52aeecf70a Mon Sep 17 00:00:00 2001
From: David Graham <david.l.graham1@gmail.com>
Date: Fri, 14 Feb 2025 10:05:46 -0800
Subject: [PATCH 1/5] Add document_dir option and WARC skip_linearization flag

---
 python/dolma/cli/deduper.py    |  1 -
 python/dolma/cli/mixer.py      |  5 +++-
 python/dolma/cli/tagger.py     |  5 ++++
 python/dolma/core/runtime.py   |  3 +-
 python/dolma/warc/processor.py | 15 +++++++---
 src/shard.rs                   | 18 ++++++++---
 tests/python/test_mixer.py     | 32 ++++++++++++++++++++
 tests/python/test_paths.py     |  2 --
 tests/python/test_warc.py      | 51 +++++++++++++++++++++++++++++++
 tests/python/utils.py          | 55 +++++++++++++++++-----------------
 10 files changed, 147 insertions(+), 40 deletions(-)

diff --git a/python/dolma/cli/deduper.py b/python/dolma/cli/deduper.py
index de6a43d5..d263d4ca 100644
--- a/python/dolma/cli/deduper.py
+++ b/python/dolma/cli/deduper.py
@@ -192,7 +192,6 @@ def run(cls, parsed_config: DeduperConfig):
             # perform some path validation to make sure we don't call the mixer with invalid config
             total_matching_documents = 0
             for document in parsed_config.documents:
-
                 if not any(
                     fnmatch.fnmatch(dict_config["dedupe"]["document_dir"], part) for part in document.split(os.sep)
                 ):
diff --git a/python/dolma/cli/mixer.py b/python/dolma/cli/mixer.py
index 2ac6c5c5..41f632c6 100644
--- a/python/dolma/cli/mixer.py
+++ b/python/dolma/cli/mixer.py
@@ -66,6 +66,9 @@ class StreamConfig:
             "from the file extension."
         ),
     )
+    document_dir: str = field(
+        default="documents", help="Folder in source path to replace with 'attributes' when looking for attributes"
+    )
 
 
 @dataclass
@@ -145,7 +148,6 @@ def run(cls, parsed_config: MixerConfig):
                 # perform some path validation to make sure we don't call the mixer with invalid config
                 total_matching_documents = 0
                 for document in stream_config.documents:
-
                     current_matching_documents = sum(1 for _ in glob_path(document))
                     if current_matching_documents == 0:
                         # only raise a warning if no documents are found for a single path
@@ -159,6 +161,7 @@ def run(cls, parsed_config: MixerConfig):
                 # populate the stream config dict
                 stream_config_dict["name"] = stream_config.name
                 stream_config_dict["documents"] = [str(d) for d in stream_config.documents]
+                stream_config_dict["document_dir"] = stream_config.document_dir
                 stream_config_dict["attributes"] = [str(a) for a in list(stream_config.attributes)]
                 stream_config_dict["output"] = {
                     "path": str(stream_config.output.path),
diff --git a/python/dolma/cli/tagger.py b/python/dolma/cli/tagger.py
index 9982ec05..34df31ba 100644
--- a/python/dolma/cli/tagger.py
+++ b/python/dolma/cli/tagger.py
@@ -91,6 +91,10 @@ class TaggerConfig:
         default=False,
         help="If true, only print the configuration and exit without running the taggers.",
     )
+    document_dir: Optional[str] = field(
+        default="documents",
+        help="The folder in source paths to replace with 'attributes' to store results, if not 'documents'",
+    )
 
 
 class TaggerCli(BaseCli):
@@ -140,6 +144,7 @@ def run(cls, parsed_config: TaggerConfig):
                 profile_output=parsed_config.profile.output,
                 profile_steps=parsed_config.profile.steps,
                 profile_sort_key=parsed_config.profile.sort_key,
+                document_dir=parsed_config.document_dir,
             )
 
 
diff --git a/python/dolma/core/runtime.py b/python/dolma/core/runtime.py
index ac5e2a23..320ca901 100644
--- a/python/dolma/core/runtime.py
+++ b/python/dolma/core/runtime.py
@@ -392,6 +392,7 @@ def create_and_run_tagger(
     profile_steps: Optional[int] = None,
     profile_sort_key: str = "tottime",
     profile_lines: int = 100,
+    document_dir: Optional[str] = "documents",
 ):
     """This function creates a tagger and runs it on a list of documents.
 
@@ -444,7 +445,7 @@ def create_and_run_tagger(
 
     if destination is None:
         try:
-            destination = _make_paths_from_substitution(documents, "documents", f"attributes/{experiment}")
+            destination = _make_paths_from_substitution(documents, document_dir, f"attributes/{experiment}")
         except Exception as exp:
             raise RuntimeError("Could not make destination paths from documents paths") from exp
     elif isinstance(destination, str):
diff --git a/python/dolma/warc/processor.py b/python/dolma/warc/processor.py
index 474c6ca9..a20e0263 100644
--- a/python/dolma/warc/processor.py
+++ b/python/dolma/warc/processor.py
@@ -107,6 +107,7 @@ def process_single(
         pre_taggers_names: List[str] = kwargs.get("pre_taggers") or []
         pre_taggers = {make_variable_name(name): TaggerRegistry.get(name)() for name in pre_taggers_names}
 
+
         # create the html extractor
         linearizer_name: str = kwargs.get("linearizer_name") or "resiliparse"
         linearizer = LinearizerRegistry.get(linearizer_name)()
@@ -127,6 +128,7 @@ def process_single(
         # whether to skip this document if post-taggers find nothing
         skip_no_post_taggers: bool = kwargs.get("skip_no_post_taggers") or False
 
+        skip_linearization: bool = kwargs.get("skip_linearization") or False
         # derive the destination path if it is not provided by splitting out all the
         # extensions, removing gz and warc, and adding jsonl.gz
         if not destination_path.endswith(".jsonl.gz"):
@@ -192,12 +194,15 @@ def process_single(
                     continue
 
                 # extract text
-                doc.text = linearizer.linearize(content=decoded_content)
+                if skip_linearization:
+                    doc.text = decoded_content
+                else:
+                    doc.text = linearizer.linearize(content=decoded_content)
 
                 # these are the properties extracted from the HTML content
-                post_attributes = {name: tagger.tag(doc) for name, tagger in post_taggers.items()}
-                if skip_no_post_taggers and not sum(map(len, post_attributes.values())):
-                    continue
+               # post_attributes = {name: tagger.tag(doc) for name, tagger in post_taggers.items()}
+               # if skip_no_post_taggers and not sum(map(len, post_attributes.values())):
+                #    continue
 
                 doc.attributes = {
                     f"{t_name}__{t_name}__{make_variable_name(a_name)}": attr_values
@@ -247,6 +252,7 @@ def create_and_run_warc_pipeline(
     store_html_in_metadata: bool = False,
     skip_no_pre_taggers: bool = False,
     skip_no_post_taggers: bool = False,
+    skip_linearization: bool = False,
 ):
     with ExitStack() as stack:
         if metadata is None:
@@ -302,4 +308,5 @@ def create_and_run_warc_pipeline(
             skip_no_pre_taggers=skip_no_pre_taggers,
             skip_no_post_taggers=skip_no_post_taggers,
             source_name=source_name,
+            skip_linearization=skip_linearization
         )
diff --git a/src/shard.rs b/src/shard.rs
index 226ba194..c66d6511 100644
--- a/src/shard.rs
+++ b/src/shard.rs
@@ -40,6 +40,10 @@ impl Shard {
     pub fn split_streams(streams: &Vec<StreamConfig>) -> Result<Vec<Shard>, IoError> {
         let mut shards: Vec<Shard> = Vec::new();
         for stream_config in streams {
+            let document_dir = format!(
+                "/{}/",
+                stream_config.document_dir.as_deref().unwrap_or("documents")
+            );
             let mut stream_shard_count = 0;
             log::info!("Computing shards for stream {}...", stream_config.name);
             let stream_inputs = find_objects_matching_patterns(&stream_config.documents)?;
@@ -50,7 +54,7 @@ impl Shard {
                     let mut attr_paths = Vec::new();
                     for prefix in stream_config.attributes.iter() {
                         let attr_prefix = format!("/attributes/{}/", prefix);
-                        let attr_path = input.replace("/documents/", &attr_prefix);
+                        let attr_path = input.replace(&document_dir, &attr_prefix);
                         attr_paths.push(attr_path);
                     }
                     (
@@ -135,13 +139,17 @@ impl Shard {
         // dataset is a strict subset of the original and is intended to be unshuffled and unsharded.
         let mut shards: Vec<Shard> = Vec::new();
         for stream_config in streams {
+            let document_dir = format!(
+                "/{}/",
+                stream_config.document_dir.as_deref().unwrap_or("documents")
+            );
             let stream_inputs = find_objects_matching_patterns(&stream_config.documents)?;
             let input_count = stream_inputs.len();
             let inputs = stream_inputs.into_iter().map(|input| {
                 let mut attr_paths = Vec::new();
                 for prefix in stream_config.attributes.iter() {
                     let attr_prefix = format!("/attributes/{}/", prefix);
-                    let attr_path = input.replace("/documents/", &attr_prefix);
+                    let attr_path = input.replace(&document_dir, &attr_prefix);
                     attr_paths.push(attr_path);
                 }
                 DocumentPaths {
@@ -152,10 +160,11 @@ impl Shard {
 
             for input in inputs {
                 let doc_path_clone = input.doc_path.clone();
-                let output_suffix = doc_path_clone.split("/documents/").last().unwrap();
+                let output_suffix = doc_path_clone.split(&document_dir).last().unwrap();
                 let output = format!(
-                    "{}/documents/{}",
+                    "{}{}{}",
                     stream_config.output.path.clone(),
+                    document_dir,
                     output_suffix
                 );
                 log::info!("Creating shard for {}", output);
@@ -543,6 +552,7 @@ pub mod shard_config {
         pub span_replacement: Option<Vec<SpanReplacementConfig>>,
         pub output: StreamOutputConfig,
         pub compression: Option<CompressionConfig>,
+        pub document_dir: Option<String>,
     }
 
     #[derive(Serialize, Deserialize, Clone)]
diff --git a/tests/python/test_mixer.py b/tests/python/test_mixer.py
index 68ea1721..952e4c20 100644
--- a/tests/python/test_mixer.py
+++ b/tests/python/test_mixer.py
@@ -22,6 +22,8 @@
 EMAIL_SPANS_JQ = Path(__file__).parent.parent / "config/email-spans-jq.yaml"
 FILTER_BY_SPANS = Path(__file__).parent.parent / "config/filter-by-spans.json"
 MIXER = Path(__file__).parent.parent / "config/mixer.json"
+ALT_DOC_PATH_MIXER = Path(__file__).parent.parent / "config/alt-path-mixer.json"
+
 PARAGRAPH_SPANS = Path(__file__).parent.parent / "config/paragraph-spans.json"
 
 
@@ -150,6 +152,36 @@ def test_remote_input_remote_output(self):
         provided = self.checkAndRemoveProvenance(provided)
         self.assertEqual(expected, provided)
 
+    def test_alt_doc_path_mixer(self):
+        if self.remote_test_prefix is None:
+            return self.skipTest("Skipping AWS tests")
+
+        with open(ALT_DOC_PATH_MIXER, mode="r", encoding="utf8") as f:
+            config = json.load(f)
+
+        # keep track of local output path
+        local_input = config["streams"][0]["documents"][0]
+        local_output = config["streams"][0]["output"]["path"]
+
+        # replace results path with s3 path
+        config["streams"][0]["output"]["path"] = f"{self.remote_test_prefix}/{local_output}"
+
+        # upload local input to s3, replace local input with s3 path
+        config["streams"][0]["documents"][0] = f"{self.remote_test_prefix}/{local_input}"
+
+        with NamedTemporaryFile("w") as f:
+            json.dump(config, f)
+            f.flush()
+
+            main(argv=["-c", f.name, "mix"])
+
+        download_s3_prefix(f"{self.remote_test_prefix}/tests/work", "tests/work/remote")
+        expected = load_jsonl("tests/data/expected/mixer.json.gz")
+        provided = load_jsonl("tests/work/remote/output/mixer/mixer-test-0000.json.gz")
+        provided = self.checkAndRemoveProvenance(provided)
+        self.assertEqual(expected, provided)
+
+
     def test_remote_input_local_output(self):
         if self.remote_test_prefix is None:
             return self.skipTest("Skipping AWS tests")
diff --git a/tests/python/test_paths.py b/tests/python/test_paths.py
index e920af74..df758e22 100644
--- a/tests/python/test_paths.py
+++ b/tests/python/test_paths.py
@@ -295,7 +295,6 @@ def test_split_glob(self):
 
 class TestSplitExt(TestCase):
     def test_file(self):
-
         prot, parts, ext = split_ext("file.txt")
 
         self.assertEqual(prot, "")
@@ -318,7 +317,6 @@ def test_file(self):
         self.assertEqual(ext, ".")
 
     def test_path(self):
-
         prot, parts, ext = split_ext("path/to/file.txt")
 
         self.assertEqual(prot, "")
diff --git a/tests/python/test_warc.py b/tests/python/test_warc.py
index 04f0e9a7..4e37a843 100644
--- a/tests/python/test_warc.py
+++ b/tests/python/test_warc.py
@@ -103,3 +103,54 @@ def test_pretag_html(self):
             {"by_4_0", "by_3_0"},
         )
         self.assertIn("cc_re__cc_re__cc_by_4_0", sample1[2]["attributes"])
+
+    def test_skip_linearization(self):
+        """Test that when skip_linearization is True, the raw HTML content is preserved."""
+        outputs = self._run_pipeline_with_skip_linearization()
+        self.assertEqual(len(outputs), 2)
+        self.assertIn("sample-0000.jsonl.gz", outputs)
+        self.assertIn("sample-0001.jsonl.gz", outputs)
+
+        sample0 = outputs["sample-0000.jsonl.gz"]
+        sample1 = outputs["sample-0001.jsonl.gz"]
+
+        # Check that we got some documents
+        self.assertGreater(len(sample0), 0)
+        self.assertGreater(len(sample1), 0)
+
+        # For all documents, verify they contain raw HTML instead of linearized text
+        for sample in chain(sample0, sample1):
+            # HTML content should be in the text field
+            self.assertIn("<", sample["text"])
+            self.assertIn(">", sample["text"])
+            
+            # Common HTML tags that should be present in raw HTML
+            html_indicators = ["<html", "<body", "<div", "<p"]
+            self.assertTrue(any(indicator in sample["text"].lower() for indicator in html_indicators))
+            
+            # Basic metadata should still be present
+            self.assertEqual(sample["version"], "v0")
+            self.assertEqual(sample["source"], "test")
+            self.assertIn("warc_url", sample["metadata"])
+            self.assertIn("url", sample["metadata"])
+            self.assertIn("warc_date", sample["metadata"])
+            self.assertIn("warc_filename", sample["metadata"])
+            self.assertIn("content_type", sample["metadata"])
+
+    def _run_pipeline_with_skip_linearization(self) -> Dict[str, List[dict]]:
+        """Helper method to run pipeline with skip_linearization=True."""
+        create_and_run_warc_pipeline(
+            documents=[f"{DATA_PATH}/*.warc.gz"],
+            destination=[self.tempdir],
+            num_processes=1,
+            ignore_existing=False,
+            debug=True,
+            source_name="test",
+            skip_no_pre_taggers=False,
+            skip_no_post_taggers=False,
+            store_html_in_metadata=False,
+            linearizer_name="resiliparse",
+            skip_linearization=True,
+            pre_taggers=["cc_re"],
+            post_taggers=["lingua_1e2"],
+        )
diff --git a/tests/python/utils.py b/tests/python/utils.py
index 9813f2d3..ca194f9c 100644
--- a/tests/python/utils.py
+++ b/tests/python/utils.py
@@ -70,33 +70,33 @@ def skip_aws_tests() -> bool:
     return (dolma_tests_skip or "false").lower() == "true"
 
 
-def upload_test_documents(local_input: str, test_prefix: str) -> Tuple[str, str]:
-    remote_input = f"{test_prefix}/input/documents"
-    remote_output = f"{test_prefix}/output/documents"
+# def upload_test_documents(local_input: str, test_prefix: str, document_dir: str = "documents") -> Tuple[str, str]:
+#     remote_input = f"{test_prefix}/input/{document_dir}"
+#     remote_output = f"{test_prefix}/output/{document_dir}"
 
-    for i, local_fp in enumerate(glob_path(local_input)):
-        remote_fp = f"{remote_input}/{i:05d}.json.gz"
+#     for i, local_fp in enumerate(glob_path(local_input)):
+#         remote_fp = f"{remote_input}/{i:05d}.json.gz"
 
-        with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
-            g.write(f.read())
+#         with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
+#             g.write(f.read())
 
-    return remote_input, remote_output
+#     return remote_input, remote_output
 
 
-def upload_test_attributes(local_attributes: str, test_prefix: str):
-    remote_attributes = f"{test_prefix}/input/attributes"
+# def upload_test_attributes(local_attributes: str, test_prefix: str):
+#     remote_attributes = f"{test_prefix}/input/attributes"
 
-    for i, local_fp in enumerate(glob_path(local_attributes)):
-        matched = re.match(r"^(attributes|duplicate)-(\w+)", local_fp)
-        if not matched:
-            raise RuntimeError(f"Unexpected filename: {local_fp}")
+#     for i, local_fp in enumerate(glob_path(local_attributes)):
+#         matched = re.match(r"^(attributes|duplicate)-(\w+)", local_fp)
+#         if not matched:
+#             raise RuntimeError(f"Unexpected filename: {local_fp}")
 
-        _, name = matched.groups()
+#         _, name = matched.groups()
 
-        remote_fp = f"{remote_attributes}/{name}/{i:05d}.json.gz"
+#         remote_fp = f"{remote_attributes}/{name}/{i:05d}.json.gz"
 
-        with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
-            g.write(f.read())
+#         with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
+#             g.write(f.read())
 
 
 def clean_test_data(test_prefix: str):
@@ -127,6 +127,7 @@ def upload_s3_prefix(s3_prefix: str, local_prefix: str):
     bucket_name, prefix = parse_s3_path(s3_prefix)
 
     for local_fp in glob_path(local_prefix):
+        print(f"LOCAL_FP {local_fp}")
         name = local_fp.replace(local_prefix, "").lstrip("/")
         s3.upload_file(Bucket=bucket_name, Key=f"{prefix}/{name}", Filename=local_fp)
 
@@ -167,9 +168,9 @@ def writeUnits(
 
         return [str(p) for p in file_paths]
 
-    def writeDocs(self, docs: List[str], partitions: int = 1, ext_dir: Optional[Path] = None) -> List[str]:
+    def writeDocs(self, docs: List[str], partitions: int = 1, ext_dir: Optional[Path] = None,unit_type: str = "documents") -> List[str]:
         encoded_docs = [{"id": str(i), "text": d, "source": __file__} for i, d in enumerate(docs)]
-        return self.writeUnits(units=encoded_docs, unit_type="documents", partitions=partitions, ext_dir=ext_dir)
+        return self.writeUnits(units=encoded_docs, unit_type=unit_type, partitions=partitions, ext_dir=ext_dir)
 
     def writeAttributes(
         self,
@@ -199,10 +200,10 @@ def writeConfig(self, config: dict, ext_dir: Optional[Path] = None) -> str:
     def combineIntoDoc(self, *lines: str, join: str = "\n") -> str:
         return join.join(lines)
 
-    def makeDocsCopy(self, path: Union[str, Path]) -> str:
-        path = Path(path)
-        dest = Path(self.makeUniquePath()) / "documents"
-        dest.mkdir(parents=True)
-        for fp in path.iterdir():
-            shutil.copy(fp, dest / fp.name)
-        return str(dest)
+    # def makeDocsCopy(self, path: Union[str, Path]) -> str:
+    #     path = Path(path)
+    #     dest = Path(self.makeUniquePath()) / "documents"
+    #     dest.mkdir(parents=True)
+    #     for fp in path.iterdir():
+    #         shutil.copy(fp, dest / fp.name)
+    #     return str(dest)

From 19089da15b01d800217f4574560073e10c307655 Mon Sep 17 00:00:00 2001
From: David Graham <david.l.graham1@gmail.com>
Date: Fri, 14 Feb 2025 11:08:09 -0800
Subject: [PATCH 2/5] Replace skip_linearization with a no-op linearizer

---
 python/dolma/warc/linearizers.py |  8 ++++++++
 python/dolma/warc/processor.py   | 14 ++++----------
 tests/python/test_mixer.py       |  1 -
 tests/python/test_warc.py        |  4 ++--
 tests/python/utils.py            |  4 +++-
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/python/dolma/warc/linearizers.py b/python/dolma/warc/linearizers.py
index a99c0775..9ebd594e 100644
--- a/python/dolma/warc/linearizers.py
+++ b/python/dolma/warc/linearizers.py
@@ -143,3 +143,11 @@ def linearize(self, content: Union[str, bytes]) -> str:
         )
         self._flush()
         return output or ""
+
+
+
+
+@LinearizerRegistry.add("no-op")
+class NoOpLinearizer(BaseLinearizer):
+    def linearize(self, content: Union[str, bytes]) -> str:
+        return content
\ No newline at end of file
diff --git a/python/dolma/warc/processor.py b/python/dolma/warc/processor.py
index a20e0263..1f9744ca 100644
--- a/python/dolma/warc/processor.py
+++ b/python/dolma/warc/processor.py
@@ -107,7 +107,6 @@ def process_single(
         pre_taggers_names: List[str] = kwargs.get("pre_taggers") or []
         pre_taggers = {make_variable_name(name): TaggerRegistry.get(name)() for name in pre_taggers_names}
 
-
         # create the html extractor
         linearizer_name: str = kwargs.get("linearizer_name") or "resiliparse"
         linearizer = LinearizerRegistry.get(linearizer_name)()
@@ -128,7 +127,6 @@ def process_single(
         # whether to skip this document if post-taggers find nothing
         skip_no_post_taggers: bool = kwargs.get("skip_no_post_taggers") or False
 
-        skip_linearization: bool = kwargs.get("skip_linearization") or False
         # derive the destination path if it is not provided by splitting out all the
         # extensions, removing gz and warc, and adding jsonl.gz
         if not destination_path.endswith(".jsonl.gz"):
@@ -194,14 +192,11 @@ def process_single(
                     continue
 
                 # extract text
-                if skip_linearization:
-                    doc.text = decoded_content
-                else:
-                    doc.text = linearizer.linearize(content=decoded_content)
+                doc.text = linearizer.linearize(content=decoded_content)
 
                 # these are the properties extracted from the HTML content
-               # post_attributes = {name: tagger.tag(doc) for name, tagger in post_taggers.items()}
-               # if skip_no_post_taggers and not sum(map(len, post_attributes.values())):
+                # post_attributes = {name: tagger.tag(doc) for name, tagger in post_taggers.items()}
+                # if skip_no_post_taggers and not sum(map(len, post_attributes.values())):
                 #    continue
 
                 doc.attributes = {
@@ -307,6 +302,5 @@ def create_and_run_warc_pipeline(
             post_taggers=post_taggers,
             skip_no_pre_taggers=skip_no_pre_taggers,
             skip_no_post_taggers=skip_no_post_taggers,
-            source_name=source_name,
-            skip_linearization=skip_linearization
+            source_name=source_name
         )
diff --git a/tests/python/test_mixer.py b/tests/python/test_mixer.py
index 952e4c20..5c6d4718 100644
--- a/tests/python/test_mixer.py
+++ b/tests/python/test_mixer.py
@@ -181,7 +181,6 @@ def test_alt_doc_path_mixer(self):
         provided = self.checkAndRemoveProvenance(provided)
         self.assertEqual(expected, provided)
 
-
     def test_remote_input_local_output(self):
         if self.remote_test_prefix is None:
             return self.skipTest("Skipping AWS tests")
diff --git a/tests/python/test_warc.py b/tests/python/test_warc.py
index 4e37a843..75b10fa4 100644
--- a/tests/python/test_warc.py
+++ b/tests/python/test_warc.py
@@ -123,11 +123,11 @@ def test_skip_linearization(self):
             # HTML content should be in the text field
             self.assertIn("<", sample["text"])
             self.assertIn(">", sample["text"])
-            
+
             # Common HTML tags that should be present in raw HTML
             html_indicators = ["<html", "<body", "<div", "<p"]
             self.assertTrue(any(indicator in sample["text"].lower() for indicator in html_indicators))
-            
+
             # Basic metadata should still be present
             self.assertEqual(sample["version"], "v0")
             self.assertEqual(sample["source"], "test")
diff --git a/tests/python/utils.py b/tests/python/utils.py
index ca194f9c..6c336c48 100644
--- a/tests/python/utils.py
+++ b/tests/python/utils.py
@@ -168,7 +168,9 @@ def writeUnits(
 
         return [str(p) for p in file_paths]
 
-    def writeDocs(self, docs: List[str], partitions: int = 1, ext_dir: Optional[Path] = None,unit_type: str = "documents") -> List[str]:
+    def writeDocs(
+        self, docs: List[str], partitions: int = 1, ext_dir: Optional[Path] = None, unit_type: str = "documents"
+    ) -> List[str]:
         encoded_docs = [{"id": str(i), "text": d, "source": __file__} for i, d in enumerate(docs)]
         return self.writeUnits(units=encoded_docs, unit_type=unit_type, partitions=partitions, ext_dir=ext_dir)
 

From 41bf80e66549356690e8c3690416d414f13afe84 Mon Sep 17 00:00:00 2001
From: David Graham <david.l.graham1@gmail.com>
Date: Fri, 14 Feb 2025 13:17:18 -0800
Subject: [PATCH 3/5] Restore post-taggers and test helpers; fix style

---
 python/dolma/cli/tagger.py       |  2 +-
 python/dolma/core/runtime.py     |  2 +-
 python/dolma/warc/linearizers.py |  4 +--
 python/dolma/warc/processor.py   |  8 ++---
 tests/python/test_warc.py        |  6 ++++
 tests/python/utils.py            | 50 ++++++++++++++++----------------
 6 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/python/dolma/cli/tagger.py b/python/dolma/cli/tagger.py
index 34df31ba..9d29eafe 100644
--- a/python/dolma/cli/tagger.py
+++ b/python/dolma/cli/tagger.py
@@ -91,7 +91,7 @@ class TaggerConfig:
         default=False,
         help="If true, only print the configuration and exit without running the taggers.",
     )
-    document_dir: Optional[str] = field(
+    document_dir: str = field(
         default="documents",
         help="The folder in source paths to replace with 'attributes' to store results, if not 'documents'",
     )
diff --git a/python/dolma/core/runtime.py b/python/dolma/core/runtime.py
index 320ca901..8ebfa0d3 100644
--- a/python/dolma/core/runtime.py
+++ b/python/dolma/core/runtime.py
@@ -392,7 +392,7 @@ def create_and_run_tagger(
     profile_steps: Optional[int] = None,
     profile_sort_key: str = "tottime",
     profile_lines: int = 100,
-    document_dir: Optional[str] = "documents",
+    document_dir: str = "documents",
 ):
     """This function creates a tagger and runs it on a list of documents.
 
diff --git a/python/dolma/warc/linearizers.py b/python/dolma/warc/linearizers.py
index 9ebd594e..c4d588cd 100644
--- a/python/dolma/warc/linearizers.py
+++ b/python/dolma/warc/linearizers.py
@@ -145,9 +145,7 @@ def linearize(self, content: Union[str, bytes]) -> str:
         return output or ""
 
 
-
-
 @LinearizerRegistry.add("no-op")
 class NoOpLinearizer(BaseLinearizer):
     def linearize(self, content: Union[str, bytes]) -> str:
-        return content
\ No newline at end of file
+        return str(content)
diff --git a/python/dolma/warc/processor.py b/python/dolma/warc/processor.py
index 1f9744ca..a3d949d1 100644
--- a/python/dolma/warc/processor.py
+++ b/python/dolma/warc/processor.py
@@ -195,9 +195,9 @@ def process_single(
                 doc.text = linearizer.linearize(content=decoded_content)
 
                 # these are the properties extracted from the HTML content
-                # post_attributes = {name: tagger.tag(doc) for name, tagger in post_taggers.items()}
-                # if skip_no_post_taggers and not sum(map(len, post_attributes.values())):
-                #    continue
+                post_attributes = {name: tagger.tag(doc) for name, tagger in post_taggers.items()}
+                if skip_no_post_taggers and not sum(map(len, post_attributes.values())):
+                    continue
 
                 doc.attributes = {
                     f"{t_name}__{t_name}__{make_variable_name(a_name)}": attr_values
@@ -302,5 +302,5 @@ def create_and_run_warc_pipeline(
             post_taggers=post_taggers,
             skip_no_pre_taggers=skip_no_pre_taggers,
             skip_no_post_taggers=skip_no_post_taggers,
-            source_name=source_name
+            source_name=source_name,
         )
diff --git a/tests/python/test_warc.py b/tests/python/test_warc.py
index 75b10fa4..e69d3852 100644
--- a/tests/python/test_warc.py
+++ b/tests/python/test_warc.py
@@ -154,3 +154,9 @@ def _run_pipeline_with_skip_linearization(self) -> Dict[str, List[dict]]:
             pre_taggers=["cc_re"],
             post_taggers=["lingua_1e2"],
         )
+        outputs: Dict[str, List[dict]] = {}
+        for fn in os.listdir(self.tempdir):
+            with smart_open.open(os.path.join(self.tempdir, fn), mode="rt", encoding="utf-8") as f:
+                for ln in f:
+                    outputs.setdefault(fn, []).append(json.loads(ln))
+        return outputs
diff --git a/tests/python/utils.py b/tests/python/utils.py
index 6c336c48..a96c24df 100644
--- a/tests/python/utils.py
+++ b/tests/python/utils.py
@@ -70,33 +70,33 @@ def skip_aws_tests() -> bool:
     return (dolma_tests_skip or "false").lower() == "true"
 
 
-# def upload_test_documents(local_input: str, test_prefix: str, document_dir: str = "documents") -> Tuple[str, str]:
-#     remote_input = f"{test_prefix}/input/{document_dir}"
-#     remote_output = f"{test_prefix}/output/{document_dir}"
+def upload_test_documents(local_input: str, test_prefix: str, document_dir: str = "documents") -> Tuple[str, str]:
+    remote_input = f"{test_prefix}/input/{document_dir}"
+    remote_output = f"{test_prefix}/output/{document_dir}"
 
-#     for i, local_fp in enumerate(glob_path(local_input)):
-#         remote_fp = f"{remote_input}/{i:05d}.json.gz"
+    for i, local_fp in enumerate(glob_path(local_input)):
+        remote_fp = f"{remote_input}/{i:05d}.json.gz"
 
-#         with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
-#             g.write(f.read())
+        with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
+            g.write(f.read())
 
-#     return remote_input, remote_output
+    return remote_input, remote_output
 
 
-# def upload_test_attributes(local_attributes: str, test_prefix: str):
-#     remote_attributes = f"{test_prefix}/input/attributes"
+def upload_test_attributes(local_attributes: str, test_prefix: str):
+    remote_attributes = f"{test_prefix}/input/attributes"
 
-#     for i, local_fp in enumerate(glob_path(local_attributes)):
-#         matched = re.match(r"^(attributes|duplicate)-(\w+)", local_fp)
-#         if not matched:
-#             raise RuntimeError(f"Unexpected filename: {local_fp}")
+    for i, local_fp in enumerate(glob_path(local_attributes)):
+        matched = re.match(r"^(attributes|duplicate)-(\w+)", local_fp)
+        if not matched:
+            raise RuntimeError(f"Unexpected filename: {local_fp}")
 
-#         _, name = matched.groups()
+        _, name = matched.groups()
 
-#         remote_fp = f"{remote_attributes}/{name}/{i:05d}.json.gz"
+        remote_fp = f"{remote_attributes}/{name}/{i:05d}.json.gz"
 
-#         with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
-#             g.write(f.read())
+        with open(local_fp, "rb") as f, open(remote_fp, "wb") as g:
+            g.write(f.read())
 
 
 def clean_test_data(test_prefix: str):
@@ -202,10 +202,10 @@ def writeConfig(self, config: dict, ext_dir: Optional[Path] = None) -> str:
     def combineIntoDoc(self, *lines: str, join: str = "\n") -> str:
         return join.join(lines)
 
-    # def makeDocsCopy(self, path: Union[str, Path]) -> str:
-    #     path = Path(path)
-    #     dest = Path(self.makeUniquePath()) / "documents"
-    #     dest.mkdir(parents=True)
-    #     for fp in path.iterdir():
-    #         shutil.copy(fp, dest / fp.name)
-    #     return str(dest)
+    def makeDocsCopy(self, path: Union[str, Path]) -> str:
+        path = Path(path)
+        dest = Path(self.makeUniquePath()) / "documents"
+        dest.mkdir(parents=True)
+        for fp in path.iterdir():
+            shutil.copy(fp, dest / fp.name)
+        return str(dest)

From f62f6287dccd3c3cee382f1c6afdca8ca5dca2fa Mon Sep 17 00:00:00 2001
From: David Graham <david.l.graham1@gmail.com>
Date: Fri, 14 Feb 2025 14:22:10 -0800
Subject: [PATCH 4/5] test

---
 .../data/provided/alternative_term/000.json.gz  | Bin 0 -> 25985 bytes
 tests/python/test_warc.py                       |   3 +--
 2 files changed, 1 insertion(+), 2 deletions(-)
 create mode 100644 tests/data/provided/alternative_term/000.json.gz

diff --git a/tests/data/provided/alternative_term/000.json.gz b/tests/data/provided/alternative_term/000.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f5419508fa4c3f7552b22a599130c7208c6e11eb
GIT binary patch
literal 25985
zcmY(qV{9(Y7p`5~-L-Aowr$&XyKCF#Q`@#}8@smMzQ6N7U*3~U?v-4#Rwgsa`m%Bl
zK{OOphMl=75U8nxiJP67y{ikowTpv2&{dC&3yD-KdFxu;-Gz;MZUrpy{onc;RSd8J
zQflZVF~a2B6|b5ZV9o$a2;%vBLR2rC>B;7K#Cb~=^PMdw9GvUs1gXuIAE|GY+*{mN
z&w%Oai;7-=zN!vJB2>H2+tJ(o(dm7z|EC;o>B8XGiRQOCvVEJ8V{^n)@Nah1M{L{h
zl3dbNoI&^e)Td)1D(WTT3Zr(P<kLqe0*b>;Xk;5_l<d_AG#_WyrI<0XLZ2XNq*s1H
z;^pOlU~ah1BR%Hy5UY}gRk8e+<2OC#q7FRUpXwF2GoR@1uKi&}jhMv?xwA4MW7#MA
zb|%=P9UQi0WH#m{8@32^9B&kYs<MwlU4dQjrHu+~Ud5jNVO4M3s*O?6<-Y}YOSS-1
zYxY>VYI5%$gygqNj{!GK3iFCV!Q4!*lncu`ZM2p06&>qdjbOrgYfV3l?p-=7<?;nV
zy|7nv9u8!TO2tbf;X0+)-Ng`U-37<i?aqgw)Ixr8e6}0#r!S)Ii~TZ!UzO@r@;j94
zv5UackV^5*I_0vUo<*l5a&a+n2Dv{~q4~<Ui<t1Fg;7ifPYB+ChX(m&cUr`5&CSF(
z)#V&>gE~v=@*1BxUc*DB>w#&L_|>+eQEMyoUW|Fzaw~=c4?3?_W&A7U1qTmnd-Mzz
z>K>RS!$|g4N&yaQj7-vvsv3;&OFzNO1G@4L<;+(efhWcXAC(c}a=MxbeG^TcYo+*w
zL`?^|&>ytY%h-?ev0u6C1uBLE=*>{4s~Nvwjv6`RGS)P&?ll{R`92k)jMOqQMHvR>
z=;1w@n>&<C3YlM?8@0lx#cEz{AQ3aA2E{z?hf~g-Ov}7l|BrhHYI))dMg|%>gcdcm
zAtcqDN$mS-X$&vfXvXcp4E6(&RSV*I)z9s^eiSirAherJP|3%E@~(hNt6k3JEQ|^<
zOfXh*MWmikeF9WE?L)Q82<bAOE=-y|mbd4Nyfj?D4P+`M(DPY~BrnDjglKtG!7Uqm
z*??6I<XG|1Jl+uNuK0$`aw7fKR175?#H>-lP`^F*Q0?g0QoD-sJM=24o!5#~F>Eg)
z<jb1OD$ZE-0>cYDCww*va>3XOo~u_j_$==V)IxGuA={V-b!&oBUb$v=ez_@p@Rz7n
zAlT*|nx-;6Z2sY~TT5enORuztm(VL4p5t4PJS7FF88p|+4_A0<WQ@OsGtwd?2*b4h
zfc?`BAW6FDURv|&NO)kEqw<qMZbf5Si)8zo<q+F2IqY`8@_{)sjFQI2(|5Jcp?s?9
zTZPFjo#QvA-a;{g14*Z$&g33=VijK)pEKVuP?1{P!d9y8{)9aG(8Ap_JTt?sY0?n8
zI*Rf9o#*C03O~U6hH_4+D3CAfs_fkHT#sSKDhT(I<=wh2h{fX>PsjvoN7GEv@`-FC
zGY=9gm|&Mt(1@BrLM$8#fD+@GEaH>pP2AimaoLOJSkC5z7D`^%i1V3{4$QDJ!AbZE
zTMc@9i*o;yh>++mjI+knlQD)oj@3@fEUhn}HO**v^Jf<jwbjwg)4EYN8V~P+Xe6Na
zY-`BDu1RIbN-zfHmg5QgarNG;ulWzCg>Gr~Lx5iPS)5^o8s5|8;CZ`&8jm#+k9=;0
zT+gJlO_c9-I?rnGUClCYimu1(nJbz09+z9&W4LKPFD_!=7ql0Hf!HCgtrD%lm_V2K
z-VuV&b0REn^rYvG`p`$H`x&L%QmRW~Da}(H>@nwLQD<{RUr~vOtTO$&E(&R>EdCM(
zIYR<>_(7lJXurhJMpV$Ze6xZ+DId~)j^ZP#wcM<l;2KiJ|6d`N!Ffttc7#RpnSJf4
zDhIQuUn1E)MOQZh=%$9KLG9df@L^(NKC-=Itw{Q4w8>w?EvVR5o0dZUjL-SLGUFRH
zSQ5ApO}Ug>B3w#GVsCeeVbO36H90yae#Q>FrykYFr^C3nrC?j0QUY={qBod065Avg
znBYsF1@+E>Y?54mg&aPCvO`xf7hyb;kGZh9@LT8ejw2|yXl4Yt0KHKPgo*s0^j|jA
z*797crA%&)O^VLXK!mVoY}3q{K5yqA0TJc*K6&Erfkzr^sKEh=<}yE7SSAhveoyWK
z;vvU|`yR)xvND_xy1Sbp$-X+nptJ+_H2*6<luZ=5<YZ;!4~@{);675V!Rj^Duy{RR
zB#e4p?3At{k=I<)#o{k`ffIo9@iA%{Z0^SGK#8)qH~(J||Jw68a<cbC_p7+_KyNeu
zy{on6V5@|qfDRiYDe5Ts3LkIr?EJpTk9LL1n1v-?^Iu0_X?Fc%E0|g&hh3|88G}++
zsIabB{UwsCKGJOs{s~F`Z=0{ge5Tsa7-|HPZV&fKf9JydzK1<0G7C`Z-P;H_#!1qm
zZ=A*Ju{iF?!CZ;#mL&MM-2_^Rn1Me(Qs6R0&wdwsm*tZ9Xvi3MLSqDYJ*2ckoJTA+
zF9E=!2Ia7~v+EWX-bN>`K)!{g=eilpt;;j6AEC|~guSwnwD|t_^qw-1-vMuXArSFX
zvIV=stenv4pC)XJw6r-%l=wo;lcgc1OfIkVm^*NBD}s8-qUb@W`tw~Y&`z-)19bPm
z=IENQ>OFBcY;hM7Zf-#&SC<kPRW8^YR+43laKc$fk|u5l_O7$6UoD+cf=v%xe??-u
zM_7CBiL=ZV&%t-}ec2j`&0-)Zu$M?r*IJ;h5FOhfnw`?JN>HEfl4`Rm^{XmWdg2M5
zuiZtQua|2%Cb*k(>4zvc?)+Gd*`KSI3PqA;?QeIZ+b*=4LhBM=1Ifd*DVUD607CKd
zyp>}yF`|E-tJj8EWVg~UrXSmyW!u#|7DeiBBebiQ(tS1OF8IDak~%StzvL@(e2{Ek
zF0}e&_UI+mqx^bVDs*z^nI5Y{<Mg|EGyP<)U(WYXQNP${aR#dBy#Ojpt*3n6qah@F
zmzdjbrmMiu4hnjGqpAy3nDiBL&Hexke6?TlSS@P5)$)VF`(yi#dw0vwd}OmF=-|39
zbK?W+X4zvD>1XbT>i*UIm9Fov1004Y_}O*~H~P*;VnoO|35{`!Gxt*PeXAp}#B70F
z^bEx!-wUt53J%NIY+1#p7dUt+1UZWc&KghrZr5Jti}VV$=u#O1mi%SuS4qLl!LD*k
z#(hU(6@7CwzR4&XQ*AFUMZ13bj`h*Jdw;L@Idsz+qe79}!<vxSAxDs(-lUws9*P-3
zyypclg%FdqF#~-)ND3Twe8Cp;@Pz7kgY><?lp8pM@RuZ<pp;8EL+Ayl5DI<%iyXSx
zLCcfj^T$e^M6NO-XAgH*NdIKmL+rhvb|gd+KthovK}V2X@nZ{yrt~p^`N|0vmBPDn
z5P{7$-!W~`I3nV%&-fh#a74$HGx0)v=@5!sI3s2OW|l4`hlHEDrHoKi%Ge@0P{cSQ
zU7}KUT`Z<xc6mBHAn!@B1$z2n><V&DV9LFWK`;N6KsThiI_j1)hM>E&K42==>Ent<
zd&#m#T~)yWM*NzFEK|c58dJ=}i43BL5-38Anmy&Chb*0wN0i2s2Pw0*rLB<UuzdL=
z%p+;o-r4M-W)7uU%_usE*25Z^)^A6I_LTsvQ73>AOI~cTBga4R{ECx9mgBmoLzXEo
zZo!BVwz#o}oW-W>2(Q_A;v()UPMEP0(-rrYn9mXvV0;znVM_kK!s7s6BsU-Oal&G7
zjxg^;u9c~Ostn00-^g4m1T`Uk%D5ue$Uc>Ljb&M~K2}IApf9RE!)n&N3Bp4y`~njf
z%Wvh=WaY3)){-Eb-amP!Xz%XVmq6AhL3c*c0`FUyTIhd-gXwkz&{ZEME0Nc#G0U{8
zA6G63W8kaO2$h5>U#qooD{sNG@g!Y~qb)%|!FCGaT@(1Fcz~<$ZD;iUERIm2TUm>x
z+HH|aTquK{v0RE2G?}{XA#~cQ1`7U+&ntz7hS1N3UDJ{lv&LJ`0L1;&FL608#$w%D
z)@++Udi-lw;%wOd=<n-|96kfYJ;cqjdydpkdH!rHu7ub_WKDi;IV_I<#z0%hJ$j33
z5Ohu_#Nd6{whYW1#wVPADf&N{$vDV8D-!RXecWHoa4a?59FJUEI=gwT-uzwS>ie#}
zf4H7HX*-Rbm4j51T-Kq$sa&*Tw|?RaC?qPqy{7j=?E&k-IRR?+nYU=A@`{{$sZ`Ma
zSV83d?}0|mW34PeF=%xpGIBA74i-!qdwG)*-SpR$@1woTwl3VYYyM_y3b(9z-y0?=
zTX}ESyl#D~Q+#e(-mPiZEZAwB&o-1Rmt1^+dhff+`cFe_)?g5vJ}%rGL8g2F+C&Eg
znyD-Glyajgml&QQzSoL>&#ZFs*Y0yps&@guAB=Jf{;IV?$Jna1z%FR1Hi{h(|4Al!
zs+IEpU%q#X-Lq6n#muz-AGvNW*NvWRDc8-OY^nIag40vtY)2j5Vyn`@o_eX6iSWH!
zxW|L}zrg+NKjP?CFK?A@=p}oXJ(w&o3fnoq_^92Yu<}<V-Q7~)OkHp;t3^L`^L>U_
zZyQU0E0#$48}awR|E<lyc;45U<LkA-=y0X-&f2E_e9e&~`~-&<QN#He%!C)-DY6vO
z74!zL%4ipEXJkWHk=61fCvPGQ;jT#RarJH6rE2_<NvvLL@#W#W=6YN~;OE`p8lT2r
z%{FKBhidmWEo7q$7tD7B(xdG{$)V@|ypQy65pcu7g0J{+h})lqj!E*0vpmnvvu&+w
znNLMO5k1eA*P6tAgjPZe0<7pwk}e^PGM2vehmvIJrCfGtW;M?2>Lkh#E*j>{?=kGu
zat@ipeH=?}CsJBr;#f<DQGU1@Nnk~aA@eW3SsI336&ho^Cx;x0#{_KTRNNj`fvw7A
zI_(-f8!1tb-|`JJ%2Hx=6rV`xjgwX+KXI`IU$EJai-wvQgVyLf(5jvX6U4nr>-vJ+
ztn7~IQ}Y{cn6<>+#q#tD1z4%{M?N`X1w;f{R)UW+)o!Ztrja`>OQ!}VSWW5rp!!-x
zBKvUfKeRN=-k1yx8eX-;xOWfoN@jq$>M8!5;;2p*^Dz-)H1Bp4pRIuh&n-R7gY}B2
z&KNo%uf)1Y7Tns?;J=7sAG%K;)s6pl-{*mAS+|bY`yp7W)rJKuGh%cJBDOtI)z)VR
z+;OkQa_fFmvZz}(;2I%!PsR#cOd_--O;EqQ0Duys;3Tz+PIM)Xkastc?^q1}A8*!N
zhoKb8(bo!JW02Nzs>FOt@`ZbttXw-nhZ@%$+?tR~EaF)&>;~NzAej-6&vsU*Zj?`E
z=F8<ZWatTT*0gO=<I|M%@E6(YWL9IGwaM8;3_<d!*cg-O?+_)lHK~)OXmqMa=H{lh
zv{Of{;J7xlHF<N*RCtWW_j!v4#K_e)CpFf{+RxcVluA2k=gw*&v6v-br-D177xuOe
zBY9xZ_zctAKXsxZbzQDq$@qctUXM}~L&v?=V%v_99F1~qti6mZC{Z4eyYiRdh!~et
z$hV-5Ft?x%p6jb7M|(PH84$8QI-JbgK9ReF$e_8U{(@Z4*DVJV<~D5J;K9lbu(q|W
zwj&!k2OokAN7tbv)oJa(ahOi}h45Kd9-JVNVXM<4zskE&n&3!4$aZig1-IA2+VR7R
z&laa;7;_`>!Ig8*$t{J>{*4ddYPCg&?*l&)G~G^~HE-Aa4OknY@6qjn%qmDSICZ9K
z3v+}9?3pG_qiMpr4Zbu2^+d@KN0@f%u6f=+%sXnHfQ6rop=vBKAB(|Jp8xuwfleVN
zkyVoW$?;;IdU^(C75MVRco(&SVm5B<8$8o*mD8i3X12y<gAqX!lGb7B!FifVzUOX#
zLue>CmNj6YLbY5}$Hh32=aROCC@_Dv-`NWsIyBGI27Zj>DcBu`&&Y-<*YZ|M^xjZk
z+v^dkSMteQb2DPkGkTI_p=$6_(Yu}s|KX;p9)i1vARnIqprLu3=K7Thq+9(K>v21B
z;3FX1f#$=NsvX!?AfspqsjlSTw12jaz7xgA&WSz@uelSYo8D_b#6yhm0T~ks3{&5)
zf?S0Q^KE_hu`>T$Rd}f(vQ`&XYXGm;hu!&?&GrA;pn%iM)d&1)iD+E6FL2ei{C;9~
zMltX(y6Sb~_u$5mQ!wx(-)g5PzpOaEwKCb|A(Crg^!0Urx0;?=zZx1-VRmNu|98Y)
z&_}$@)`e?+tKEvtPG6N>mF3z6-g;}4d(G_LM(auodrel04O@=h<>^+Vc6e*Gc%RPo
zH?z(vuig$rrMHsYt-j^nX&nP8R7vO7RqxM|hAV@9ad%r^$mn}j1pS}a_69UM-6bAG
zC6O`zjDCl2<vkx8BVhp~mm>JMDnjJNzD2eKdwTtDr8>~o)_IK1;=7VNna&eJUt`SD
zfx*W;aCX<hE$)@Sa2o3{W32DyRs<K|DQfLt7m}(=uF+)lh9DMpJ{Rc~ZDH!;BzY2T
z=5P~3NEN6Y-sho;eZg%Xl_h5|m}9IlBX~g`_M;dq&r9*YO1YxVj~EdVAb`@7^EIb>
zRvzQb`P_B)TJ<VGV0hI#4%=fvr)c4;D^cn~td}c;)i3`U-;R{iGi_Y|&e7QkZrE@f
zT#=*YdYOZ~Nd}_FCzLqdGxKwrCR*0PnuSBya79B{yU@eErZ1HHl!NnLKAL~33<Kor
zIz^cN(`az!)T`FSc;Co|g9Q~|x|yL0Fyz$R2H$RS0q*VY<4o3&`ad@vesm|f+RBDu
ztAV=`zcVCca3xv@HB<uJdQeXxH9aANxBUfF)$JtTJ3q-{iQ)rVYVWGi)^yedgFaY!
zWVSyGqs$j{eEj6(3%C0U82;XGOBAI_RR;=q{@8Cz!};hx&G%)&0(xKP2Z+|oN`N?Q
zx8-txI3C+YxspUI_y0xk%a#S=plsOxP0C^+tXUNSf!O~;sBBr~3!<^))~rf^NNl-f
z%OdgrA>mkb>t;njC^p@)WuZ7Mn|Ara|Bw<O7Ta#w5+IJlwpqR)9?Nds{C^PM@>zj6
zBpY}6f>11iwW}f^`2Qjdw$Abeu~>R*S0z9+w%+ntk@)|RNG!hflOiA-n{WB7P#lrX
zyL{n)NC^;+?YDde5GP>!C=X=2HJSO;eB`g;k}_udsyI>r_0M`+8q8<^alI>x7T`nP
z((E*GN*XanzkhEX<Y4F(d{E19c&g4c1c&_=`tP3eSxe^csbKzO1HS@NcXhvNI~nWq
zb^#eADd|i&9qIbrkIz3Dvxu6R3NK@-(|Mm!?`JIQ8|QaN0lTL!T88*AbLI)ck=L88
zf1WX>WI$uDY$US|V1aU7WbA#;2Lt>SPe)JNPh*1nU@s(=!Ey<nVc5fMhNCdueo74d
zF}|LTenUk*dv`5cH>)3i{u|?M#SN=v-TE&CRErkZs^hNk$^dwv*w(~VTv&94UZAch
z8p8dix`C*@d_bHW1rW^HK_Rj2g&5qVnXz*Z6L;;p!E<L1RAUva#-HqyV$plsNbZf@
zD@tkC2Iz5a&i=Yphbg_)$E;+*TV~Wh$QT(jK_r&>(oK2GCm(zF9O*xj!J{8FLKTIP
zdONg;Zs|9)*OG$|T<P7>**YPx0l6Gqw=Xx*f1-`doi9eik1XOzVVK~R<GAIKH&-$Z
z*Hwty{`r-CP=yoGRqNs3OakjDI6Yme%<_L%G}FR&=JRi@`Pv)nGjbf=N!rN(sz?w$
zV4f-3HZobbNAPykeDm{``V*#;hj4y%yUw4qqmt_*QuNKkM03@Qk(fao0fA(hmM9MC
z1UrlKB<ZO4AePy+$f-Wt20BnhN_0I84?*rbZHS}{YEY%JmDbg!QN@aha`#8ZoA$gM
z3m)`@YLl9&TEiF_hhhvyU0t(`(>-;9%i!2zS{Z^WvS^+K2#knp6KxWmQn&!GnfeC~
zPx#yo9YqL4%|P57o@UxgQW*^ull+=|+r~hS<70L{N2C}>dcU3!=%0gW6A{D+xiPz*
zmy}rUndQxZRY2Yun2{F-R6ZPT9+=cT_dXSMFW~J~ul2*pXtzK~Qd3W98^Tz=@9u<=
z8Bec28J3>9zUB_>5X@75^U)=l*U;pu@ugZKI&5L-1*J*`wQzQW5#!Umj(m+*>;<j8
zvlv(<y6u6}G+p#EBikr&FR9=fD}8L1eW6E@lro0y)Ez22MZY81NE^VCI@nhj-ze_a
zJmlix#H@OhEz?oj9y=2v?2(4I6H~qc2+j<Wh>IU(q^W22YuzURyDbcd&uaA1-@pBB
zsr+$7>DA!tM2KUs?_W%sNybAC(j$b+$3rNUDmIbb`_Nnt_1)wjGDv6}k&BcYJxnv7
z&EwJKsdEB*<vfhK0y*wyN}(!H;!J_m{qaoP9#X;%6*U7s*^_5r8^bpd^$TmjIe}va
z=G$5{L3Je~K|Xm8TG%JAwze<=Y*bxUcu^v~?#ns(N5>`nYZmG>Zvfg%I1-ohWmvZD
zW4W99V?i=}#|;D)>vpK|M?LN@Nv9DKc+s*ejRiz0qX=Jp;Lcr}$lBRz4!*5B9Kv%%
ziWi2VdrttDwDP)>XuPFVf!6-2L^L@ZNDt6du@wSo(BHZH^Xvp@jr9X~7X)&O4(V*L
z(6_6i)CC*dVpr}#Z{Iqr&{A!TIROtGR^KY!6Yof8on}_291z1`^gz1Vx3zy4GoGwn
zowyYT`zNY1qfOBZ4c&$H?89%z*J^BheOY=jWlbh*k;_#9mIe~-NJALLAZd;Y$&!2n
zy%g4U^Ezq1=@8lw2;o3^krU)_FV>>V<M@2-U+OFHyW&+H#fjBg<{ft^)?D5Gc|FD2
zCk8%l5dLkN@Hc#S)gFOrqnaw5#c?tEuTf_sr1^hf<@o}f_bGYdF)BD;xq5vFX&q_c
zI$XrdP9SdZ69ehZ4}otWFRW~3|1{_lG_y+jLb1YH8YMlfzKgc;uY^{QHe5V8r%pCp
zxOJ6$L(u8KZ)RtD$o<~1=e-m8N?@T4p%NfhuFMgjTKEa&JZrXSKYv+`p2ev>-ZfnL
zeh~T7cg7!>debt~xqw?YZvDmhGfLU>JHBDjHH#CLRk5t7jtRSB<C@PGp${iQ6c=1k
zl<1muWq;$o#`LAV&)@LWd`uxWgI2^8@mT70%Z$RQu6GHa82LPpj&_czPh=$u7#J4J
zb|a)uc0{*l%Q;k#nuMpF4~C7v7~Ievq_Wu?mKeR*N8lm4N&t>_%?hElWajM8@{>au
z9do9=jmpBcHt<pOvI~WCEBN-CR#+FoZ7jQEF}i72j{LaRCbW1f0FG4BAaTeepAIrl
z1W&<Qq#dr7UV@#&$UW<T)bLNrc7XTwETbJob-S-EFW>r*W$XJ6OHEe`&DLv+)G_Z8
zm*FQq*m#VOOx^pI5gL*S9m&7Y{D!3w8|pvDs?j2M`kUp1y_5ZIQO&SJov7OM`OXN~
zbZYY3zVV;^cz{h^$ZweVr^i`8k+av`o9kYRaXorf`EB?4@8S7+qODD%P9pv2RzLUq
zr_(ms%I!leTNW=Zd`vuZP*EBJ%VXf@^0(q|EBP0}imA#iTVCr1D>9jC<Hr7|I7VAk
z(0NEK#}16K<V6c8i~NAE3iD1gf-L*<T!u9^Bzabm*9qglat|cqWj7=7Wf|(dk_Wwy
z9+^Im1(*MY^H==uT|n<sxorI3%Y{dLzS|`iFtY*kV9a9vcr?%&qm+89m1CGGcUs?(
z`3rdT{X004N)t)9n2I=|6{CPk6U%4x>~6!y!Ly5c!4;-a@95hhN=R5$Jd>KX4fe1$
zqjtB9q6<t{ywP&*Y9+S}z(q!gcX+HNWw=+l3CcxAD0k4@g<i@~$TAa&o2*pNlug)R
zMyhw+PZOPXt^|6Gd6T-`FLN~d&S;OEs@@rfVn|OpoBlG+o5dv*5xxY7o~LLdms8;z
zCA5bGtTksLL5BZQGNlyBP<{zzElUp1{whV`eh@Opm|h0GpRr+Ii4LDUk#YSi3fTep
z^d>OM#MO89JgmO#5}vWZvW<5?3=bxWqG~sBd<EmjXwp6H_6E9rvCmUT7bF$lZ!xMX
zV!jZ1RQLfzw=ZtF#z*jmw>6lA4&Y{m^W`}Wp%!Nd(-QXSuWY(|v<w#>!Ul2VV_YXj
zW=%s~iSr^}3sWzA73Gt|<aK<TPUj@K51Z!v_klUL=?tZw_wo}IzTMLOK39E#v6<6v
zfVk-Mi8e9ju!G*_GpBE;z8!-d>D;yt?{%V9BiXDN7p<XE(8I`9l>1aI?o=(v9pIF4
zQO!%bK)FQbo?LeDmdMqz*TT1qBT;>>_wnSTBbTic7_C#-_8oKr`jOFiTuv4n;I|+Z
zDSzD!zB{qz=H7sg#onLPxI%+oKAXAWDEGELiUX^LAx4Q{VLwgh^Ro2hEQ>NUCFK?g
z+7NsM1RVz!%pQd{gtp}XJ12Hr7P&Dn@t#9!i;uKV;so#xcrgZir;19O?Ug^i`AxlA
zI6YZR2JUsut=lti^!|-di7#>COj$I#rRjnAF(+mr$s3JGBM!`}q;=vMX2_UwY<bvc
zlu0@;RKP&oTf<?y<9oJ7wTG7yMx&<=F{`)>Qj+nPNT&e{mqlWE7TQyh%2cH1+4eLq
zsST}{LWrIr7Hh{rT3Tf`p}o5|3+Tltl}QsU5+pwmcWCpjkbl|<6J_5(Xm4L!<t~X8
z0i&-QJb@0Nka7lVHYMyuo)~^|)?)0X_JO{fi*m>=V=@&>l6YOkpW(6R_4GEm<I6n5
zp)OLBl`};3x&fDI3i9Bf*KyfJhHl0#RPh#`?`|@zkZHO*3^G63TArQ(Nwy_sh`G{-
zl<S0LO2ZrtwLaPL1+}1bC)LO4CqKD&!$$a29ZAALY^ncp4=1jt7*^#A@Tuj4Mi(_K
zHEJJ(Bc6rrRnU?HxuNPl#Zrp*LVTuzATkPfKgpB%hq43R){llN;qfVrLj#KTIUvpX
zuzJ`@umKCgph&x(qvekom`G1K4|@EClm%7vx`W`;Uz?h{8smcSrh$`Aq2NM)q0Ya(
zK?C`GY?6ZvtnPk}+lPwF7wK_Cm71zA80DW})NwO`{s!hy2gOY>_M@1*ycRw1_7%3J
zMKTF7-ZM%J>iyzNve-Bh0ZE*J(%!(E&W)fKQ$T_*!REfLutebFB6_*JjwL+Iu>7aX
z6A`1-ab|Lrbn!eb5OqM>rD$Tbv7c^HMsr8A2xs>XXC1GHJ~Y{g+KQL>)!r;4UDWjP
zn7lB`K2~5#A$}i*9p`6b;iHZggKW6oWxRbKC^s;9GuU$Bv`qmt!*M_?%?435R%OyG
z{c5a|TMVV#75`i&xQCGt4*C!0MpA?KCN!kz6Fvd~B2Yj8DxBR!|A-h;SNf2}m}frw
z^rH1GXTc;qHHLKa2R}Gb9k4A<$iga!qjrG>*da3}J!{1Dbn)q_kzmunD6#|##>wzo
z>U4Gg!F03bGQiDL)(FF|n5CJ}OGDQepxU6>0Bo%Lo8ql`+#kv!kQG!kPa-YF5Sbej
zBxhYjH!I5f$10FuY;^mZIJ1NmQP7yo+TiGr8jt!cADk-fMUrwokBEI_;QXLd*{FWs
z&~N&Ld2Mi%S(xvYvmC5)4BlswaZZYaGh=h-sDIGPd~on3ir}^m2AvRC%@dC75|f#Q
ziq)hjVwH6lCiR{x2<RbW2-hT}U2>|MhFoa{kBR-_8#?lMVVY+&VJcP4jnPqjY}@3=
z7?shN!?YSHcX#Po<J+j2%qc2Ys}Q#0r)pU1`{^kE+ya84{w3Hj^FATIJ}w4zy(Gqk
zI;}`lEDVg9OD`1bE+ES9#8BIuvr$})O&gg`s5>r3@jm#3?P;i^MXY+AVZDqbcz7xx
zJJ8w>>jq_1HIB?us*}Wl1K2@=!zVtnQ?d_>r1hW8y_?A}yG<#Y)t{!W6UPTMQd1eX
z_;oDDmD~tmfq-sQS0vidY2)zMDV;IlO!1&QLqumo^fGks{s&}Z!9fpW^n%PLOv`5=
zJc6l}X{aK>Ow^Rk(+t<}RM;-N?oA8;09<I_s+T<@`CmERp(tE*{uV|W6Av-hi4SaD
z9=eWg*7<o2)5fMHVn;#p#P!8prerv9QPajIxL1%wIGYHLaUY1CV3{<WU@5x28;}`#
z4ZaB~1By?>H@{wOZcWb6IZNPN19akxQM=NsJ|)r9;iKM}wV`3!aw25PzT7`>4`;$V
zI?8!jU4>XibeWcon`*5vd2HI^9VJKMX4S#5&>f4ottiMZT_DE6enD^QZ*!s%4LDey
zQ(TCR08`(3(e@^*bn&kFL08kk(Ds9)ePc}`s2^7if5yj}QL=l;a*8<XSb(>QdL2RA
z*4WI6QjMQ(l^GM*vv}hHj`t^I!N?aOCnM`|5)a5vey?F6VyKqE{J-j^C#1AfQzy79
zgdI!4Ir*JfT8EX0XSj50%Q^a0fqsRsZ!*mBg61Y$+6w5K9?C!R!jD67CoYyVR&0H(
zMA+}JVu~TudUDk}El5;}uhB0TgOU05(uC=Pno1h!7m#M6W$-$OeWud*-uxl&qAza(
zE7?5=Eh{&>+soE1A6@%(zhbPo8(5Bo1TX7*nCtD$19M&(9K6$W)V<l);!XO$SW&m`
zxzfJ!_un3CT7Keac^(&nXg@oS&VDYde0B)gd%aoRwtaj!^5&-u4El%+o56eetTL5%
z>Rq^b^PbGSL^wK9h(FP22AZDa$^NR>oRBuW>TzwC&mza3EK6Z)c8r62C=rfGAqkL#
zUtj$}vq$|w@-4qIEA_kKT-TB9(j)r)i<U(II;MybWP8vqlD85g`G{lO8Y#tvA0=b-
zY#@vtzPjB+FNK~UqxbSXn`N0vP0u=o`K(Sj0>XH)gCSf`o+df$5t;VI-zCpCcjmts
ztTu)c(-NN;M@<X1yjT?*soZ$E-FX#FRngH^I?o^4a&;0z_xr+f27BFDeOLy)svK(1
z)~-(XFPby664tyO_;3~%?z}u2ry5I{S^9c0@{na0QIs$L%y~P(^0VM}E7MSrKrMuI
zv%Ipn5+df77IHAdERoLP9%FKxF?J^fQ5vZ0&+=wow-JZ-k5QP!D7xUSsx&21LxhYp
zsSncsYkU9FSb&y^IYKGW>LSqSy$S=ue){mz8%p%xEm?=VV7gdH6ke55y{?Vx83Rz@
zdeiI^wV_S1Iap!E&^R_BB+3r6-v{!E;i^VQm@W?-I(Pw68o3w3GOsU#MOAk8IynO}
z7=ZC1J%$AIz#ZVx^*ykUUE}MxR5jIR{ou`wU%7B>>&gPS<}LKEm>Mu;Ra<Jqa2N%3
zbD8bMISsy_F?FnfNH4SI>{QmUuy&PWa}Xb@%2uI<XK9(9g`xnzS@hjCgBByCF1~8D
zK1Qwj3n<CC`Sww$%7#B(gmRMha;!v^p!dc}#k$rcn==Vx{fG=yB9P!<<yPy#htpR|
zXQ<ORK(LauIKm+wS;$_9<tBiaXBbT{*gof4crqMy^>KH5^e!4rzc{p(Ml*<`FW=;-
zvx30(BR=;3>&MrfRj_z<@eGfi-ZK#sTD@?QuN@a~Od+T<y#WS7zEi#;DoHrm9O1B;
zU#9aMRvJON7t91-pWoEeUk+y5{whhImS5V%Xq3aAW)H(FO6{_W{P9ZXl0tmXtfU=R
zq|Yp&)D#0KXE<1_S{c7j@et~X<(!XtlTOA=rF;#d$nJw$J4hDb?aJ-;mA8@AEfYl6
zZUoB&f}z*;JnYcv5S5_Xf@3~kI8j3;Oza-IA{&h)E>Nyo3_*bhk%`n}!ZeTMG1?W`
zo8b@U*xa)#cL1huq`V)61<BfE>C8M2S_5S*lBt@;$M9{c^~4ds*l_fH)}>6Hx|=>0
z$kXSm<AhG!jTS{q92l7(?g;EIiv-p;f_f%zL^}3Mnw=&YAn-xzhJCx>dgHUK)kMDF
zO_&5W@TK-7pR*Iz;Fe|61sCHt;~&R$I!M+;7{#iE&mJ#_NQ5-cN0*z>8{SOk?G+4$
z0G36CxP(NZ8R+7SPRuhAx#Np8$06Ypj(6mWNG6Go6Y>O3Uq!UPe9|jUvt>Hq7FLCj
zsLL(aA)P|MSGY*RWF6H&Ym4GV4*=#TM&n|Qb^s$)HeU6aT}M8TSrqLdjxz6f&+}no
zAoRP}CHSXCZ?%O|pmrcn?;~WHE6*i{nk5JW$1TyyzOx?J+S{+nHFMpgh9^WXF4tA5
z14M0=-_V=U*UohGf+;h#L0k~mrBaW8f<0YnX<#{0*wnzGQq5PTg`Z3_o8l&@k)44N
zV1ORltENs*d^t1~cN&#ig2jnHxS27|;;?PkUW^mG<5_E?CbP)v?~<%gy~6#cRQW9Z
zl}1Him4$g<GlK*t*fNpOi>d@TrmSDMntkEDM!N{8F?ILGc9i1F@VQzyw+5Qf?Qgeq
z!y?nVEgCSY|7eQ&Pa!ylC*>c4>P3OqTJPx<)5nF-jnnhNExh5va>@t4QoV9WN{+H}
z+dYd%B_Ra>0FC`K34JRT^hXLhl3@N;)QHw$$es}nZE~Yn`U;Xbwtf^u!PHjzKv<?}
zUIwz!%3Ta$$Xm+7m`hljS*U)|Mx118q5jEOW#Wm<qxy(ZM-0`Dg}{rw30;Z18E!Cw
zc99?%&gVv`|0EHsYRZJ?b{6q<<U8HYohdY=kU$5B^TfLGE(_pgxY9t?#k;Um;8q@L
z8LcCp3B8UMrzCl`P$y;)`5=$s){JJA+GZMgWJXv~pxWhT*K9v397s1|8unz}AN!w|
zB&)=-kI)g-E||oHAi-=7HG9XIp#CUmMO;TL^Gbk0vy@3tu}O^~&%gVEoug1eui@vf
zBxlPramZvfeuFA(r#GoZte&m~uZdYj>Wwrj?Y72xDjZVeI!q~7t00bnqEHLlT|kiD
z5^zuoCi$IVsV3~Ga9wWIIFbnB6_*U^sCzvWlJPiV=LlB7ii=@ToY!KG_{45di8ZAm
zsu@`(>ZY(7Md*o&Rtl~swF4b}jjq&sXgY({RD@iIw_Q9|^q&l-)nW4@I6w;oPTHfe
zFrKq<%eib}>q!I_Wh&UYQnZ>i{Ppv!GWjTkwN`O&PVFFMhLMOSx28_!a*`vd-4rf&
zw+5=IO31KDW`>#cp#Ll`b*Lud6f$f$lanZHsn8J?EjlfNO74Ow?V^boydR~YZSgzo
zF%#jPC^Poq$f>AbM3{0+?&x5tjsqv=euCMJC~SWeaWNF!`mQjFC|fci>lLZf!KQKO
z5%fBTiQV|ZB)wc%A>r}%0f#)bx>Vfx9S8~wnW*(OaF2Q}4)k5=vsURU?^9N=a2V02
zfhsbueu8iZ>t_Tm^d8^(jOZirDStiDSs>;)a!~8S0*J|MP$0G(?MY7Z?BwCSvwRq+
z#Bv(##xHmd$E!Po<b7zQ*kA2KegMbX;lGv>QXA4A7k+K4o8fC(kKdV)0EwpD@Pv9n
zzY&eT`(thxk@2e%{<{t>N0YP)E%fd%vAeZ}TuUXqP!l{u?1OJ+E-a)Y^OiRF>;)w2
zw2msO!!L7hIgBsVs_=4NA|5sVzhdPno^~a}^wbykWaoN!3Yl@E6_#-ooX-g1EZe)N
z@@8KcmgN1v#Dr#B6!ql|S-(+Cs$)+eFZaC`Bf}H=v0ouk{;2jr9s@LB2KqSv=z5Io
zrGnTfgOby!!}tpDCdZPC#B`w`HdXLU)gn5<ePPzRLg|ARbUKcT1SVX``I)hb`sVWS
zuVRrYL20*!-|jH5>Dr74o$mqwx1AgC8{%mX@b8!zN1__Tl|(#1RY>>U4)rNUf^|H`
zY88=QK?swR<oz-X^kEFlkb+$t2(_mhvDr5h&3&5GXyM}Vpdw*Fv+5e?gFnheCUYVK
z{fcY&f=uDAosXXcAg0s&;gA@v;1@X8?*`ql`Ud$3<|J<JE^JE412cEq-k+;F*6i;H
z=o`WEM8iQZ3sN5k!PWm<tB&A>FY*p3&e9mWKcc-~!Gqa}b-8F<&gq2nzexhhq95wt
zg)uojXFQGK%)7G~BKK$y+9hnWJ?Lee>o~tJ<RsHE<5KZuQcyyQRb@|{p-0L2Ewxn>
zn`0tmbu8J)HZZUE4pG2#<GJE$Q2{D!Ak(x^TEZq6Pb373+-MdYhYUe!IgI#xi)|0|
zTg2<(2nRz#HRqJrzVoh&ri4AXST5Mk4^kwmqjHR1Q*fZs)KUR7aF;YHpImN7F=F=_
z@LIu!SVGWU8E5dNFw;7pj19Kkc=ttk(ZrBDT);i!TaKk8MLzFBgxbq&IV@bxHV&x~
zxMePGz%{AfZeDJ%D=RZ+#;h5O0ua{R*wbgA+O-?||BO`+{1NQUFaBLzpf%K0t*v1I
zR2DY4zi;*r|2>Q5cK7m@tnPyQ7tsG)N*FlL|4uE`c=KRytN!^Ji5g`<3P2$GC$3JQ
z<W*+?UwUyov+v6oSh~S3zSG0Wb7<M1^4!T?qm9;V8zhc%akMosb9^SAyREx0r;dF7
z@m6fUqj2l$E1E3s(FY*qZA9>ua3tY0cA0vh<co^e43$v!ZC=M^+ms+((8>ME5iKLB
z^5nBN>3(RX7}uy*GsyH~+UA5HQxt;!G*$+dx*wD`cd(@luI8FIk35W5m1+v7;$iw^
z)corEB?C-p_GHeJ?jGtE&hQy0WJ`^skC}`(vV!gv@f0Cx?qrN71jlZK$8qNDc68wA
z$As4{zn_IY7eZEfOF!Jug2Zt*JZZu`1J8R$=zJ!u_5H1!ijUT7)Q%0G^ieCv<4SaJ
z43Qlyxz`IPlu@yRF8ld>I(>F!?^SKO|7Ysz!sN-OOd%?u=wqUi6Jywbsi!NrB*SAW
zYfv|f$v)@?U8X2_N@>T_xq;_CX$)>wH&Y)uiPXf^XcKm!E$B*D(2<s?D??dp*1d|N
zehDmZgA8CBvzTenV$od%A-4gJZ;2Vv4l<$>sZTr5keRQ7tgjLqU%|C!3)h}8REI^X
z0{2{DZnlP2Z-J}O4pX5Ms=zNOM@Lq5xm(8NU>lO9+b=<{SDxamAd6XM=C{VeX$gt;
z4v5qq7MW>KbirMQ18oWL*Y4-1JIYIARG9KCGb>+0nLY;|v_jry9b=mzl4Idq0!gs~
z9&Ls*(hg#v6G=}ql95R$fpjbpD_+DcVU?hqnU{hcF9ug!WX89Gj%tPz(GDY|8;Hjz
zj7JxeaOq#bwP78?qBD#^zZ099FCtT2U@o%6l4}kj^$sA`7Dbv_5OMi8569LVfT=AA
zOGgr#CMZ1hR$yj3k3xS2OlpaYu>4$}_FR7AtvcoNe-@7*BHdHD`jt|RKM-}+YeerK
zA5nv7LqMy<7sHLxjMKUHY>5q5dTnaEYt4AcxlDsZk8*qzZ<qSXMhnPXtr>4u+Wl45
z(MC&-#hP=T9?kugcA^ErQ(=Fk4HiYMSx$Qz%f5Ev+^V#1tNOp{7G{eYG)^13qw1EJ
zrd3>vn&X_7G?!KF#Pn6^?q>Bv-4^20%2Y0mn*I)JtkX)foT?K}4Z2g^mK>KA>6|L{
zf4VIw|0Qs0)Qopno6V}xIM!;9{x6klR&DCPy5U-DF?ThZdYS?cG>39QzbNoA#bTKP
zW@0O>!5pjy$$bI?7GI)}c@FO9^EHCM1+bXUnz#J7CoAV|TLKiZ#sI%<OW)m@%e@hb
z6Ba3q$67p2pIC>zJh;uZy<RMiaEDn6hP@bb081xii#-G&d%}(D+_)|la}5OD?$^qq
zZV*Za!Juzl-=Ff~2k_y?BztMnSN9j-boQDWD)KtnFpn_V3T5(H@e~uz*cv;wYZA{p
z`lGYRgkETaxX4WWkbYEf-lo4cD!A15Ib#6gux0;a%+So(PVo@9!6JKRyndJ)F3I%e
z-}a~vq<<Z;G*@lOvIy?&)yKQI#kfgecjWwa`xa#X$PqkszHt_&Ir--5`uz5ZW+)1?
zN4`ck@ZkK5uK>XyVa<)JJE#(FuT-Zn)=IiFY%gK$>Gp2!%A&G3oG!g9a~)?8XAvCj
zG4MQN$=&J2A}pPhUA1A!sp4?Z>B627!?0U9@t=v3CCnPIopjmHHmG{udm@&&@$PN3
z?&fY|Ih`k05OF~BLpRI9H*o*A;?+s4KwHng(G#PTk#G^Cvusc5Z*fFy2hN;<co_0|
zJ<RrCPk*;vYOuEn4P1?^ExgDgS8L%QM1%Ckph+T^ZVoiBaP{SwxblGN9(W13az@cT
z(vl<ge}6%~Lq%p)5rIw}KQ>^x@pa(TvHLGKM1AQFfPvl<G=(s2{io3Dd7P*YM9pO2
zF;hy)kF5eeFUJ1!c9hSagZ*c{IwCYAj31E$vf3%otuS{nv%4+K`lKoY#0YAlEH|{H
zf)WI`MIV-A)wyRDa4sxJ?~%AqW@@)q%VvV&Fp5PD_LFhzI)llL23Mu&4PEqaa<-XM
zFXUXGA7&LEa@`-@r6eBvPr+yJe>%=JHSUEaP8es8#Pxdtb1~#h&9$q!mqjUgcG+m-
zFzc`Y+fI*Kgs}N46<j`AwH`=TX0V{&Io!f;2{$o~Wiw8=5Soi-h6evr0I^S`Vx^d?
z112!#IR{@g+$v8FAtCSi#u#BUNv21K=}CtxbOb|!`8*#J7z%Q23x^LZQw&ozTl26F
zO1C+DA(qR&5@WT#R%Ch0^m7ut=D2!FSNhtIy+-{^Hl@$Xa2mTXWv?ee_N@tMxn}oq
z=&Iq}rG&zmBp$E~L32w|B1a7%he@X=w%j)Mij**nDS)ZoR|lP3T9+h?WZ~Zo2wcyg
ziE|%FbN!B~$|0Z9a_u0Sg`v8kgN;)|nkOLvIygiE#;CO`?4~DWD}EhTC6#<r1L0{+
z&q+UA1UYT!58Z(rJ|t#Wq=Hzit6V}dl{d3ABP|Fk_aI9UB3)vxiK+ua3o9JvdrA;u
z5HiK|O+VvhCMDpyrF23-_r!c+XNQs(@()H@XkBNS9f^=@#`iVjJmG@nXnd}T|AWh>
zI%@KdqqVQmAVMtYs58eDCG!-{Cv-i5UwhJb0K`dws>It+J+J^1pAr%U>m*86H$(Sj
z+bvKTmZ0A&4{Qx=QXx?#9uxxg9HMKCd>8pNT&JJoEN_98Ops8ffePLykdrVfKlD&j
zc$b+8-tHhrjdP`nhGd$%^_HS@A&)GU4sO91ILrncB*^ptj4iR;0d0VicW=x8k0ds~
z09jV*0N8jfR>RuCq(p>k%5&uPPxspT5}16XOlwNhy_oR=`fv~l1ohHQg7NJQxafFg
zhSe}&%#QHYeH1Bc!y(mo689O*{CLqO=?P71UW6e*w)c+Sf`I_x4f&Tn;Eb3A;DFWe
zrmpx~J-3g(t2a7QMk38|)CQhc-%@^E3+r@Z+itAF)_KOVD>5xXS{Lj_wz(<no1ZY;
zs6^iskxc3xr$+Ay?1-_CJGpbm4^*<B^6rAE^`q8u=CD5Mt3DA&FIQx8p;Z#hV(G}2
zrTtF*!}=;mk}Z;UK~(881yqhaw2co>bFn>S7jQt?-496LJd%`$<@YznVEd)W29DdE
zq}9O0;<ZfQ1eq}xxZgU(MT&93`ZPP=sNTJ*WyFHJfASvpr829dbGt5UcvfwC9;ujX
zh^1xZ)Fnei_d|frGO@8o+rU*eIk(+!uBjby4Y#Sv`D9*QCpH2RB1vape4r~1+`zeK
zdjzAE*IMnb|8?{@U<8>3nLyRcJBpvJ|5xcZkAi9Qh>;RN^WM-x*T8$leD`4E$Y4M^
z+xN2irQXe&VuZ-7%{O(Rn$XA|%$0!V&8g$w4{0l3Ca>CPi)I|_^Udrp@QTg#r_s)y
zcFvQJ4>?B64F-{J`iRSS;Vs*4HA=?4w|%1q9OHmH&Qf_xAhCef?+(}aVJ0cL@6jvg
zUpRk0#nqA4-KSXl_iQLZ*TEFXzEog}gIQ3BfYvB9q}tpo|31&B0}QG$q+OboNLN=#
z!oC=Ubd;A%9p?2i>86OWvGG4=LzZyK3jcsCN<5EYn_J^c&Z$Fij_Xd#G9=K}?=c8R
z*>&RQKc_O3aLxm1JNueNtsQ}EVA$>)=(3idxRJA5A$c2xV3RjQ8+VgvqQKBXFL~(T
zKw07QyTm?C59H*6t|rXyTm~dgdB>koJqmD;OXrsCF21$zO6(Npo!67nqxSd}J<RP$
z1hb50zE0WwWna^tbqT+8-+=AA6e0nd8_mH(l4Y6US5oPkeG1~TZaUDrlUNECv&ace
zXr+G>v2D^<sYAUq2cWqQeQ7WqgwbjA*Ia!fi|t4MbS6uY-;DGH3&Tpr&Qfq9P*>}S
z_x5kXlp18@MN>_aF6<#45nd2O2Q&m6ZK085@WJB2SqzQX+HKHzU-O(89%Gi!?S<^^
zEHi;oui2)EHO>Yn^sxy!jt2iumQgl2O%?Hb9Rh1xIIsYgxTB+kbR3+lLm9}wZNg=k
zXr~LhTiP=pVqqL!dQuz8eo$v{rHMq*Z>SSow6IuO^ovm?aO`#H?e<@B*Xf(jbDK$I
zQb4ZP_jq1cDxWtSmkyJtU6g8PPQ!FGrev=dr70$99DL`vj}0q0&eMjFNZX^l&~%$h
zN~@)eTJ#<h%?b5)sk<wubNyfRXV}0wai?CtZ9n95%Q|)pzj#gjQbEAQG9NPoxazG9
zNqRnp|M{8H%#ToYr%w#9Nq5zZa$?!XVPcDx-U!YauBe3}5);?;82l687%yDrB*Z!q
zV7*K3`6TO#*6a;!>l}3)nr4StCZ6X9wt&0hI3>GE#qKXc+Ujjs9X^>bnTsyNdJ%lH
zqO^UJ5=v1c&ewJ=-9Ni8o>xkkD7re8K0Q|NYXf6si0v!5Dd*m(v?u|gXfhY3{!-!^
zCtl=1T)_1uZpan@kO5C+F?=W{LsNq*Y&&4F4C~y*jwc}*sGaTC{V_+;-ULw((L*m#
zrUmQO<xVIOpPsYpZZjS`R?LDC1`Cgw-!|6)%52M-YYMFUSVV#}D~Cc(y~cyzX$I6a
z#-{|7Ol+jyyiHD=$siz%g(LDX6Fo3BmVn|B-2&f)qR#Uco10&#@7DAD2>wgBF^fCj
zWs@jiZqU0lV?U_^LKw12X&IZ)6g<sD8H7L<j0@&HRbRx<LnK-|t%yLK|06?;4n=GP
zPjN_(<nLlbOOCv!7nOyWeYe|^TL)5120>PM&|rCLOjS)k!oAlNZjGKZauJ8;SZUhN
z4iZU?L&q4Kz`YUn`fvVfyw4DFY$#X1JEDV5-Us4Qk80ecK|j;2GW2pnE2Sq_#FP{p
zJY^2`sMy^4^b7*UzP_1=q!sYx+&!XS+098%Q;*wwaq!pLj9k6iiPt*S#C7M+Q6f>2
z<-`J}V9nfWk2apU2N$dX+{8qR6uLArcLsINSQL8dkuMx870|r@@2J6Pxp)b5`0ie#
zN_BC_i5)w_#1LG*rW0g0eEUGFrQx_dc(|RgSCnyYMCXZQk+#*E67Fu3i7O3fR<tjl
zN`nh49KL-l$pS8k`!Be)AI4Lw&a?0e&y&(3Bv5d!2)P6NquTCe@^<BL=^0T9zhk!8
zI!klAsM@9h{}r&0JoCbzdEE<Q*`9U&b+IJzyWuy2!G3_h=ifK{`~z<A@#u6?B1;!g
z37xOto6Tj6_8)Agy;2&B?qwha0`X7Gr_2`b3oFm;p|CIp7q~TviNsr<hEER|(+rP+
zOalISd5GzcPsmMCoLMDNlhTGh+3%IO28+a<kW%1t{xjHm)gUsrJKX;~NJ%<-;73n!
z+CIncj?;)oq3NR@s)s?fDvL9b`Z&IVWL%K5ydVUYzWk0Si5|!SA-6#?niA8H8J|ZT
zW9Lx&`Tt)41||90oWM!?Y5}!kHqNU%qAsMzy+SApT&Dv@V%PCPujvTU{BOX9MhNK8
zKtDIg#}jjF*L33f$e%$(nS=}ci|zt$uCC$Y`_h}UclI@uA0CoVUIVLvK5x9mhFiJu
zmTQuH0aQN)FNpC;;-qALA{j57V+A=Q038c(^-gH*>|3VkHsUFmA&h}O(ty+W&WL^b
zG<~zZPvjmxP_;KSI>u&1Xu9un=sLuzH`v>I+{<I`>#et5(-7YUZ}o>T1lkY%_?-uo
zm8B5!kWb`8%5C;Kz^%qdQ$9NK(dQ2st??m6V7kUfS3Y|3Atf79vLPj5z{ZC(){qJf
zsnC!LY{#Y#d2LfF2oy@EC9SliNK1;eq)1EJY)Q$sytXYB+ESq{{cKBxw!F42ukA?5
zj<ng4k{v18k)}IRp({;yr9xMl?n=F`)ay#UuGH&Eg`O1Y$!|UWtt*q<l}YZ(WOZe-
zx-wZ^nXGP8erw8aGD}^VrEb#};jwpHQnJ;O5BaUdo$tzoblXx-CZyYzA~F+QnTc*k
z-q4YUGTN>Tvn#{wcDc=-jG<TKhdt?huVKG+<fAJeQbAU_C!Oy#rDRinlO^uS+V*4&
zJsCq!*0$HOC8ZTv>7ER%CoA2PCGN?<dTpsE3*M7q_Bv8ehS`&K@5#FNWZiqR?mb!e
zo~(OMW}+uE(UT?a$xQTRCi?ZJe6-}FEgv2E;J)-_rTa2j{YFDRq@GM&UzWe$uodJD
zGIf0+hJMR_NGmdDeVMbq%u-)wsV}qCms#q|EcJy7`a%VLS@*uoM89JnN+)C{`oM;7
z_G<!4R0A#%eQ+n}qb?r}`Dn>UTRuASA=TSbvMoj0@>|>fCQY}cY3>VsND=$?j{Me@
zR=QH8D~)xfdRL0{q%S=w(vwztQlu}v?n{xrRPRf7`%<#c@2l6iLY-M2ebnWnB_AF6
zkdn;u`t^pCY}pSfDKk-*nP9wr`e;iLUXDJa_Txh;$h_2LUg|O-4Bd|psUY)G???w_
zUKq6>AJSM?UfY$DGB5R>6p?wU_vB$;1|}0ymkDVwLDNS=KAQ4jf9uFcPd>PnhRjk!
zW~ot^dUdHMlhu&PYSg7(Q;Nt;G-M17S=$ElR-*N~e6;1GD<534DRbRyNJ-v6^dTjg
z_t1w_;0@GoHl>2Rw%L@DO)1%wl1*Du-XL_{6uNH8GB#xyn?lgdmQ;{2G-WlLvW(5P
zRFKtd3VAnW8JitxT2`|u#NO;ky^hqAC2k7ow`3+-vK%d0fR;>1t0BM1OtfSsS~3$Y
znTb}zmh8y~kFO<zZpmm{LbR=xRF|FFk`c9JC@mREODM7>V`#}3TGIKJbiO5>ZwW=V
zWQ(?hB3oVioAgBpvn7Pt>PefjeOp4NZ5eyJX1{T-+p_#^8FX7l+m?Z~Wu@CqsV58G
zmhrV^tF>h)ZCQ@C45ckYY0Gl7WkhWmQCrrbE%ezI`fSTqYs>iBGO%`6O3F~$!dOJ4
z>Wid9jJ75p@{mtM`rru>NvAI&PG3ZvzQ{K8Aw^_WIx-ZIZRn#TA5v0A)R7U1Y||In
zrZ2KhUt}Bluoa|f*@hyV&__c)xI$NUxQHY4ArFO!x<W)EfY66jmobPqLLX8=Hc(ea
z)Rmnh!U=uI`-I#?IO&UU((g)lyVBjR?WZh9SC&I$6#D3}aGeycYsJLrj1bwVsUOrw
z^kqMApqb!8a%Lxl-9xA9tD)XOxxyF3b11JS;zr{xp39=+&Y5SCr4q)(-3h9r1KP+h
znRy?+pf{61{+|(m^*`ZS^?>$n%wkU>KYN6EF>puG#+yu8CF;SUQ{B#>u*fREGzkrb
z4H;LyD7I$ei1est{B_~o19}i764oCl3o4b;b!p2Oaf8T6iqIVOg2o5G>}y{OO4A7|
zjQJW~oF|kl+1O>OzGgHe>SB(B+7cwxM%}$1LA=l@*qGL3p|7{M<M9)h_NcrZ+g-~o
z2@+Lvz#iB9RHcKa^L5VOj{L^Zn!?9%dQBAgqV>~{|3{wB785-^$|wFKZ?auGtZiE^
zHp43~YN^vq?MyA5pXOsk|FT#<_1FI=%LJy*Xocp0m$+t(AE<d~nUZvl!0$LgBE{`z
z-Q-){db{2Fd2aH})@R-1{|-*OpZAHk{W$Q6|9xk2b#~gkeAB%hT~9{SZ(5^oTd!|#
zuD=}&hNJF9$tRv9LNKL5GBBLIPk6;YHK3ArJnM%lYBJ5Y!x=j{VS&HQx1`njS74K{
z9-+9fKh3%RQ9b1A5V@alktZP<r)0@TzL+(v?GlIep3WWS{N_6czVo&J2LJh=gTUJP
zj351>|NIkv^QBy4R2L`L&&oH}usO#NJ<ztf$GVb#EXzICwE4&AH{4+QJ(q{9mL(&@
z)Io}`<4mtZj^tZTofFB*1fv{JG0!nivScpf9A+rYIk+3V5-5i`u49txjXXUV%3%O}
z<)~u_oIQ@O9(9bOj$za>j(U$5ulKVf2QWe9$N}oyFOD48Eqy(&nlz|}(fWnW5OZYH
z*2cg4-ewv0mR}+AbXY$<rQ2Blba-;oX`c><r$f5mbWS_HcC8phFuXp!&~uVM_>&Jq
z2qQ@U=zzkZ!+X73gxhjv$l+9Cwa>&MewZa4=k{Pse(D!k3kvY<S@z;7vPJlTNh*wt
zV}bAS#%9f?)XY*9XUIGbg3sYuDM|kei|vNQ3ubeHbsWu-qFdKdwrRodk)0;vopufa
zFbV6}<76s=5oOH}wod~6#g&r1Zxdr1_R|q#uWPib#sWP>vejk}oDE;xO@fuTwix{U
zUbbU|c#DIs2jCDxSr&4+GxA#?2|uJ-Zd6-|4Mhx<iTXf7U|bPbFKSY-;Mmy$-cCE`
zmBUunmA8OfXy9eAkVSv{nSlL^LFIvgtORtPrpMQ5u-Hhhn_~E^b@n|QUS!@3)`D{k
z!PncSAl*XNT)V2ip9;z#iJ}<&BF}@wu4oY>uF-g$m!FN#aIl|cT)HO~M8yje=*SCW
z$&bq5gBw7r9EI$NF0gN_Jm)DRqM|AGmSRXe_f^o<hBH8~5rr@g<Y18qiX)L-Dp-p)
zt2eBNVRRR7!ktCByJvU`6q)4I4Gi98y0E?nky#7v8utw8VK?v!a9g3^$yUIlbYs&Q
z4yJV_MvC~wo?V17Aem>*3Q!mj=-l_5hlq8Cv2dkckh>X1hT#W!t35s6>|qLbA$DHk
z6kdfmg`c%0`Cj7-K9{mgY9Tx*qpqc3gqbX#%TtV}%AzJr3KiE5SdqnJr6c~X=&T0f
z<zyJ_|Keb-FxzbTkSCxzSqkeN#J)=a2xH{TxmYE)p@r^r?c%pYT*KY|%`pGPhWW8&
z$EMCWrg+(830!1NpXQG=iQa59WYO%@#NIkX<h0RD*-F@g&&K&y%0eLBhM;$kg0&#E
zWmEiIJP=m2Gz|Sl#HE9yth4r*qTd(H3#<><k~`xpwTGQ?{^u*3=aP#3^gsXa{c5W{
z$)musR}7JMh4~3H-7XNe;2F=jyVv3eyfqHe>>jTC4p$U#4o`1F@$%%eN$os86fXp1
z77JCqTBEB~fr#msdP8xB7g;NwOKTqYb;P-nBrYpX9HJ_*UpF~>V{<-X&je2&j~RAc
z;e^clZdotT6_@4Rf>@uYH)(;49RT#<LQvgov(0s3{T_2x2QF(jliiR!5q6jk`HEe=
z#G^p-{-f5D2V*_ScE$A7$tI06wAk{t40#ri;DlKsZ#E8y>e`puEl|mQY6o!~F4&5H
zo&CtBMp@8|!Mlu_1^mk5I0`%~#)1LuPWIZkaDLwca#_&Hk&YD&T9EGX?TNa5`5Y$h
z!$oES{+n9ivpK`d6;5awHNxCJM%`f0@NPFnB*Vg?TLA6Fi;^NMKwt}ZID>BW*#W}g
zU0Ppzo9PF_5W|xN+!_lkqt};wDq=VsZw~O*#O^MZ=PWTv98Q2-KD?WEZZ}(SPVNl5
zI57j?AsRsl0`1X0`qYLQiZnroz^B+i#1W};n~5orL;!iuj|FcGK$*i2a4~ndb`@f{
z0UYlubb$_FfUkfpF<R$&N^BI@>~W%jSw4=86u3y=)4OQjt}E&iZbtMY4E^aEBII;O
zf(m`mSU&)N2{mSEbZ0E&D?0c3mB?<TPBM!|P?<|BB^i`l1b|&+5=@q125T+!CA)PM
z+JtD*Zegq2?O{2N+aNZ;z<Q-QZH_=3BH)K1w_OOM7ZoHzptlA~ZOdAkDX;}WbUhFQ
zs2VY<f)GeO(`yS&?5OzZP4$SdIn)gTttD}4m^U_@sh--g)EV|GwK=8oW9f*+0jZef
z#o$vu4dC7%>~mJPH0&H`eW4|J19&S{tpJAEin*yvR+m8&zzxy`joLV9FfJ4Bd`WBw
zPyV$jx%DFtTTzs?0|nVgYC!I(UD%apY|;z%8KiT9LyR3hvEY{jKaxjng^)rnBTzR&
zSoECJJi+}IwBgyrn0yD2)NJv`4)u(*Z4!FkpZV*wMgF&`A-$b;dBLpAyCfMIUQ2QD
zT;gONc7@PgTF952kd9Hp&%!8$5Hi)!e`O_3u^sc9p@Zy%+5%DFR<^)E=(N!mPSX$h
z`d1S4%K6;vjle<&Z^e6)rl8x*MG8b7ioT#cU)Gh(pr<5r&S)j?yd<9(2%@Z1EnK}(
z&sE5%m*a{61Pz8aHx3}0MV^Bdbqu0J+7%Yo@)ta@kROHicAd!u5)0r=CtDDCeAT38
zV+1iH>zU?^$4Nm}DDgFxf{#cHuDU+5gzDUV>Acb+R=Qp31roy(J5XpO!bv!Mo{uXD
z<MSj(<1!@nE}So3XbIy57(7(vG@fQHUus-;A_+Bd@>dQM|G;3bmb;B!vpoeoP&>{q
z*Rlnl4D4#Sb{XFU;P3}!vy>5AqVv>=G>E_&jHf3T)Ewk&HACv&Sas^^<fvY2)znEu
z??F_?%=pLZpPLQ!toFR2y8W8!w(<Wn{r>UuURx5LSv}NeHSz6Qui5`On)qh_xYhiu
zCjRfM1pACqKJU_jQvU7u>g=1zX}i-LPg|qO?A5oo!`}~v(@A6U?davOtdwuDZ&@KL
zS@%hmd}(AQUHpxe$1*ZMvIp%ysy-bG>L=9cEw-4JwCT079{t}(WByAJU%K^Qs<$O_
zPeOPPZvi9DWV^uWf~&|8Nj!+QDC4?}h&j$ehrE!$LlcblToTjp6FQ_J)oWPHfP(BA
zgaMWtWc&%4AJ~!sgN>9%rv~1oHIezVip&^;0usOmGLh|@XnG`Gk1ySF0c9qr)fTy@
zlT;Fs2WBmTm$%(%PEulTiQXy~;Hbkctt%Y$IZZ2T-8lT4`%_gTxbBnK$8z30TmlS%
zcwubcnhUJiR1%j38nPr+M$EB%7x(ehX1>@td;)tqacWFsGsnnj$uYxx6eFk%ec~bR
z{JrEj2<bcWYw+poa^>z_&=^hJ7O70!MH*eV_uWD+P_aiNLnzdCJRubbR+55cWgZ?4
z%cUo9X%sLc@g_@PizqIe#u{zgE0+@v%A@p(vIPr+2TI}8dIN-XzHO`OJbW;$G(%vB
z&EJgx5jxM7sXg}c^^j!i^qY*pb|2SFH;rgVZUj=x>M<q7C*wf`u5wJn);4URlwbzq
zukeg!2}X*G4k_#t3z97~qc1p382>`=zSolEJ=dm#@TzKH6M=9-dYk4hCqsTOG(BNx
zN9W#Ho_&s=T-^CN!iF6yN7ddLv*TTpja+VVkEq<NJ&ymj5j<Nj3k9WX6mZTZEs8A#
zy1=E{j#4rXIXUt%dD>uID`ramB%!rDVHYd$vTU=E@qmmJaDbuHFbSN6+xRlSl1&<u
zAA~j<CnIhlS8$(Y;U&~@yIT)t!G-j*DpXq=!PERm?oPRW%Q7J=sW06|;Vv#i6Z58W
z!*P0$T@EpAb+3+zNvOS;Z#)h^<xO0S0<y}GEMf|c;fg$`-Q_zPKn_F6WkKt9l12!4
zkRWao+jRG~XoXW1qFt#e%4z6d77#!k8-?aPOPeGQ<2?K@yO}e$&YAVSiF0nvS4hcV
z4CM|_B*#knxZbQc6?;EKDg%UIWg~R2V^I}pxI-Ji&lvIHV!==$M0Y~04}gt2$l>;A
zM8f{5g3ps`5J$wEsu1$SEk`ybhY0kgci<XdN(41TbVf*0z;2I~<9IQfuAJ_S>j3^t
zk9)#<9ht+0nLjR2TFeXBCw5RGoA=u3E~^~TK$YVyF6hYd5ou9o#aXPUls7rTiM#|_
zwx@UQGF&8KR9t02cR>muckQw_IY0i$sVo;i`|4%Jpc9vY8W?LXY4-w|#gM^s;W=($
z4^fFo&NiuL<&oQhbmlCeK}rzv9b{3E#j_;mz);3`HqBdZG<i($Vk`3aT@_#yAkY=C
zEr@fMdEpN3Z4_=~IM|EB?O~?kJbYOR378iOurZx3yBjk+e{anpdv#|SaIs6S&v}D3
zdwE&rvDiOZPkEgRP(6H3pGN8x6xI^x!z628Gfktl0WH~Zel{~e51xu?qm-dWPB4yP
z^Yd&~5~1wG4{0D@1t?PZqKUNWOi?5u0Qmudb9*NmI$=ppAS*mrWIi5N5nPSP&E}8i
zgUiiNRGdD^$Cq39NcPRGb7>+>RYxv+Q;q@U#oJ6RWjVJ?=_X(f`{HX{Ps@vhj(&mI
zuO;gao&OT#<;1KdTF-*a+b4UddC}kyeX^qZmz+TZfH;pD8Zje9c9dy?3~$#Py$B7R
zUNH>vF0&0)j6BW~J`AG(ux-OM0jv!5bsW>L2qi`83pKq%es#K9d5f><y5Nx*aYYX{
zVaPG)1A4)p<_8p|cSwzL25cScl>ixo9`D5|ldc{!Yz7_)^?~mpGh+~PXA$<Z(F?@B
zgYRt=L8}`M?z&ONt1ciwe*}~N6F~e$80_9BVENw#fBmoM7UJU`m^w*@xDBHv93(~J
zJhh43F3m$+{j3_J-{{mDou83jz1BQ#_CBY^`1yCj&*V+d-*%v(IC=YWIG(hA|Elw<
z@y*NI@7_*sUfsTKpR|X+fBXBlx7Q^NMYDwqcEM}#NhsaNoi1r99^XOP-2CV=`;V%$
zKp4dnN{c2tpdTtNie7*pasKN(0hZfx4tu{K6?(n*%T*F|%de)3Wz{n;w$VI(0qCti
zLQA=??l!xxVe<W|-un_l##gN`5mo<Hz40Y6>ohT6HEUl&6Z#cRGd%+X_^Q_Xa;f)U
z)!Ich#>Bu;iuub3{!)g|JaARX6o$k_oOFk?cJuY<T~JpI)l@CjRvpz<J=Ld)rFn&2
zz1~#y7Ho9&j;eQQ9%yc9v(xO-<Ra@@qlKI`jSg(Tjh<@sp@nTSPXbQ?KLDFyp7TwL
zd?`M75#YkXJit!CV1%16yJ6<VtPF$2pC<ma@TZMG9sKFyPY?0N{*Ff#+tfSng4Y3n
zVfiK#G4v^*9&x!4(TvYg6^SQ5gpiPA9-CDifZ%ZD;p{mi6~LD9c{bH+^n0I&2d^DB
zJIC$MYFqz(>Etu4E$`t0Yiqo@YTa~Ro;Tl4U%j23yy*^3+KuMh^Yinw^UKNgWtp`#
zOCaG-s$L7jEOE3?5b8fF6RSV%$lUBbp?J-Bm{q_2Y@P&P;RBx3?pB+>j3-I<LE0K7
z=JPPo>le%P&K$wUL&FGzl8*Ex7pE<o2nx)r%U8cAsfuqbPXBOf&B>5|d<Z#=y!MU$
z@Znuhst>2v9gXo_@ap>FHC`|O;<Aw9{1h@9zBO9%^GvUg75b;L{z2A=ErMY#!n8S)
zT=Vf0;&m0coA5gZ$WR}1(8^R)El9md;v?U-@c7g->RG>CKk9dC&l%f5>{)?$pb(dY
zaJ$UBHb@Q@)E@C#$nJ+yjqVqb=FGuPF@;~OxlOK3%TUs~#l{>UF@297YxEc%OYqla
zrHK^Z3Zg3AwQY5H2d29!WXqkAJA>#Wd``@3b9~1;0DIuQaI{5Iyf_Eh1%jkZkSG9Y
zBt(82y^Av(ZFw2PQCaAliXghNvmN`*(xkc}2>cCn39MDlkro9(i4_~7zQ%O{cf6t^
zMfxJQcUFtiY>L&HH8d6mk?$tbYg#s^!Bn(-G#B)ws-_%xb?rm1eWsK6xRCk|u|~)#
z0SE;Pg22EWY7lMd(grW5XLJjsTN*5d#53B@iz&rsSL)>TTfC=HRTst_-E#8C|A>FX
z4{}Wt@@#N@MSlV@i^D_C0fb#y1`<C<(?oAiH{GnNYr30+PAh$qBfznvA$G^e?GE*9
z8q!?K=uQjyLqGY7mgqmT6lyQw&=)QC>P+%{S@&TMIiUrhM;ra1o?*kz;R1Iv9H9MH
z?TG%g3+P`sd}936>$CT@=gXZ7&p*<ab``5}hm4y4iQhQkK&Df=ILGRln={$Z>=x-Q
z_zmsWM;h$N;Kl*xp}>V5Jv;M(=8N``bWu$f7n<mXX7R;D<B}e4f|OQh0i2*n=Tz_l
zmf^)7Z|v4T^2TmeonTsoJAM#sjg5|ckA30pO!R^tf7q*+fc>C7Nj&Eb;>liIhyETe
zS3IEFQxn<lD4Xi<Q<K=EF-PHc4woKc+cS3SMK?|i=WBgQ--8$5RKKaZoRcqJX%7kY
z_-&t>=FUf6*{|Y~p2)hfmV~AZv09=#_%_9*S<)J3v+hc~xYi^j7K?nU2i>Q8Ks%nq
z2NLvNre30DGv6Z7F3Ch|ge>l{iUDTyh5an1C3$g1oJq&~7YAvAdE+#neC6MKvT&vv
z_>r(4x7)7CX~z?kr3;P)z)1cLFBnmRF0~+ljveEqe+jsrAVp@HN9O1|q|}UaP&QUU
z7MjHme84;dOS$yk)20-lD@hVy&0Q!`VQb$QKF4B`T&2NtIP~c`tvIxe)_dY&A1rM)
zUWT6e#@lZ9&~GGQ)Q1$$NWA(ziG3O{JwVXGsUltBol|0LBugMn#2^FnMqUShdS9LE
z?M|R~pq5%9b>Uh|3pdm;-ggiya;YKHZos5nPMw8oB@%%_Lw#nImYjr`BV=zGo98Nb
zlM}^$A`*{4Qi)C%8uz{WK4j;2tMfyJ+>MqM-=0gcD;`ou><KK%8i1|NXDgMlFtm)j
z2dh;AsCu1P17reJx0UlhW(77QEsA%ij-(y{pxNjk_aKJkV$>~8dCqTQFXPOCP`X?g
z88@>`Et+|u?~sqS`U8Wrfm9?OBR9%qdDL?^Uoiv~LsMAXK*oD`WM`myf+a}BqmT&!
z-7NgdQV-kT#E&vZNn?P)2a&cIM%r63+Hx#*5sk#{39D<NtbPb9AaEm~Wmad+#%zGv
zi%kG20N_myC8S+`wqL{0VR0r6Bg#f;U;(>{y0;n|BHpB$2y?mRd1EX<c&CQ~=gRBJ
znY<(8oL>Vvo#2-h-?`}*0n}QGocU0LEMtM62BSwc8(ahfCWn+zKlX6*^+PMPN`jS*
z$BzFD>i570?m8zfp-oG7w|is<#l1d;-nZf$MKRrT_OrL0P9XekZB`Qw(y+M<kXaqp
z(T&J|VshbJE)p-3A<gYv;5hb`W}MJ$ogcJwmgTsn2^a<~*a;nycfd^ql0Usv%rLm+
z8)pm`a$t|sFfcjgR_rSzD*`T$47Lh#a|bWOplb$V9y^-Ihrqy{TwyCm#ZN!}Ia9^w
z+mGu1kMuv@e$}6T{0ngxYn2uT!tia1F~3}wG$eZxaA8!>cN#0<bNgt8-O<@7LRUJ7
zz4XrSL-@)SQ)eW4wX#^*Fd$*2#F_@L$p;VFXI_F7eP~IDEI@Wxs+G8~`cRw;_~V#g
zXywqYw_IAk*K9Yj7a!(hX;vmcdLRL36snc1E-NitIcu4ACmm`;f|=ikY-YJQIp%{&
z#u9H`J2F9ek^J&tr`3W=+U!A@C#YT|ImbzGXbRQQ+J&pt(H2R3|AIm0(pPMXT<XEt
z3@=DGV^<m31b|piH{%_WkTAe8yiD>#<bkbaf<CvUX&eCsv(CbIQN%?*iO7BA<4sw5
zxpPT__2Vu{k0b%Pfxo4-*}}>xS%kueINN{no~5^xdmP`z_ysr}Sj-gm&RL8?A9$fJ
zy$o78e|cz=)N8EV1!MDC;;J~uXlsk)&bU*Ee)H%71EV_Og*m4m-XCt5;^Gy8+i0Gz
zqU>5Ksb1u8&ellWYZw<E5oaLkdoT3GM4YcbyWCjPIlS{d7C+Q8Yl3{d0{lMXF?(kE
zgC!d_`gOXGpMOmw&CKy0JHj|CbCEg^iSfZfJqwb#Oa5+;K}cV6q{=m4;z7jC{eV^v
z97?Rj@-%gVEyw}}A$#6hEr0|V<l75juek}DCbZx?1i6RJjggfjA8@qlt@R%eV=kSl
z;&LCl*pkD3x)Xs<S1-8tl3Z=Ufiy6a;z5y$ygQ4&PZP_OWI2_kkZewAi$~#Z!^42t
z3emRJqJ-#dT)j^Sh_829I7XO&LWX`immJ=-3^-?)N5m<+T3G)c8ZUm4PH(;UHvhQ|
zs5u}xwinwo){zk$tl?uvNBTJ5$WR_JheYENK~79%Z_|DW-YPpQFz)Nth<w5o5NFJs
zVl5k;owtj7c?_g|PFqBMme<V<-DKf^2(N9t#46fFFEpjM6SKb{*+i6vZj5lCqQQaS
zJeF=CEJ}_KzR}RivaAPHq7#_?eZ<$k!eF_E1%~~=N4{fkE@B8*ri70wnqud0hslNK
zLa^sJAEF&<&E^bya`=2(V}CKpM;DYx1lA4sz<!iIZqjWiQA~Wr6XYABW?rqW1uwS_
zo=o!aZb@-WiUYjFJB;n5()k<jTvQbC7QVZ=2r|xQ<<2HKDt3`M+GVOHEYn6Wi`qM)
z(PWb!o<;rz<y+<i&Bu<-tYwp9cs6Sb(rF2DHy}pWRyY$MB~I!yR!&C_XIFVGzm0tu
z2E>9+jDe#C{hv-lz8}baxI7hl9kO@~kF~8J4H)as0NEP(O_G3Eih6cU<#8p9=HO%(
z>Edp$F2%=U>KnR-J-2(0uS3HPN$e*G=#^n|r|l-eP_82{b6yI4J3hX2?Gl+P&t`y%
z(`ZF(d+82P<~n=?dyc~uJ#@1hUh#^`(~HY<fJ-wCi`P=_lgPwk8FlRW%vTmr;Z(!{
zlh5pw<aXl3h7p5kb7$$mtn&qnbwqH;MHB;GA9Gug3sdHf$Zn-p_bcK*QYvKvZx%>e
zn7X+a?&yViDl=)LyfQzl2_CRZcH{4J&m`pIfc*JXmm<~&WWjL7$oYld0aFQF!cN|e
zRaM-wv`G&xa2$idJ`j=7be4Nf_i@kdtkT`Zg^)q?MVL37N#I(U`2%GD_fA>kyc?yP
z6*xlKQ1&$N{*+2#w_U2VF@&Y%q)iK^Z2+Z~#bzF`(!ujQ9x<PwMdr0^kp*ZMMaG6=
z2g*ePW|;ex!dek8SVnZ+$b!F%p}5X@4WE|4>@AOXLdZET+31=I0>zS@)R>cS#_M!v
z7efaDV$8Dea3qMcj^&Zb`J(Ru@fjKt>-j0H(Sl~+onBnrwW5#P&UGAXL|A6gO|jh1
zaeHve`7=z3W*6@A;agBAU=Cxsm&j#@sC6Cb-G=o5sOEN-93EB;_rQXA=$8UoQJVdN
zX~<z3;i@hAHaS0a7UBt=BQrq~U=!z=I~))2`#92n;{Is~&}2!=1^&kA8fIL_A6+%#
zcTBrby=9?!S#S6j@BD(%F`H^?B3HbTj<YRj(@jMTLNB6po>vstU!ynDBf3RZ)a&ZB
zsu+b&of_zV>|zVLiJB3CP3ca+u>YITy*oRw^WNEcDfEb0mkYcNOX2KQmlu>e^mGZn
z;@RbbxNf4Yb>Zg490N9hX404KzJfr@1i%q&!jqS*1_=%}h>$d<4=VTU`~pP~iB>a;
zdxqhTl|Xw~d64zE0q0@#`M#2o<8$#>wSLr`=L-(>#`@xlYRDbg3m0iEbZK>M3<1I0
zF_Q-!9t0zfITpefm7I!-#s-!_x9sFDaiuZKIu+!av|`bCa(V)kGtei7gJg==A=IW9
zk{yh9=NTBw<@Jqd*gxj)mOHRzY2@oJjeI>%BhSyoGp=L+=4!cDTeIZV)+l+kHHuzs
zjZ$KJ`kMEn;S$^PzobVE*P}+>qekANMyW@Q5`s^uNA*&V>ct+_%RQ<WdQ{JPRL^?k
zJOWYB^{Afrs9x$((U*|E6nj*5D6AEGR4ezWR_IYJ>rpN1QO)(JmiMTZ_o!CtQLWgc
z|F4<z*+CG7;qV)orU8dXk&qB~K!vE>d<Wk@w%w2wmTU(P-d57E-+@0P`}iPCL@Vte
z5miefwFsB;n~26fk&Gk}%t$0GNTeKdi9pej5YmyL=txj>B!qM%RBZg06Vl_mp{rkL
zNs8t0>7%TK9VSsTcXWEad5_p{33(jCy@P{W#nBN7LaExZ6(2E!P@?t{Q-o3`HIhJ$
zmZ?^#QP(!Dd6dI2S-jpS+l}^&PupYN^|#B|DAcGb_KKopo!%BUS|wJ%k>*L{GcU%u
z{4acDqV&-L*|rz4DE>~NC_jGYC=z*A#x|*seZNU%B1bGE>O|^eg>g)wXl5m%*CDd@
dFti&=S>zL-8<`*)*g=%G`T%y4ljH*r0RXkMhs*!~

literal 0
HcmV?d00001

diff --git a/tests/python/test_warc.py b/tests/python/test_warc.py
index e69d3852..bd70da91 100644
--- a/tests/python/test_warc.py
+++ b/tests/python/test_warc.py
@@ -149,8 +149,7 @@ def _run_pipeline_with_skip_linearization(self) -> Dict[str, List[dict]]:
             skip_no_pre_taggers=False,
             skip_no_post_taggers=False,
             store_html_in_metadata=False,
-            linearizer_name="resiliparse",
-            skip_linearization=True,
+            linearizer_name="no-op",
             pre_taggers=["cc_re"],
             post_taggers=["lingua_1e2"],
         )

From 80e3c96fd9bd5f06251156dfcc18443cb08623c7 Mon Sep 17 00:00:00 2001
From: David Graham <david.l.graham1@gmail.com>
Date: Fri, 14 Feb 2025 14:29:55 -0800
Subject: [PATCH 5/5] .

---
 tests/config/alt-path-mixer.json | 34 ++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 tests/config/alt-path-mixer.json

diff --git a/tests/config/alt-path-mixer.json b/tests/config/alt-path-mixer.json
new file mode 100644
index 00000000..cdcbe596
--- /dev/null
+++ b/tests/config/alt-path-mixer.json
@@ -0,0 +1,34 @@
+{
+  "streams": [
+    {
+      "name": "mixer-test",
+      "documents": [
+        "tests/data/provided/alternative_term/*.gz"
+      ],
+      "document_dir":"alternative_term",
+      "output": {
+        "path": "tests/work/output/mixer",
+        "max_size_in_bytes": 100000
+      },
+      "attributes": [
+        "pii",
+        "toxicity"
+      ],
+      "filter": {
+        "include": [
+          "$.metadata[?(@.length < 10000)]"
+        ],
+        "exclude": [
+          "$.metadata[?(@.length < 500)]",
+          "$.attributes[?(@.pii.too_much_pii == true)]",
+          "$.attributes[?(@.toxicity > 0.8)]"
+        ]
+      }
+    }
+  ],
+  "work_dir": {
+    "input": "tests/work/temp/mixer/input",
+    "output": "tests/work/temp/mixer/output"
+  },
+  "processes": 1
+}