diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ae9aa1008b..e5d4695e33 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,8 +7,6 @@ on:
     tags:
       - 'v*'
   pull_request:
-    branches:
-      - master
 
 env:
   REGISTRY: ghcr.io
diff --git a/metagraph/integration_tests/base.py b/metagraph/integration_tests/base.py
index 015b7dcf41..36b0b532e2 100644
--- a/metagraph/integration_tests/base.py
+++ b/metagraph/integration_tests/base.py
@@ -37,10 +37,33 @@ def setUpClass(cls):
     def _get_stats(graph_path):
+        """Run `metagraph stats` on `graph_path` and return the parsed statistics.
+
+        The command is run twice: first with `--mmap` (which must succeed),
+        then with `MMAP_FLAG`, whose output is the one that gets parsed.
+
+        Returns a dict mapping each `key: value` line of the output (from the
+        third line on) to its stripped string value, plus 'returncode' holding
+        the integer exit status of the second run.
+
+        Raises AssertionError if the `--mmap` run fails or if a key is
+        reported twice with different values.
+        """
         stats_command = METAGRAPH + ' stats ' + graph_path + ' --mmap'
         res = subprocess.run(stats_command.split(), stdout=PIPE, stderr=PIPE)
-        assert(res.returncode == 0)
+        if res.returncode != 0:
+            raise AssertionError(f"Command '{stats_command}' failed with return code {res.returncode} and error: {res.stderr.decode()}")
         stats_command = METAGRAPH + ' stats ' + graph_path + MMAP_FLAG
         res = subprocess.run(stats_command.split(), stdout=PIPE, stderr=PIPE)
-        return res
+        parsed = {'returncode': res.returncode}
+        # The first two output lines are skipped (presumably a header -- not key/value stats)
+        for line in res.stdout.decode().split('\n')[2:]:
+            if ': ' not in line:
+                continue
+            key, value = map(str.strip, line.split(':', 1))
+            # A key may appear more than once, but never with conflicting values
+            if key in parsed and parsed[key] != value:
+                raise AssertionError(f"Conflicting values for '{key}' in the output of '{stats_command}': {parsed[key]!r} vs {value!r}")
+            parsed[key] = value
+        return parsed
 
     @staticmethod
     def _build_graph(input, output, k, repr, mode='basic', extra_params=''):
diff --git a/metagraph/integration_tests/test_align.py b/metagraph/integration_tests/test_align.py
index 2e3bcb0d83..f4b4c79fa6 100644
--- a/metagraph/integration_tests/test_align.py
+++ b/metagraph/integration_tests/test_align.py
@@ -35,11 +35,10 @@ def test_simple_align_all_graphs(self, representation):
                           k=11, repr=representation,
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16438', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16438', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align --align-only-forwards -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
@@ -68,11 +67,10 @@ def test_simple_align_map_all_graphs(self, representation):
                           k=11, repr=representation,
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16438', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16438', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align -i {graph} --map --count-kmers {reads}'.format(
             exe=METAGRAPH,
@@ -99,11 +97,10 @@ def test_simple_align_map_all_graphs_subk(self, representation):
                           k=11, repr=representation,
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16438', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16438', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align -i {graph} --map --count-kmers --align-length 10 {reads}'.format(
             exe=METAGRAPH,
@@ -134,11 +131,10 @@ def test_simple_align_map_canonical_all_graphs(self, representation):
                           k=11, repr=representation, mode='canonical',
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 32782', params_str[1])
-        self.assertEqual('mode: canonical', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('32782', params['nodes (k)'])
+        self.assertEqual('canonical', params['mode'])
 
         stats_command = '{exe} align -i {graph} --map --count-kmers {reads}'.format(
             exe=METAGRAPH,
@@ -165,11 +161,10 @@ def test_simple_align_json_all_graphs(self, representation):
                           k=11, repr=representation,
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16438', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16438', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align --align-only-forwards -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
@@ -189,11 +184,10 @@ def test_simple_align_fwd_rev_comp_all_graphs(self, representation):
                           k=11, repr=representation,
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16438', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16438', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
@@ -222,11 +216,10 @@ def test_simple_align_canonical_all_graphs(self, representation):
                           k=11, repr=representation, mode='canonical',
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 32782', params_str[1])
-        self.assertEqual('mode: canonical', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('32782', params['nodes (k)'])
+        self.assertEqual('canonical', params['mode'])
 
         stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
@@ -256,11 +249,10 @@ def test_simple_align_canonical_subk_succinct(self, representation):
                           k=11, repr=representation, mode='canonical',
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 32782', params_str[1])
-        self.assertEqual('mode: canonical', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('32782', params['nodes (k)'])
+        self.assertEqual('canonical', params['mode'])
 
         stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 --align-min-seed-length 10 {reads}'.format(
             exe=METAGRAPH,
@@ -286,11 +278,10 @@ def test_simple_align_primary_all_graphs(self, representation):
                           k=11, repr=representation, mode='primary',
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16391', params_str[1])
-        self.assertEqual('mode: primary', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16391', params['nodes (k)'])
+        self.assertEqual('primary', params['mode'])
 
         stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
@@ -320,11 +311,10 @@ def test_simple_align_primary_subk_succinct(self, representation):
                           k=11, repr=representation, mode='primary',
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16391', params_str[1])
-        self.assertEqual('mode: primary', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16391', params['nodes (k)'])
+        self.assertEqual('primary', params['mode'])
 
         stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 --align-min-seed-length 10 {reads}'.format(
             exe=METAGRAPH,
@@ -349,11 +339,10 @@ def test_simple_align_fwd_rev_comp_json_all_graphs(self, representation):
                           output=self.tempdir.name + '/genome.MT',
                           k=11, repr=representation)
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16461', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16461', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align --json -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
@@ -375,11 +364,10 @@ def test_simple_align_edit_distance_all_graphs(self, representation):
                           output=self.tempdir.name + '/genome.MT',
                           k=11, repr=representation)
 
-        res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
-        params_str = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', params_str[0])
-        self.assertEqual('nodes (k): 16461', params_str[1])
-        self.assertEqual('mode: basic', params_str[2])
+        params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
+        self.assertEqual('11', params['k'])
+        self.assertEqual('16461', params['nodes (k)'])
+        self.assertEqual('basic', params['mode'])
 
         stats_command = '{exe} align --json --align-edit-distance -i {graph} --align-min-exact-match 0.0 {reads}'.format(
             exe=METAGRAPH,
diff --git a/metagraph/integration_tests/test_annotate.py b/metagraph/integration_tests/test_annotate.py
index 38b5f90ba4..e7c7707333 100644
--- a/metagraph/integration_tests/test_annotate.py
+++ b/metagraph/integration_tests/test_annotate.py
@@ -44,12 +44,11 @@ def test_simple_all_graphs(self, graph_repr):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 46960', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('46960', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
         for anno_repr in ['row', 'column']:
             # build annotation
@@ -63,13 +62,15 @@ def test_simple_all_graphs(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  100', out[0])
-            self.assertEqual('objects: 46960', out[1])
-            self.assertEqual('density: 0.0185072', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('100', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                0.0185072 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
@@ -88,12 +89,11 @@ def test_simple_all_graphs_canonical(self, graph_repr):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 91584', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('91584', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
         for anno_repr in ['row', 'column']:
             # build annotation
@@ -106,13 +106,15 @@ def test_simple_all_graphs_canonical(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  100', out[0])
-            self.assertEqual('objects: 91584', out[1])
-            self.assertEqual('density: 0.00948888', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('100', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                0.00948888 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
     @parameterized.expand(GRAPH_TYPES)
     def test_simple_all_graphs_from_kmc(self, graph_repr):
@@ -128,12 +130,11 @@ def test_simple_all_graphs_from_kmc(self, graph_repr):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 469983', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('469983', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
         for anno_repr in ['row', 'column']:
             # build annotation
@@ -146,13 +147,15 @@ def test_simple_all_graphs_from_kmc(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  1', out[0])
-            self.assertEqual('objects: 469983', out[1])
-            self.assertEqual('density: 1', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('1', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                1 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
     @parameterized.expand(GRAPH_TYPES)
     def test_simple_all_graphs_from_kmc_both(self, graph_repr):
@@ -168,12 +171,11 @@ def test_simple_all_graphs_from_kmc_both(self, graph_repr):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('802920', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
         for anno_repr in ['row', 'column']:
             # build annotation
@@ -186,13 +188,15 @@ def test_simple_all_graphs_from_kmc_both(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation_single{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  1', out[0])
-            self.assertEqual('objects: 802920', out[1])
-            self.assertEqual('density: 0.585342', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation_single' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('1', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                0.585342 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
             # both strands
             annotate_command = f'{METAGRAPH} annotate --anno-label LabelName -p {NUM_THREADS} \
@@ -204,13 +208,15 @@ def test_simple_all_graphs_from_kmc_both(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation_both{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  1', out[0])
-            self.assertEqual('objects: 802920', out[1])
-            self.assertEqual('density: 1', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation_both' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('1', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                1 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
@@ -228,12 +234,11 @@ def test_simple_all_graphs_from_kmc_both_canonical(self, graph_repr):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('802920', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
         for anno_repr in ['row', 'column']:
             # build annotation
@@ -246,13 +251,15 @@ def test_simple_all_graphs_from_kmc_both_canonical(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation_single{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  1', out[0])
-            self.assertEqual('objects: 802920', out[1])
-            self.assertEqual('density: 0.5', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation_single' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('1', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                0.5 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
             # both strands
             annotate_command = f'{METAGRAPH} annotate --anno-label LabelName -p {NUM_THREADS} \
@@ -264,13 +271,15 @@ def test_simple_all_graphs_from_kmc_both_canonical(self, graph_repr):
             self.assertEqual(res.returncode, 0)
 
             # check annotation
-            res = self._get_stats(f'-a {self.tempdir.name}/annotation_both{anno_file_extension[anno_repr]}')
-            self.assertEqual(res.returncode, 0)
-            out = res.stdout.decode().split('\n')[2:]
-            self.assertEqual('labels:  1', out[0])
-            self.assertEqual('objects: 802920', out[1])
-            self.assertEqual('density: 0.5', out[2])
-            self.assertEqual('representation: ' + anno_repr, out[3])
+            stats_annotation = self._get_stats('-a ' + self.tempdir.name + '/annotation_both' + anno_file_extension[anno_repr])
+            self.assertEqual(stats_annotation['returncode'], 0)
+            self.assertEqual('1', stats_annotation['labels'])
+            self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+            self.assertAlmostEqual(
+                0.5 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+                float(stats_annotation['density']),
+                places=6)
+            self.assertEqual(anno_repr, stats_annotation['representation'])
 
     def test_annotate_with_disk_swap(self):
         graph_repr = 'succinct'
@@ -288,12 +297,11 @@ def test_annotate_with_disk_swap(self):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 46960', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[graph_repr])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('46960', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
         # build annotation
         annotate_command = f'{METAGRAPH} annotate --anno-header \
@@ -306,13 +314,15 @@ def test_annotate_with_disk_swap(self):
         self.assertEqual(res.returncode, 0)
 
         # check annotation
-        res = self._get_stats(f'-a {self.tempdir.name}/annotation{anno_file_extension[anno_repr]}')
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('labels:  100', out[0])
-        self.assertEqual('objects: 46960', out[1])
-        self.assertEqual('density: 0.0185072', out[2])
-        self.assertEqual('representation: ' + anno_repr, out[3])
+        stats_annotation = self._get_stats('-a ' + f'{self.tempdir.name}/annotation{anno_file_extension[anno_repr]}')
+        self.assertEqual(stats_annotation['returncode'], 0)
+        self.assertEqual('100', stats_annotation['labels'])
+        self.assertEqual(stats_graph['max index (k)'], stats_annotation['objects'])
+        self.assertAlmostEqual(
+            0.0185072 * (int(stats_graph['nodes (k)']) / int(stats_graph['max index (k)'])),
+            float(stats_annotation['density']),
+            places=6)
+        self.assertEqual(anno_repr, stats_annotation['representation'])
 
     @parameterized.expand(GRAPH_TYPES)
     def test_annotate_coordinates(self, graph_repr):
diff --git a/metagraph/integration_tests/test_build.py b/metagraph/integration_tests/test_build.py
index 7ded5fb30e..367d1e10c0 100644
--- a/metagraph/integration_tests/test_build.py
+++ b/metagraph/integration_tests/test_build.py
@@ -50,12 +50,11 @@ def test_simple_all_graphs(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('591997', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
     @parameterized.expand(succinct_states)
     def test_build_succinct_inplace(self, state):
@@ -67,13 +66,12 @@ def test_build_succinct_inplace(self, state):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension['succinct'])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 597931', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('state: ' + state, out[8])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension['succinct'])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('597931', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
+        self.assertEqual(state, stats_graph['state'])
 
     @parameterized.expand(['succinct'])
     def test_simple_bloom_graph(self, build):
@@ -90,12 +88,11 @@ def test_simple_bloom_graph(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('591997', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
         convert_command = '{exe} transform -o {outfile} --initialize-bloom {bloom_param} {input}'.format(
             exe=METAGRAPH,
@@ -136,12 +133,11 @@ def test_simple_all_graphs_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 1159851', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('20', stats_graph['k'])
+        self.assertEqual('1159851', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
     @parameterized.expand(BUILDS)
     def test_build_tiny_k(self, build):
@@ -157,12 +153,11 @@ def test_build_tiny_k(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 2', out[0])
-        self.assertEqual('nodes (k): 16', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('2', stats_graph['k'])
+        self.assertEqual('16', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@@ -180,12 +175,11 @@ def test_build_tiny_k_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 2', out[0])
-        self.assertEqual('nodes (k): 16', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('2', stats_graph['k'])
+        self.assertEqual('16', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
     @parameterized.expand(BUILDS)
     def test_build_tiny_k_parallel(self, build):
@@ -199,12 +193,11 @@ def test_build_tiny_k_parallel(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 2', out[0])
-        self.assertEqual('nodes (k): 16', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('2', stats_graph['k'])
+        self.assertEqual('16', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@@ -221,12 +214,11 @@ def test_build_tiny_k_parallel_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 2', out[0])
-        self.assertEqual('nodes (k): 16', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('2', stats_graph['k'])
+        self.assertEqual('16', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
     @parameterized.expand(BUILDS)
     def test_build_from_kmc(self, build):
@@ -243,12 +235,11 @@ def test_build_from_kmc(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 469983', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('469983', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
     @parameterized.expand(BUILDS)
     def test_build_from_kmc_both(self, build):
@@ -265,12 +256,11 @@ def test_build_from_kmc_both(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('802920', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
     @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets")
@@ -289,12 +279,11 @@ def test_build_from_kmc_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('802920', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
     @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets")
@@ -313,12 +302,11 @@ def test_build_from_kmc_both_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('802920', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
     @parameterized.expand(['succinct', 'succinct_disk'])
     @unittest.skipUnless(DNA_MODE, "Need to adapt suffixes for other alphabets")
@@ -352,13 +340,12 @@ def test_build_chunks_from_kmc(self, build):
         self.assertEqual(res.returncode, 0)
 
         # Check graph
-        res = self._get_stats(self.tempdir.name + '/graph_from_chunks'
-                               + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 469983', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph_from_chunks'
+                                      + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('469983', stats_graph['nodes (k)'])
+        self.assertEqual('basic', stats_graph['mode'])
 
     @parameterized.expand(['succinct', 'succinct_disk'])
     @unittest.skipUnless(DNA_MODE, "Need to adapt suffixes for other alphabets")
@@ -392,13 +379,12 @@ def test_build_chunks_from_kmc_canonical(self, build):
         self.assertEqual(res.returncode, 0)
 
         # Check graph
-        res = self._get_stats(self.tempdir.name + '/graph_from_chunks'
-                               + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph_from_chunks'
+                                      + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual('11', stats_graph['k'])
+        self.assertEqual('802920', stats_graph['nodes (k)'])
+        self.assertEqual('canonical', stats_graph['mode'])
 
 
 if __name__ == '__main__':
diff --git a/metagraph/integration_tests/test_build_weighted.py b/metagraph/integration_tests/test_build_weighted.py
index 6c176cffe7..35f50aeb05 100644
--- a/metagraph/integration_tests/test_build_weighted.py
+++ b/metagraph/integration_tests/test_build_weighted.py
@@ -50,14 +50,13 @@ def test_simple_all_graphs(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 591997', out[3])
-        self.assertEqual('avg weight: 2.48587', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '20')
+        self.assertEqual(stats_graph['nodes (k)'], '591997')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '591997')
+        self.assertEqual(stats_graph['avg weight'], '2.48587')
 
     @parameterized.expand([repr for repr in BUILDS if not (repr == 'bitmap' and PROTEIN_MODE)])
     def test_simple_all_graphs_contigs(self, build):
@@ -88,14 +87,13 @@ def test_simple_all_graphs_contigs(self, build):
         res = subprocess.run([command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 591997', out[3])
-        self.assertEqual('avg weight: 2.48587', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '20')
+        self.assertEqual(stats_graph['nodes (k)'], '591997')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '591997')
+        self.assertEqual(stats_graph['avg weight'], '2.48587')
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@@ -115,14 +113,13 @@ def test_simple_all_graphs_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 1159851', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 1159851', out[3])
-        self.assertEqual('avg weight: 2.53761', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '20')
+        self.assertEqual(stats_graph['nodes (k)'], '1159851')
+        self.assertEqual(stats_graph['mode'], 'canonical')
+        self.assertEqual(stats_graph['nnz weights'], '1159851')
+        self.assertEqual(stats_graph['avg weight'], '2.53761')
 
     @parameterized.expand(BUILDS)
     def test_build_tiny_k(self, build):
@@ -138,14 +135,13 @@ def test_build_tiny_k(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 2', out[0])
-        self.assertEqual('nodes (k): 16', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 16', out[3])
-        self.assertEqual('avg weight: 255', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '2')
+        self.assertEqual(stats_graph['nodes (k)'], '16')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '16')
+        self.assertEqual(stats_graph['avg weight'], '255')
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@@ -164,14 +160,13 @@ def test_build_tiny_k_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 2', out[0])
-        self.assertEqual('nodes (k): 16', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 16', out[3])
-        self.assertEqual('avg weight: 255', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '2')
+        self.assertEqual(stats_graph['nodes (k)'], '16')
+        self.assertEqual(stats_graph['mode'], 'canonical')
+        self.assertEqual(stats_graph['nnz weights'], '16')
+        self.assertEqual(stats_graph['avg weight'], '255')
 
     @parameterized.expand(BUILDS)
     def test_build_from_kmc(self, build):
@@ -189,14 +184,13 @@ def test_build_from_kmc(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 469983', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 469983', out[3])
-        self.assertEqual('avg weight: 3.15029', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '11')
+        self.assertEqual(stats_graph['nodes (k)'], '469983')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '469983')
+        self.assertEqual(stats_graph['avg weight'], '3.15029')
 
     @parameterized.expand(BUILDS)
     def test_build_from_kmc_both(self, build):
@@ -214,14 +208,13 @@ def test_build_from_kmc_both(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 802920', out[3])
-        self.assertEqual('avg weight: 3.68754', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '11')
+        self.assertEqual(stats_graph['nodes (k)'], '802920')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '802920')
+        self.assertEqual(stats_graph['avg weight'], '3.68754')
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@@ -241,14 +234,13 @@ def test_build_from_kmc_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 802920', out[3])
-        self.assertEqual('avg weight: 3.68754', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '11')
+        self.assertEqual(stats_graph['nodes (k)'], '802920')
+        self.assertEqual(stats_graph['mode'], 'canonical')
+        self.assertEqual(stats_graph['nnz weights'], '802920')
+        self.assertEqual(stats_graph['avg weight'], '3.68754')
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand([repr for repr in BUILDS if repr != 'hashstr'])
@@ -268,14 +260,13 @@ def test_build_from_kmc_both_canonical(self, build):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 11', out[0])
-        self.assertEqual('nodes (k): 802920', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 802920', out[3])
-        self.assertEqual('avg weight: 3.68754', out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '11')
+        self.assertEqual(stats_graph['nodes (k)'], '802920')
+        self.assertEqual(stats_graph['mode'], 'canonical')
+        self.assertEqual(stats_graph['nnz weights'], '802920')
+        self.assertEqual(stats_graph['avg weight'], '3.68754')
 
     @parameterized.expand(
         itertools.product(BUILDS,
@@ -306,14 +297,13 @@ def test_kmer_count_width(self, build, width_result):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 4', out[0])
-        self.assertEqual('nodes (k): 256', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 256', out[3])
-        self.assertEqual('avg weight: {}'.format(avg_count_expected), out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], '4')
+        self.assertEqual(stats_graph['nodes (k)'], '256')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '256')
+        self.assertEqual(stats_graph['avg weight'], str(avg_count_expected))
 
     @parameterized.expand(itertools.chain(
         itertools.product(BUILDS,
@@ -366,14 +356,13 @@ def test_kmer_count_width_large(self, build, k_width_result):
         res = subprocess.run([construct_command], shell=True)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: {}'.format(k), out[0])
-        self.assertEqual('nodes (k): 2', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 2', out[3])
-        self.assertEqual('avg weight: {}'.format(avg_count_expected), out[4])
+        stats_graph = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual(stats_graph['returncode'], 0)
+        self.assertEqual(stats_graph['k'], str(k))
+        self.assertEqual(stats_graph['nodes (k)'], '2')
+        self.assertEqual(stats_graph['mode'], 'basic')
+        self.assertEqual(stats_graph['nnz weights'], '2')
+        self.assertEqual(stats_graph['avg weight'], str(avg_count_expected))
 
 
 if __name__ == '__main__':
diff --git a/metagraph/integration_tests/test_clean.py b/metagraph/integration_tests/test_clean.py
index 070b396a17..c86e614483 100644
--- a/metagraph/integration_tests/test_clean.py
+++ b/metagraph/integration_tests/test_clean.py
@@ -35,13 +35,12 @@ def test_no_cleaning_contigs(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 591997', out[3])
-        self.assertEqual('avg weight: 2.48587', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('591997', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('591997', stats['nnz weights'])
+        self.assertEqual('2.48587', stats['avg weight'])
 
         clean_fasta = self.tempdir.name + '/contigs.fasta.gz'
         self._clean(self.tempdir.name + '/graph' + graph_file_extension[representation],
@@ -53,13 +52,12 @@ def test_no_cleaning_contigs(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 591997', out[3])
-        self.assertEqual('avg weight: 2.48587', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('591997', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('591997', stats['nnz weights'])
+        self.assertEqual('2.48587', stats['avg weight'])
 
     @parameterized.expand([repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)])
     def test_no_cleaning_contigs_2bit_counts(self, representation):
@@ -69,13 +67,12 @@ def test_no_cleaning_contigs_2bit_counts(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers --count-width 2")
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 591997', out[3])
-        self.assertEqual('avg weight: 1.73589', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('591997', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('591997', stats['nnz weights'])
+        self.assertEqual('1.73589', stats['avg weight'])
 
         clean_fasta = self.tempdir.name + '/contigs.fasta.gz'
         self._clean(self.tempdir.name + '/graph' + graph_file_extension[representation],
@@ -87,13 +84,12 @@ def test_no_cleaning_contigs_2bit_counts(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 591997', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 591997', out[3])
-        self.assertEqual('avg weight: 1.73589', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('591997', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('591997', stats['nnz weights'])
+        self.assertEqual('1.73589', stats['avg weight'])
 
     @parameterized.expand([repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)])
     def test_clean_prune_tips_no_counts(self, representation):
@@ -113,11 +109,10 @@ def test_clean_prune_tips_no_counts(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 589774', out[1])
-        self.assertEqual('mode: basic', out[2])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('589774', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
 
     @parameterized.expand([repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)])
     def test_clean_prune_tips(self, representation):
@@ -137,13 +132,12 @@ def test_clean_prune_tips(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 589774', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 589774', out[3])
-        self.assertEqual('avg weight: 2.49001', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('589774', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('589774', stats['nnz weights'])
+        self.assertEqual('2.49001', stats['avg weight'])
 
     @parameterized.expand([repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)])
     def test_cleaning_threshold_fixed(self, representation):
@@ -163,14 +157,12 @@ def test_cleaning_threshold_fixed(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 167395', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 167395', out[3])
-        self.assertEqual('avg weight: 5.52732', out[4])
-
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('167395', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('167395', stats['nnz weights'])
+        self.assertEqual('5.52732', stats['avg weight'])
     @parameterized.expand([repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)])
     def test_cleaning_prune_tips_threshold_fixed(self, representation):
 
@@ -189,13 +181,12 @@ def test_cleaning_prune_tips_threshold_fixed(self, representation):
                           k=20, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 20', out[0])
-        self.assertEqual('nodes (k): 167224', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 167224', out[3])
-        self.assertEqual('avg weight: 5.52757', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('20', stats['k'])
+        self.assertEqual('167224', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('167224', stats['nnz weights'])
+        self.assertEqual('5.52757', stats['avg weight'])
 
 
 @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets")
@@ -212,13 +203,12 @@ def test_no_cleaning_contigs(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 1185814', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 1185814', out[3])
-        self.assertEqual('avg weight: 2.4635', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('1185814', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('1185814', stats['nnz weights'])
+        self.assertEqual('2.4635', stats['avg weight'])
 
         clean_fasta = self.tempdir.name + '/contigs.fasta.gz'
         self._clean(self.tempdir.name + '/graph' + graph_file_extension[representation],
@@ -230,13 +220,12 @@ def test_no_cleaning_contigs(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 1185814', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 1185814', out[3])
-        self.assertEqual('avg weight: 2.4635', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('1185814', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('1185814', stats['nnz weights'])
+        self.assertEqual('2.4635', stats['avg weight'])
 
     # TODO: add 'hashstr' once the canonical mode is implemented for it
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
@@ -247,13 +236,12 @@ def test_no_cleaning_contigs_2bit_counts(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers --count-width 2")
 
-        res = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 1185814', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 1185814', out[3])
-        self.assertEqual('avg weight: 1.72792', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('1185814', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('1185814', stats['nnz weights'])
+        self.assertEqual('1.72792', stats['avg weight'])
 
         clean_fasta = self.tempdir.name + '/contigs.fasta.gz'
         self._clean(self.tempdir.name + '/graph' + graph_file_extension[representation],
@@ -265,13 +253,12 @@ def test_no_cleaning_contigs_2bit_counts(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 1185814', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 1185814', out[3])
-        self.assertEqual('avg weight: 1.72792', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('1185814', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('1185814', stats['nnz weights'])
+        self.assertEqual('1.72792', stats['avg weight'])
 
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
     def test_clean_prune_tips_no_counts(self, representation):
@@ -291,11 +278,10 @@ def test_clean_prune_tips_no_counts(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 1180802', out[1])
-        self.assertEqual('mode: canonical', out[2])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('1180802', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
 
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
     def test_clean_prune_tips(self, representation):
@@ -315,13 +301,12 @@ def test_clean_prune_tips(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 1180802', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 1180802', out[3])
-        self.assertEqual('avg weight: 2.46882', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('1180802', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('1180802', stats['nnz weights'])
+        self.assertEqual('2.46882', stats['avg weight'])
 
     @parameterized.expand(GRAPH_TYPES)
     def test_cleaning_threshold_fixed_both_strands(self, representation):
@@ -342,13 +327,12 @@ def test_cleaning_threshold_fixed_both_strands(self, representation):
                           k=31, repr=representation,
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 331452', out[1])
-        self.assertEqual('mode: basic', out[2])
-        self.assertEqual('nnz weights: 331452', out[3])
-        self.assertEqual('avg weight: 5.52692', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('331452', stats['nodes (k)'])
+        self.assertEqual('basic', stats['mode'])
+        self.assertEqual('331452', stats['nnz weights'])
+        self.assertEqual('5.52692', stats['avg weight'])
 
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
     def test_cleaning_threshold_fixed(self, representation):
@@ -368,13 +352,12 @@ def test_cleaning_threshold_fixed(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 331452', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 331452', out[3])
-        self.assertEqual('avg weight: 5.52692', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('331452', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('331452', stats['nnz weights'])
+        self.assertEqual('5.52692', stats['avg weight'])
 
     @parameterized.expand(['succinct', 'bitmap', 'hash'])  # , 'hashstr']:
     def test_cleaning_prune_tips_threshold_fixed(self, representation):
@@ -394,13 +377,12 @@ def test_cleaning_prune_tips_threshold_fixed(self, representation):
                           k=31, repr=representation, mode='canonical',
                           extra_params="--mask-dummy --count-kmers")
 
-        res = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('k: 31', out[0])
-        self.assertEqual('nodes (k): 331266', out[1])
-        self.assertEqual('mode: canonical', out[2])
-        self.assertEqual('nnz weights: 331266', out[3])
-        self.assertEqual('avg weight: 5.52728', out[4])
+        stats = self._get_stats(self.tempdir.name + '/graph_clean' + graph_file_extension[representation])
+        self.assertEqual('31', stats['k'])
+        self.assertEqual('331266', stats['nodes (k)'])
+        self.assertEqual('canonical', stats['mode'])
+        self.assertEqual('331266', stats['nnz weights'])
+        self.assertEqual('5.52728', stats['avg weight'])
 
 
 if __name__ == '__main__':
diff --git a/metagraph/integration_tests/test_query.py b/metagraph/integration_tests/test_query.py
index 25434a7b22..b8dfcab0c8 100644
--- a/metagraph/integration_tests/test_query.py
+++ b/metagraph/integration_tests/test_query.py
@@ -86,13 +86,12 @@ def setUpClass(cls):
                          20, cls.graph_repr, 'basic',
                          '--mask-dummy' if cls.mask_dummy else '')
 
-        res = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 20' == out[0])
+        stats_graph = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
+        assert(stats_graph['returncode'] == 0)
+        assert('20' == stats_graph['k'])
         if cls.graph_repr != 'succinct' or cls.mask_dummy:
-            assert('nodes (k): 46960' == out[1])
-        assert('mode: basic' == out[2])
+            assert('46960' == stats_graph['nodes (k)'])
+        assert('basic' == stats_graph['mode'])
 
         if cls.with_bloom:
             convert_command = f'{METAGRAPH} transform -o {cls.tempdir.name}/graph \
@@ -122,17 +121,16 @@ def check_suffix(anno_repr, suffix):
         )
 
         # check annotation
-        res = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('labels:  100' == out[0])
+        stats_annotation = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
+        assert(stats_annotation['returncode'] == 0)
+        assert('100' == stats_annotation['labels'])
         if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy):
-            assert('objects: 46960' == out[1])
+            assert(stats_graph['max index (k)'] == stats_annotation['objects'])
 
         if cls.anno_repr.endswith('_noswap'):
             cls.anno_repr = cls.anno_repr[:-len('_noswap')]
 
-        assert(f'representation: {cls.anno_repr}' == out[3])
+        assert(cls.anno_repr == stats_annotation['representation'])
 
     def test_query(self):
         query_command = '{exe} query --batch-size 0 -i {graph} -a {annotation} --min-kmers-fraction-label 1.0 {input}'.format(
@@ -574,12 +572,11 @@ def setUpClass(cls):
 
         cls._build_graph(cls.fasta_graph, cls.tempdir.name + '/graph', 5, cls.graph_repr, 'basic', '--mask-dummy')
 
-        res = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 5' == out[0])
-        assert('nodes (k): 12' == out[1])
-        assert('mode: basic' == out[2])
+        stats_graph = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
+        assert(stats_graph['returncode'] == 0)
+        assert(stats_graph['k'] == '5')
+        assert(stats_graph['nodes (k)'] == '12')
+        assert(stats_graph['mode'] == 'basic')
 
         def check_suffix(anno_repr, suffix):
             match = anno_repr.endswith(suffix)
@@ -597,16 +594,15 @@ def check_suffix(anno_repr, suffix):
                 separate, no_fork_opt, no_anchor_opt)
 
         # check annotation
-        res = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('labels:  3' == out[0])
-        assert('objects: 12' == out[1])
+        stats_annotation = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
+        assert(stats_annotation['returncode'] == 0)
+        assert(stats_annotation['labels'] == '3')
+        assert(stats_annotation['objects'] == stats_graph['max index (k)'])
 
         if cls.anno_repr.endswith('_noswap'):
             cls.anno_repr = cls.anno_repr[:-len('_noswap')]
 
-        assert(f'representation: {cls.anno_repr}' == out[3])
+        assert(cls.anno_repr == stats_annotation['representation'])
 
     def test_query_coordinates(self):
         if not self.anno_repr.endswith('_coord'):
@@ -655,13 +651,12 @@ def setUpClass(cls):
                          20, cls.graph_repr, 'basic',
                          '--mask-dummy' if cls.mask_dummy else '')
 
-        res = cls._get_stats(cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr])
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 20' == out[0])
+        stats_graph = cls._get_stats(cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr])
+        assert(stats_graph['returncode'] == 0)
+        assert(stats_graph['k'] == '20')
         if cls.graph_repr != 'succinct' or cls.mask_dummy:
-            assert('nodes (k): 46960' == out[1])
-        assert('mode: basic' == out[2])
+            assert(stats_graph['nodes (k)'] == '46960')
+        assert(stats_graph['mode'] == 'basic')
 
         if cls.with_bloom:
             convert_command = f'{METAGRAPH} transform -o {cls.tempdir.name}/graph \
@@ -692,17 +687,16 @@ def check_suffix(anno_repr, suffix):
         )
 
         # check annotation
-        res = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('labels:  1' == out[0])
+        stats_annotation = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
+        assert(stats_annotation['returncode'] == 0)
+        assert(stats_annotation['labels'] == '1')
         if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy):
-            assert('objects: 46960' == out[1])
+            assert(stats_annotation['objects'] == stats_graph['max index (k)'])
 
         if cls.anno_repr.endswith('_noswap'):
             cls.anno_repr = cls.anno_repr[:-len('_noswap')]
 
-        assert('representation: ' + cls.anno_repr == out[3])
+        assert(cls.anno_repr == stats_annotation['representation'])
 
     def test_query(self):
         query_command = f'{METAGRAPH} query --batch-size 0 \
@@ -788,13 +782,12 @@ def setUpClass(cls):
         cls._build_graph((cls.fasta_file_1, cls.fasta_file_2), cls.tempdir.name + '/graph',
                          cls.k, cls.graph_repr, 'basic', '--mask-dummy' if cls.mask_dummy else '')
 
-        res = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 3' == out[0])
+        stats_graph = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
+        assert(stats_graph['returncode'] == 0)
+        assert(stats_graph['k'] == '3')
         if cls.graph_repr != 'succinct' or cls.mask_dummy:
-            assert('nodes (k): 12' == out[1])
-        assert('mode: basic' == out[2])
+            assert(stats_graph['nodes (k)'] == '12')
+        assert(stats_graph['mode'] == 'basic')
 
         if cls.with_bloom:
             convert_command = f'{METAGRAPH} transform -o {cls.tempdir.name}/graph \
@@ -812,13 +805,12 @@ def setUpClass(cls):
         )
 
         # check annotation
-        res = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('labels:  2' == out[0])
+        stats_annotation = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
+        assert(stats_annotation['returncode'] == 0)
+        assert(stats_annotation['labels'] == '2')
         if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy):
-            assert('objects: 12' == out[1])
-        assert('representation: ' + cls.anno_repr == out[3])
+            assert(stats_annotation['objects'] == stats_graph['max index (k)'])
+        assert(stats_annotation['representation'] == cls.anno_repr)
 
         cls.queries = [
             'AAA',
@@ -968,13 +960,12 @@ def setUpClass(cls):
                          20, cls.graph_repr, 'canonical',
                          '--mask-dummy' if cls.mask_dummy else '')
 
-        res = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 20' == out[0])
+        stats_graph = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
+        assert(stats_graph['returncode'] == 0)
+        assert(stats_graph['k'] == '20')
         if cls.graph_repr != 'succinct' or cls.mask_dummy:
-            assert('nodes (k): 91584' == out[1])
-        assert('mode: canonical' == out[2])
+            assert(stats_graph['nodes (k)'] == '91584')
+        assert(stats_graph['mode'] == 'canonical')
 
         if cls.with_bloom:
             convert_command = f'{METAGRAPH} transform -o {cls.tempdir.name}/graph \
@@ -991,17 +982,16 @@ def setUpClass(cls):
         )
 
         # check annotation
-        res = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('labels:  100' == out[0])
+        stats_annotation = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
+        assert(stats_annotation['returncode'] == 0)
+        assert(stats_annotation['labels'] == '100')
         if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy):
-            assert('objects: 91584' == out[1])
+            assert(stats_annotation['objects'] == stats_graph['max index (k)'])
 
         if cls.anno_repr.endswith('_noswap'):
             cls.anno_repr = cls.anno_repr[:-len('_noswap')]
 
-        assert('representation: ' + cls.anno_repr == out[3])
+        assert(cls.anno_repr == stats_annotation['representation'])
 
     def test_query(self):
         query_command = '{exe} query --batch-size 0 -i {graph} -a {annotation} --min-kmers-fraction-label 1.0 {input}'.format(
@@ -1135,13 +1125,12 @@ def setUpClass(cls):
                          20, cls.graph_repr, 'primary',
                          '--mask-dummy' if cls.mask_dummy else '')
 
-        res = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 20' == out[0])
+        stats_graph = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
+        assert(stats_graph['returncode'] == 0)
+        assert(stats_graph['k'] == '20')
         if cls.graph_repr != 'succinct' or cls.mask_dummy:
-            assert('nodes (k): 45792' == out[1])
-        assert('mode: primary' == out[2])
+            assert(stats_graph['nodes (k)'] == '45792')
+        assert(stats_graph['mode'] == 'primary')
 
         if cls.with_bloom:
             convert_command = f'{METAGRAPH} transform -o {cls.tempdir.name}/graph \
@@ -1158,17 +1147,16 @@ def setUpClass(cls):
         )
 
         # check annotation
-        res = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('labels:  100' == out[0])
+        stats_annotation = cls._get_stats(f'-a {cls.tempdir.name}/annotation{anno_file_extension[cls.anno_repr]}')
+        assert(stats_annotation['returncode'] == 0)
+        assert(stats_annotation['labels'] == '100')
         if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy):
-            assert('objects: 45792' == out[1])
+            assert(stats_annotation['objects'] == stats_graph['max index (k)'])
 
         if cls.anno_repr.endswith('_noswap'):
             cls.anno_repr = cls.anno_repr[:-len('_noswap')]
 
-        assert('representation: ' + cls.anno_repr == out[3])
+        assert(cls.anno_repr == stats_annotation['representation'])
 
     def test_query(self):
         query_command = '{exe} query --batch-size 0 -i {graph} -a {annotation} --min-kmers-fraction-label 1.0 {input}'.format(
diff --git a/metagraph/integration_tests/test_transform_anno.py b/metagraph/integration_tests/test_transform_anno.py
index a1887db9e0..6d76826b09 100644
--- a/metagraph/integration_tests/test_transform_anno.py
+++ b/metagraph/integration_tests/test_transform_anno.py
@@ -30,12 +30,14 @@ def setUpClass(cls):
                          cls.tempdir.name + '/graph',
                          20, cls.graph_repr, 'basic', '--mask-dummy')
 
-        res = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
-        assert(res.returncode == 0)
-        out = res.stdout.decode().split('\n')[2:]
-        assert('k: 20' == out[0])
-        assert('nodes (k): 46960' == out[1])
-        assert('mode: basic' == out[2])
+        stats_graph = cls._get_stats(f'{cls.tempdir.name}/graph{graph_file_extension[cls.graph_repr]}')
+        assert(stats_graph['returncode'] == 0)
+        assert(stats_graph['k'] == '20')
+        assert(stats_graph['nodes (k)'] == '46960')
+        assert(stats_graph['mode'] == 'basic')
+
+        cls.num_nodes = stats_graph['nodes (k)']
+        cls.max_index = stats_graph['max index (k)']
 
         cls._annotate_graph(
             TEST_DATA_DIR + '/transcripts_100.fa',
@@ -52,13 +54,15 @@ def setUp(self):
         self.annotation = f'annotation{anno_file_extension[self.anno_repr]}';
 
         # check annotation
-        res = self._get_stats(f'-a {self.annotation}')
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('labels:  100', out[0])
-        self.assertEqual('objects: 46960', out[1])
-        self.assertEqual('density: 0.0185072', out[2])
-        self.assertEqual(f'representation: {self.anno_repr}', out[3])
+        stats_annotation = self._get_stats(f'-a {self.annotation}')
+        self.assertEqual(stats_annotation['returncode'], 0)
+        self.assertEqual(stats_annotation['labels'], '100')
+        self.assertEqual(stats_annotation['objects'], self.max_index)
+        self.assertAlmostEqual(
+            float(stats_annotation['density']),
+            0.0185072 * int(self.num_nodes) / int(self.max_index),
+            places=6)
+        self.assertEqual(stats_annotation['representation'], self.anno_repr)
 
     def tearDown(self):
         os.chdir(self.old_cwd)
@@ -78,13 +82,15 @@ def _check_aggregation_min(self, min_count, expected_density):
         res = subprocess.run(command.split(), stdout=PIPE)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(f'-a aggregated{anno_file_extension[self.anno_repr]}')
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('labels:  1', out[0])
-        self.assertEqual('objects: 46960', out[1])
-        self.assertEqual(f'density: {expected_density}', out[2])
-        self.assertEqual(f'representation: {self.anno_repr}', out[3])
+        stats_annotation = self._get_stats(f'-a aggregated{anno_file_extension[self.anno_repr]}')
+        self.assertEqual(stats_annotation['returncode'], 0)
+        self.assertEqual(stats_annotation['labels'], '1')
+        self.assertEqual(stats_annotation['objects'], self.max_index)
+        self.assertAlmostEqual(
+            float(stats_annotation['density']),
+            float(expected_density) * int(self.num_nodes) / int(self.max_index),
+            places=5)
+        self.assertEqual(stats_annotation['representation'], self.anno_repr)
 
     def test_aggregate_columns(self):
         self._check_aggregation_min(0, 1)
@@ -100,13 +106,15 @@ def _check_aggregation_min_max_value(self, min_count, max_value, expected_densit
         res = subprocess.run(command.split(), stdout=PIPE)
         self.assertEqual(res.returncode, 0)
 
-        res = self._get_stats(f'-a aggregated{anno_file_extension[self.anno_repr]}')
-        self.assertEqual(res.returncode, 0)
-        out = res.stdout.decode().split('\n')[2:]
-        self.assertEqual('labels:  1', out[0])
-        self.assertEqual('objects: 46960', out[1])
-        self.assertEqual(f'density: {expected_density}', out[2])
-        self.assertEqual(f'representation: {self.anno_repr}', out[3])
+        stats_annotation = self._get_stats(f'-a aggregated{anno_file_extension[self.anno_repr]}')
+        self.assertEqual(stats_annotation['returncode'], 0)
+        self.assertEqual(stats_annotation['labels'], '1')
+        self.assertEqual(stats_annotation['objects'], self.max_index)
+        self.assertAlmostEqual(
+            float(stats_annotation['density']),
+            float(expected_density) * int(self.num_nodes) / int(self.max_index),
+            places=5)
+        self.assertEqual(stats_annotation['representation'], self.anno_repr)
 
     def test_aggregate_columns_filtered(self):
         self._check_aggregation_min_max_value(0, 0, 0)
diff --git a/metagraph/src/annotation/annotation_converters.cpp b/metagraph/src/annotation/annotation_converters.cpp
index 061b16ee21..99f310fc23 100644
--- a/metagraph/src/annotation/annotation_converters.cpp
+++ b/metagraph/src/annotation/annotation_converters.cpp
@@ -10,6 +10,7 @@
 #include <tsl/hopscotch_map.h>
 
 #include "row_diff_builder.hpp"
+#include "cli/load/load_graph.hpp"
 #include "common/logger.hpp"
 #include "common/algorithms.hpp"
 #include "common/hashers/hash.hpp"
@@ -1539,12 +1540,13 @@ void convert_to_row_diff(const std::vector<std::string> &files,
     if (out_dir.empty())
         out_dir = "./";
 
+    auto graph = cli::load_critical_dbg(graph_fname);
     if (construction_stage != RowDiffStage::COUNT_LABELS)
-        build_pred_succ(graph_fname, graph_fname, out_dir,
+        build_pred_succ(*graph, graph_fname, out_dir,
                         ".row_count", get_num_threads());
 
     if (construction_stage == RowDiffStage::CONVERT) {
-        assign_anchors(graph_fname, graph_fname, out_dir, max_path_length,
+        assign_anchors(*graph, graph_fname, out_dir, max_path_length,
                        ".row_reduction", get_num_threads());
 
         const std::string anchors_fname = graph_fname + kRowDiffAnchorExt;
diff --git a/metagraph/src/annotation/binary_matrix/row_diff/row_diff.cpp b/metagraph/src/annotation/binary_matrix/row_diff/row_diff.cpp
index 81f23f581b..712e098564 100644
--- a/metagraph/src/annotation/binary_matrix/row_diff/row_diff.cpp
+++ b/metagraph/src/annotation/binary_matrix/row_diff/row_diff.cpp
@@ -60,25 +60,26 @@ IRowDiff::get_rd_ids(const std::vector<BinaryMatrix::Row> &row_ids) const {
     for (size_t i = 0; i < row_ids.size(); ++i) {
         Row row = row_ids[i];
 
-        graph::boss::BOSS::edge_index boss_edge = graph_->kmer_to_boss_index(
-                graph::AnnotatedSequenceGraph::anno_to_graph_index(row));
+        graph::boss::BOSS::edge_index boss_edge = 
+                graph::AnnotatedSequenceGraph::anno_to_graph_index(row);
 
         while (true) {
-            row = graph::AnnotatedSequenceGraph::graph_to_anno_index(
-                    graph_->boss_to_kmer_index(boss_edge));
-
-            auto [it, is_new] = node_to_rd.try_emplace(row, node_to_rd.size());
-            rd_paths_trunc[i].push_back(it.value());
-
-            // If a node had been reached before, we interrupt the diff path.
-            // The annotation for that node will have been reconstructed earlier
-            // than for other nodes in this path as well. Thus, we will start
-            // reconstruction from that node and don't need its successors.
-            if (!is_new)
-                break;
-
-            if (anchor_[row])
-                break;
+            if (graph_->is_valid(boss_edge)) {
+                row = graph::AnnotatedSequenceGraph::graph_to_anno_index(boss_edge);
+
+                auto [it, is_new] = node_to_rd.try_emplace(row, node_to_rd.size());
+                rd_paths_trunc[i].push_back(it.value());
+
+                // If a node had been reached before, we interrupt the diff path.
+                // The annotation for that node will have been reconstructed earlier
+                // than for other nodes in this path as well. Thus, we will start
+                // reconstruction from that node and don't need its successors.
+                if (!is_new)
+                    break;
+
+                if (anchor_[row])
+                    break;
+            }
 
             boss_edge = boss.row_diff_successor(boss_edge, rd_succ);
         }
diff --git a/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp b/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp
index f2842d58f4..d9d3ebb5b5 100644
--- a/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp
+++ b/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp
@@ -123,9 +123,7 @@ std::vector<BinaryMatrix::Row> RowDiff<BaseMatrix>::get_column(Column column) co
     std::vector<Row> result;
     // TODO: implement a more efficient algorithm
     for (Row row = 0; row < num_rows(); ++row) {
-        auto edge = graph_->kmer_to_boss_index(
-            graph::AnnotatedSequenceGraph::anno_to_graph_index(row)
-        );
+        auto edge = graph::AnnotatedSequenceGraph::anno_to_graph_index(row);
 
         if (!boss.get_W(edge))
             continue;
diff --git a/metagraph/src/annotation/int_matrix/row_diff/int_row_diff.hpp b/metagraph/src/annotation/int_matrix/row_diff/int_row_diff.hpp
index 36a04eace6..535adc0084 100644
--- a/metagraph/src/annotation/int_matrix/row_diff/int_row_diff.hpp
+++ b/metagraph/src/annotation/int_matrix/row_diff/int_row_diff.hpp
@@ -86,9 +86,7 @@ std::vector<BinaryMatrix::Row> IntRowDiff<BaseMatrix>::get_column(Column j) cons
     // TODO: implement a more efficient algorithm
     std::vector<Row> result;
     for (Row i = 0; i < num_rows(); ++i) {
-        auto edge = graph_->kmer_to_boss_index(
-            graph::AnnotatedSequenceGraph::anno_to_graph_index(i)
-        );
+        auto edge = graph::AnnotatedSequenceGraph::anno_to_graph_index(i);
 
         if (!boss.get_W(edge))
             continue;
diff --git a/metagraph/src/annotation/int_matrix/row_diff/tuple_row_diff.hpp b/metagraph/src/annotation/int_matrix/row_diff/tuple_row_diff.hpp
index 8c9df1cfa5..0b05d4c9fa 100644
--- a/metagraph/src/annotation/int_matrix/row_diff/tuple_row_diff.hpp
+++ b/metagraph/src/annotation/int_matrix/row_diff/tuple_row_diff.hpp
@@ -69,9 +69,7 @@ std::vector<BinaryMatrix::Row> TupleRowDiff<BaseMatrix>::get_column(Column j) co
     // TODO: implement a more efficient algorithm
     std::vector<Row> result;
     for (Row i = 0; i < num_rows(); ++i) {
-        auto edge = graph_->kmer_to_boss_index(
-            graph::AnnotatedSequenceGraph::anno_to_graph_index(i)
-        );
+        auto edge = graph::AnnotatedSequenceGraph::anno_to_graph_index(i);
 
         if (!boss.get_W(edge))
             continue;
diff --git a/metagraph/src/annotation/row_diff_builder.cpp b/metagraph/src/annotation/row_diff_builder.cpp
index db7d36befa..9060a33391 100644
--- a/metagraph/src/annotation/row_diff_builder.cpp
+++ b/metagraph/src/annotation/row_diff_builder.cpp
@@ -11,6 +11,7 @@
 #include "common/elias_fano/elias_fano_merger.hpp"
 #include "common/utils/file_utils.hpp"
 #include "common/vectors/bit_vector_sdsl.hpp"
+#include "common/vectors/bit_vector_dyn.hpp"
 #include "graph/annotated_dbg.hpp"
 
 const uint64_t BLOCK_SIZE = 1 << 25;
@@ -26,6 +27,7 @@ namespace annot {
 using namespace mtg::annot::matrix;
 using mtg::common::logger;
 using mtg::graph::boss::BOSS;
+using node_index = graph::DeBruijnGraph::node_index;
 namespace fs = std::filesystem;
 
 using anchor_bv_type = RowDiff<ColumnMajor>::anchor_bv_type;
@@ -264,7 +266,24 @@ void sum_and_call_counts(const fs::path &dir,
     }
 }
 
-rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
+std::shared_ptr<const bit_vector> get_last(const graph::DeBruijnGraph &graph) {
+    if (auto* dbg_succ = dynamic_cast<graph::DBGSuccinct const*>(&graph)) {
+        return std::shared_ptr<const bit_vector>( // aliasing ctor, empty owner: non-owning view
+            std::shared_ptr<const bit_vector>{}, &dbg_succ->get_boss().get_last());
+    } else { // any other graph: build the vector by visiting every node
+        bit_vector_dyn last_bv(graph.max_index() + 1);
+        graph.call_nodes([&](node_index v) {
+            std::pair<char, node_index> last; // value-initialized to {0, 0}
+            graph.call_outgoing_kmers(v, [&](node_index u, char c) {
+                last = std::max(last, std::pair{c, u}); // keep the largest (char, node) pair
+            });
+            last_bv.set(last.second, true); // NOTE(review): sets bit 0 when v has no outgoing k-mers
+        });
+        return std::make_shared<bit_vector_dyn>(std::move(last_bv));
+    }
+}
+
+rd_succ_bv_type route_at_forks(const graph::DeBruijnGraph &graph,
                                const std::string &rd_succ_filename,
                                const std::string &count_vectors_dir,
                                const std::string &row_count_extension) {
@@ -277,37 +296,38 @@ rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
         if (utils::ends_with(p.path(), row_count_extension))
             optimize_forks = true;
     }
-
+    // Other graphs may not support consecutive access
+    optimize_forks &= (bool)dynamic_cast<graph::DBGSuccinct const*>(&graph);
     if (optimize_forks) {
         logger->trace("RowDiff successors will be set to the adjacent nodes with"
                       " the largest number of labels");
 
-        const bit_vector &last = graph.get_boss().get_last();
+        auto last = get_last(graph);
         graph::DeBruijnGraph::node_index graph_idx = to_node(0);
 
         std::vector<uint32_t> outgoing_counts;
 
-        sdsl::bit_vector rd_succ_bv(last.size(), false);
+        sdsl::bit_vector rd_succ_bv(last->size(), false);
 
         sum_and_call_counts(count_vectors_dir, row_count_extension, "row counts",
             [&](int32_t count) {
                 // TODO: skip single outgoing
                 outgoing_counts.push_back(count);
-                if (last[graph.kmer_to_boss_index(graph_idx)]) {
+                if ((*last)[graph_idx]) {
                     // pick the node with the largest count
                     size_t max_pos = std::max_element(outgoing_counts.rbegin(),
                                                       outgoing_counts.rend())
                                      - outgoing_counts.rbegin();
-                    rd_succ_bv[graph.kmer_to_boss_index(graph_idx - max_pos)] = true;
+                    rd_succ_bv[graph_idx - max_pos] = true;
                     outgoing_counts.resize(0);
                 }
                 graph_idx++;
             }
         );
 
-        if (graph_idx != graph.num_nodes() + 1) {
-            logger->error("Size the count vectors is incompatible with the"
-                          " graph: {} != {}", graph_idx - 1, graph.num_nodes());
+        if (graph_idx != graph.max_index() + 1) {
+            logger->error("Size of the count vectors is incompatible with the"
+                          " graph: {} != {}", graph_idx - 1, graph.max_index());
             exit(1);
         }
 
@@ -326,7 +346,72 @@ rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
     return rd_succ;
 }
 
-void build_pred_succ(const std::string &graph_fname,
+node_index row_diff_successor(const graph::DeBruijnGraph &graph,
+                              node_index node,
+                              const bit_vector &rd_succ) {
+    if (auto* dbg_succ = dynamic_cast<graph::DBGSuccinct const*>(&graph)) {
+        return dbg_succ->get_boss().row_diff_successor(node, rd_succ);
+    } else { // any other graph: pick the outgoing neighbour marked in rd_succ
+        node_index succ = graph::DeBruijnGraph::npos;
+        graph.adjacent_outgoing_nodes(node, [&](node_index adjacent_node) {
+            if (rd_succ[adjacent_node]) {
+                succ = adjacent_node; // if several neighbours are marked, the last one reported wins
+            }
+        });
+        assert(succ != graph::DeBruijnGraph::npos && "a row diff successor must exist");
+        return succ; // still npos if nothing was marked; only the debug-build assert catches that
+    }
+}
+
+// Sets terminal[v] for sinks, cycle-closing nodes, and every node that would otherwise be
+// max_length row-diff successor steps away from the next terminal.
+void row_diff_traverse(const graph::DeBruijnGraph &graph,
+                       size_t num_threads,
+                       size_t max_length,
+                       const bit_vector &rd_succ,
+                       sdsl::bit_vector *terminal) {
+    if (auto* dbg_succ = dynamic_cast<graph::DBGSuccinct const*>(&graph)) {
+        return dbg_succ->get_boss().row_diff_traverse(
+            num_threads, max_length, rd_succ, terminal);
+    } else { // generic graphs: sequential traversal, num_threads is not used
+        sdsl::bit_vector visited(graph.max_index() + 1);
+        auto finalised = visited;
+        std::vector<size_t> distance(graph.max_index() + 1);
+        assert(terminal->size() == visited.size());
+        assert(rd_succ.size() == visited.size());
+        auto set_terminal = [&](node_index v) { // node_index, not int: int truncates large indexes
+            distance[v] = 0;
+            (*terminal)[v] = true;
+        };
+        std::stack<node_index> path; // plain local (no static state); empty after each callback
+        graph.call_nodes([&](node_index v) {
+            while (!visited[v]) {
+                path.push(v);
+                visited[v] = true;
+                if (!graph.has_no_outgoing(v))
+                    v = row_diff_successor(graph, v, rd_succ);
+            }
+            // Either a sink, or a cyclic dependency
+            if (!finalised[v]) {
+                set_terminal(v);
+                finalised[v] = true;
+            }
+            node_index succ;
+            while (!empty(path)) {
+                succ = std::exchange(v, path.top());
+                if (!finalised[v]) {
+                    distance[v] = distance[succ] + 1;
+                    if (distance[v] == max_length)
+                        set_terminal(v);
+                    finalised[v] = true;
+                }
+                path.pop();
+            }
+        });
+    }
+}
+
+void build_pred_succ(const graph::DeBruijnGraph &graph,
                      const std::string &outfbase,
                      const std::string &count_vectors_dir,
                      const std::string &row_count_extension,
@@ -342,68 +427,68 @@ void build_pred_succ(const std::string &graph_fname,
     logger->trace("Building and writing successor and predecessor files to {}.*",
                   outfbase);
 
-    graph::DBGSuccinct graph(2);
-    logger->trace("Loading graph...");
-    if (!graph.load(graph_fname)) {
-        logger->error("Cannot load graph from {}", graph_fname);
-        std::exit(1);
+    std::optional<sdsl::bit_vector> dummy;
+    auto* succinct = dynamic_cast<graph::DBGSuccinct const*>(&graph);
+    if (succinct) {
+        dummy = succinct->get_boss().mark_all_dummy_edges(num_threads);
     }
 
     // assign row-diff successors at forks
-    rd_succ_bv_type rd_succ = route_at_forks(graph, outfbase + kRowDiffForkSuccExt,
-                                             count_vectors_dir, row_count_extension);
-
-    const BOSS &boss = graph.get_boss();
-
-    sdsl::bit_vector dummy = boss.mark_all_dummy_edges(num_threads);
+    auto rd_succ = route_at_forks(graph, outfbase + kRowDiffForkSuccExt,
+                                  count_vectors_dir, row_count_extension);
 
     // create the succ/pred files, indexed using annotation indices
-    uint32_t width = sdsl::bits::hi(graph.num_nodes()) + 1;
+    uint32_t width = sdsl::bits::hi(graph.max_index()) + 1;
     sdsl::int_vector_buffer<> succ(outfbase + ".succ", std::ios::out, BUFFER_SIZE, width);
     sdsl::int_vector_buffer<1> succ_boundary(outfbase + ".succ_boundary", std::ios::out, BUFFER_SIZE);
     sdsl::int_vector_buffer<> pred(outfbase + ".pred", std::ios::out, BUFFER_SIZE, width);
     sdsl::int_vector_buffer<1> pred_boundary(outfbase + ".pred_boundary", std::ios::out, BUFFER_SIZE);
 
-    ProgressBar progress_bar(graph.num_nodes(), "Compute succ/pred", std::cerr,
+    ProgressBar progress_bar(graph.max_index(), "Compute succ/pred", std::cerr,
                              !common::get_verbose());
 
     const uint64_t BS = 1'000'000;
+    const auto fallback_succ = rd_succ.size() ? nullptr : get_last(graph); // built once, not per node
-    // traverse BOSS table in parallel processing blocks of size |BS|
+    // traverse graph in parallel processing blocks of size |BS|
     // use static scheduling to make threads process ordered contiguous blocks
     #pragma omp parallel for ordered num_threads(num_threads) schedule(dynamic)
-    for (uint64_t start = 1; start <= graph.num_nodes(); start += BS) {
-        std::vector<uint64_t> succ_buf;
+    for (node_index start = 1; start <= graph.max_index(); start += BS) {
+        std::vector<node_index> succ_buf;
         std::vector<bool> succ_boundary_buf;
-        std::vector<uint64_t> pred_buf;
+        std::vector<node_index> pred_buf;
         std::vector<bool> pred_boundary_buf;
 
-        for (uint64_t i = start; i < std::min(start + BS, graph.num_nodes() + 1); ++i) {
-            BOSS::edge_index boss_idx = graph.kmer_to_boss_index(i);
-            if (!dummy[boss_idx]) {
-                BOSS::edge_index next = boss.fwd(boss_idx);
-                assert(next);
-                if (!dummy[next]) {
-                    while (rd_succ.size() && !rd_succ[next]) {
-                        next--;
-                        assert(!boss.get_last(next));
-                    }
-                    succ_buf.push_back(to_row(graph.boss_to_kmer_index(next)));
+        for (node_index i = start; i < std::min(start + BS, graph.max_index() + 1); ++i) {
+            bool skip_succ = false, skip_all = false;
+            if (succinct) { // Legacy code for DBGSuccinct
+                BOSS::edge_index boss_idx = i;
+                if((*dummy)[boss_idx]) {
+                    skip_all = true;
+                } else {
+                    skip_succ = (*dummy)[succinct->get_boss().fwd(boss_idx)];
+                }
+            }
+            auto with_rd_succ = [&](bit_vector const& rd_succ) {
+                if(!skip_succ) {
+                    auto j = row_diff_successor(graph, i, rd_succ);
+                    succ_buf.push_back(to_row(j));
                     succ_boundary_buf.push_back(0);
                 }
-                // compute predecessors only for row-diff successors
-                if (rd_succ.size() ? rd_succ[boss_idx] : boss.get_last(boss_idx)) {
-                    BOSS::TAlphabet d = boss.get_node_last_value(boss_idx);
-                    BOSS::edge_index back_idx = boss.bwd(boss_idx);
-                    boss.call_incoming_to_target(back_idx, d,
-                        [&](BOSS::edge_index pred) {
-                            // dummy predecessors are ignored
-                            if (!dummy[pred]) {
-                                uint64_t node_index = graph.boss_to_kmer_index(pred);
-                                pred_buf.push_back(to_row(node_index));
-                                pred_boundary_buf.push_back(0);
-                            }
+                if(rd_succ[i]) {
+                    graph.adjacent_incoming_nodes(i, [&](auto pred) {
+                        if (dummy && (*dummy)[pred]) {
+                            return;
                         }
-                    );
+                        pred_buf.push_back(to_row(pred));
+                        pred_boundary_buf.push_back(0);
+                    });
+                }
+            };
+            if (!skip_all) {
+                if (rd_succ.size()) {
+                    with_rd_succ(rd_succ);
+                } else { // no fork successors chosen: route along the last outgoing edges
+                    with_rd_succ(*fallback_succ);
                 }
             }
             succ_boundary_buf.push_back(1);
@@ -424,7 +509,7 @@ void build_pred_succ(const std::string &graph_fname,
     logger->trace("Pred/succ nodes written to {}.pred/succ", outfbase);
 }
 
-void assign_anchors(const std::string &graph_fname,
+void assign_anchors(const graph::DeBruijnGraph &graph,
                     const std::string &outfbase,
                     const std::filesystem::path &count_vectors_dir,
                     uint32_t max_length,
@@ -436,14 +521,7 @@ void assign_anchors(const std::string &graph_fname,
         return;
     }
 
-    graph::DBGSuccinct graph(2);
-    logger->trace("Loading graph...");
-    if (!graph.load(graph_fname)) {
-        logger->error("Cannot load graph from {}", graph_fname);
-        std::exit(1);
-    }
-    const BOSS &boss = graph.get_boss();
-    const uint64_t num_rows = graph.num_nodes();
+    const uint64_t num_rows = graph.max_index();
 
     bool optimize_anchors = false;
     for (const auto &p : fs::directory_iterator(count_vectors_dir)) {
@@ -451,7 +529,7 @@ void assign_anchors(const std::string &graph_fname,
             optimize_anchors = true;
     }
 
-    sdsl::bit_vector anchors_bv(boss.get_last().size(), false);
+    sdsl::bit_vector anchors_bv(graph.max_index() + 1, false);
 
     if (optimize_anchors) {
         logger->trace("Making every row with negative reduction an anchor...");
@@ -461,7 +539,7 @@ void assign_anchors(const std::string &graph_fname,
             [&](int32_t count) {
                 // check if the reduction is negative
                 if (count < 0)
-                    anchors_bv[graph.kmer_to_boss_index(to_node(i))] = true;
+                    anchors_bv[to_node(i)] = true;
                 i++;
             }
         );
@@ -492,11 +570,12 @@ void assign_anchors(const std::string &graph_fname,
 
         if (rd_succ.size()) {
             logger->trace("Assigning anchors for RowDiff successors {}...", rd_succ_fname);
-            boss.row_diff_traverse(num_threads, max_length, rd_succ, &anchors_bv);
+            row_diff_traverse(graph, num_threads, max_length, rd_succ, &anchors_bv);
         } else {
             logger->warn("Assigning anchors without chosen RowDiff successors."
                          " The last outgoing edges will be used for routing.");
-            boss.row_diff_traverse(num_threads, max_length, boss.get_last(), &anchors_bv);
+            auto last = get_last(graph);
+            row_diff_traverse(graph, num_threads, max_length, *last, &anchors_bv);
         }
     }
 
@@ -505,7 +584,7 @@ void assign_anchors(const std::string &graph_fname,
         sdsl::bit_vector anchors(num_rows, false);
         for (BOSS::edge_index i = 1; i < anchors_bv.size(); ++i) {
             if (anchors_bv[i]) {
-                uint64_t graph_idx = graph.boss_to_kmer_index(i);
+                uint64_t graph_idx = i;
                 assert(to_row(graph_idx) < num_rows);
                 anchors[to_row(graph_idx)] = 1;
             }
@@ -929,7 +1008,7 @@ void convert_batch_to_row_diff(const std::string &pred_succ_fprefix,
                         // reduction (zero diff)
                         __atomic_add_fetch(&row_nbits_block[chunk_idx], 1, __ATOMIC_RELAXED);
                     }
-                } else {
+                } else if (succ || anchor[row_idx]) {
                     bool is_anchor = anchor[row_idx];
                     // add current bit if this node is an anchor
                     // or if the successor has zero diff
diff --git a/metagraph/src/annotation/row_diff_builder.hpp b/metagraph/src/annotation/row_diff_builder.hpp
index f57fe4c38c..47e3ffa116 100644
--- a/metagraph/src/annotation/row_diff_builder.hpp
+++ b/metagraph/src/annotation/row_diff_builder.hpp
@@ -16,13 +16,13 @@ void count_labels_per_row(const std::vector<std::string> &source_files,
                           const std::string &row_count_fname,
                           bool with_coordinates = false);
 
-void build_pred_succ(const std::string &graph_filename,
+void build_pred_succ(const graph::DeBruijnGraph &graph,
                      const std::string &outfbase,
                      const std::string &count_vectors_dir,
                      const std::string &row_count_extension,
                      uint32_t num_threads);
 
-void assign_anchors(const std::string &graph_filename,
+void assign_anchors(const graph::DeBruijnGraph &graph,
                     const std::string &outfbase,
                     const std::filesystem::path &dest_dir,
                     uint32_t max_length,
diff --git a/metagraph/src/cli/stats.cpp b/metagraph/src/cli/stats.cpp
index f4a2cee9c6..c29ef07c09 100644
--- a/metagraph/src/cli/stats.cpp
+++ b/metagraph/src/cli/stats.cpp
@@ -76,6 +76,7 @@ void print_stats(const graph::DeBruijnGraph &graph, bool print_counts_hist) {
     std::cout << "====================== GRAPH STATS =====================" << std::endl;
     std::cout << "k: " << graph.get_k() << std::endl;
     std::cout << "nodes (k): " << graph.num_nodes() << std::endl;
+    std::cout << "max index (k): " << graph.max_index() << std::endl;
     std::cout << "mode: " << Config::graphmode_to_string(graph.get_mode()) << std::endl;
 
     if (auto weights = graph.get_extension<graph::NodeWeights>()) {
@@ -143,7 +144,6 @@ void print_stats(const graph::DeBruijnGraph &graph, bool print_counts_hist) {
             std::cout << std::endl;
         }
     }
-
     std::cout << "========================================================" << std::endl;
 }
 
diff --git a/metagraph/src/graph/alignment/aligner_seeder_methods.cpp b/metagraph/src/graph/alignment/aligner_seeder_methods.cpp
index 306c7f6a0d..1a6d7e03d4 100644
--- a/metagraph/src/graph/alignment/aligner_seeder_methods.cpp
+++ b/metagraph/src/graph/alignment/aligner_seeder_methods.cpp
@@ -104,7 +104,7 @@ void suffix_to_prefix(const DBGSuccinct &dbg_succ,
         const auto &[first, last, seed_length] = final_range;
         assert(seed_length == boss.get_k());
         for (boss::BOSS::edge_index i = first; i <= last; ++i) {
-            DBGSuccinct::node_index node = dbg_succ.boss_to_kmer_index(i);
+            DBGSuccinct::node_index node = dbg_succ.validate_edge(i);
             if (node)
                 callback(node);
         }
diff --git a/metagraph/src/graph/alignment/alignment.cpp b/metagraph/src/graph/alignment/alignment.cpp
index b1bdd0d8a7..ef1f4fb29b 100644
--- a/metagraph/src/graph/alignment/alignment.cpp
+++ b/metagraph/src/graph/alignment/alignment.cpp
@@ -550,7 +550,7 @@ void Alignment::reverse_complement(const DeBruijnGraph &graph,
             // the node is present in the underlying graph, so use
             // lower-level methods
             const auto &boss = dbg_succ.get_boss();
-            boss::BOSS::edge_index edge = dbg_succ.kmer_to_boss_index(nodes_[0]);
+            boss::BOSS::edge_index edge = nodes_[0];
             boss::BOSS::TAlphabet edge_label = boss.get_W(edge) % boss.alph_size;
 
             // TODO: This picks the node which is found by always traversing
@@ -565,7 +565,7 @@ void Alignment::reverse_complement(const DeBruijnGraph &graph,
                     return;
                 }
 
-                nodes_[0] = dbg_succ.boss_to_kmer_index(edge);
+                nodes_[0] = dbg_succ.validate_edge(edge);
                 assert(nodes_[0]);
                 sequence_.push_back(boss.decode(edge_label));
                 assert(graph.get_node_sequence(nodes_[0])
diff --git a/metagraph/src/graph/alignment/annotation_buffer.cpp b/metagraph/src/graph/alignment/annotation_buffer.cpp
index 4020f312a7..a644bf2933 100644
--- a/metagraph/src/graph/alignment/annotation_buffer.cpp
+++ b/metagraph/src/graph/alignment/annotation_buffer.cpp
@@ -78,7 +78,7 @@ void AnnotationBuffer::fetch_queued_annotations() {
                 continue;
             }
 
-            if (boss && !boss->get_W(dbg_succ->kmer_to_boss_index(base_path[i]))) {
+            if (boss && !boss->get_W(base_path[i])) {
                 // skip dummy nodes
                 if (node_to_cols_.try_emplace(base_path[i], 0).second && has_coordinates())
                     label_coords_.emplace_back();
diff --git a/metagraph/src/graph/graph_extensions/node_first_cache.cpp b/metagraph/src/graph/graph_extensions/node_first_cache.cpp
index a945acf12f..2534dbbeda 100644
--- a/metagraph/src/graph/graph_extensions/node_first_cache.cpp
+++ b/metagraph/src/graph/graph_extensions/node_first_cache.cpp
@@ -36,14 +36,14 @@ void NodeFirstCache::call_incoming_edges(edge_index edge,
 
 void NodeFirstCache::call_incoming_kmers(node_index node,
                                          const IncomingEdgeCallback &callback) const {
-    assert(node > 0 && node <= dbg_succ_.num_nodes());
+    assert(dbg_succ_.is_valid(node));
 
-    edge_index edge = dbg_succ_.kmer_to_boss_index(node);
+    edge_index edge = node;
 
     call_incoming_edges(edge,
         [&](edge_index prev_edge) {
-            node_index prev = dbg_succ_.boss_to_kmer_index(prev_edge);
-            if (prev != DeBruijnGraph::npos)
+            node_index prev = prev_edge;
+            if (dbg_succ_.is_valid(prev))
                 callback(prev, get_first_char(prev_edge, edge));
         }
     );
diff --git a/metagraph/src/graph/representation/base/sequence_graph.cpp b/metagraph/src/graph/representation/base/sequence_graph.cpp
index bd6d55c485..89ecdfa16f 100644
--- a/metagraph/src/graph/representation/base/sequence_graph.cpp
+++ b/metagraph/src/graph/representation/base/sequence_graph.cpp
@@ -23,11 +23,11 @@ static_assert(!(kBlockSize & 0xFF));
 /*************** SequenceGraph ***************/
 
 void SequenceGraph::call_nodes(const std::function<void(node_index)> &callback,
-                               const std::function<bool()> &stop_early) const {
+                               const std::function<bool()> &terminate) const {
     assert(num_nodes() == max_index());
 
     const auto nnodes = num_nodes();
-    for (node_index i = 1; i <= nnodes && !stop_early(); ++i) {
+    for (node_index i = 1; i <= nnodes && !terminate(); ++i) {
         callback(i);
     }
 }
diff --git a/metagraph/src/graph/representation/base/sequence_graph.hpp b/metagraph/src/graph/representation/base/sequence_graph.hpp
index a1b8625b62..72ff596e82 100644
--- a/metagraph/src/graph/representation/base/sequence_graph.hpp
+++ b/metagraph/src/graph/representation/base/sequence_graph.hpp
@@ -1,6 +1,8 @@
 #ifndef __SEQUENCE_GRAPH_HPP__
 #define __SEQUENCE_GRAPH_HPP__
 
+#include "common/vectors/bit_vector.hpp"
+
 #include <vector>
 #include <string>
 #include <functional>
@@ -60,7 +62,7 @@ class SequenceGraph {
                                          const std::function<void(node_index)> &callback) const = 0;
 
     virtual void call_nodes(const std::function<void(node_index)> &callback,
-                            const std::function<bool()> &stop_early = [](){ return false; }) const;
+                            const std::function<bool()> &terminate = [](){ return false; }) const;
 
     virtual uint64_t num_nodes() const = 0;
     virtual uint64_t max_index() const { return num_nodes(); };
@@ -203,6 +205,7 @@ class DeBruijnGraph : public SequenceGraph {
                             const std::function<bool()> &stop_early = [](){ return false; }) const;
 
     virtual size_t outdegree(node_index) const = 0;
+    virtual bool has_no_outgoing(node_index node) const { return outdegree(node) == 0; }
     virtual bool has_single_outgoing(node_index node) const { return outdegree(node) == 1; }
     virtual bool has_multiple_outgoing(node_index node) const { return outdegree(node) > 1; }
 
diff --git a/metagraph/src/graph/representation/canonical_dbg.cpp b/metagraph/src/graph/representation/canonical_dbg.cpp
index 39b3798001..659a584de1 100644
--- a/metagraph/src/graph/representation/canonical_dbg.cpp
+++ b/metagraph/src/graph/representation/canonical_dbg.cpp
@@ -115,7 +115,7 @@ ::map_to_nodes_sequentially(std::string_view sequence,
                                                    sequence.substr(1));
         boss.map_to_edges(sequence.substr(1),
             [&](boss::BOSS::edge_index edge) {
-                path.push_back(dbg_succ->boss_to_kmer_index(edge));
+                path.push_back(dbg_succ->validate_edge(edge));
                 ++it;
             },
             []() { return false; },
@@ -285,7 +285,6 @@ void CanonicalDBG::call_incoming_kmers(node_index node,
     SmallVector<node_index> parents(alphabet.size(), npos);
     // "- has_sentinel_" because there can't be a dummy sink with another non-dummy edge
     size_t max_num_edges_left = parents.size() - has_sentinel_;
-
     auto incoming_kmer_callback = [&](node_index prev, char c) {
         assert(has_sentinel_ || c != boss::BOSS::kSentinel);
         assert(c == boss::BOSS::kSentinel || traverse_back(node, c) == prev);
@@ -601,18 +600,15 @@ ::adjacent_incoming_rc_strand(node_index node,
         //-> TCAAGCAGAAGACGGCATACGAGATCCTCT
         const boss::BOSS &boss = dbg_succ_->get_boss();
 
-        boss::BOSS::edge_index rc_edge = get_cache().get_prefix_rc(
-            dbg_succ_->kmer_to_boss_index(node),
-            spelling_hint
-        );
+        boss::BOSS::edge_index rc_edge = get_cache().get_prefix_rc(node, spelling_hint);
 
         if (!rc_edge)
             return;
 
         boss.call_outgoing(rc_edge, [&](boss::BOSS::edge_index adjacent_edge) {
             assert(dbg_succ_);
-            node_index prev = dbg_succ_->boss_to_kmer_index(adjacent_edge);
-            if (prev == DeBruijnGraph::npos)
+            node_index prev = adjacent_edge;
+            if (!dbg_succ_->is_valid(prev))
                 return;
 
             char c = boss.decode(boss.get_W(adjacent_edge) % boss.alph_size);
@@ -665,18 +661,15 @@ ::adjacent_outgoing_rc_strand(node_index node,
 
         auto &cache = get_cache();
 
-        boss::BOSS::edge_index rc_edge = cache.get_suffix_rc(
-            dbg_succ_->kmer_to_boss_index(node),
-            spelling_hint
-        );
+        boss::BOSS::edge_index rc_edge = cache.get_suffix_rc(node, spelling_hint);
 
         if (!rc_edge)
             return;
 
         cache.call_incoming_edges(rc_edge,
             [&](edge_index prev_edge) {
-                node_index prev = dbg_succ_->boss_to_kmer_index(prev_edge);
-                if (!prev)
+                node_index prev = prev_edge;
+                if (!dbg_succ_->is_valid(prev))
                     return;
 
                 char c = cache.get_first_char(prev_edge, rc_edge);
diff --git a/metagraph/src/graph/representation/masked_graph.cpp b/metagraph/src/graph/representation/masked_graph.cpp
index 319a936237..68b721b099 100644
--- a/metagraph/src/graph/representation/masked_graph.cpp
+++ b/metagraph/src/graph/representation/masked_graph.cpp
@@ -91,14 +91,14 @@ bit_vector_stat get_boss_mask(const DBGSuccinct &dbg_succ,
     sdsl::bit_vector mask_bv(dbg_succ.get_boss().num_edges() + 1, false);
     if (only_valid_nodes_in_mask) {
         kmers_in_graph.call_ones([&](auto i) {
-            assert(dbg_succ.kmer_to_boss_index(i));
-            mask_bv[dbg_succ.kmer_to_boss_index(i)] = true;
+            assert(i);
+            mask_bv[i] = true;
         });
     } else {
         dbg_succ.call_nodes([&](auto i) {
-            assert(dbg_succ.kmer_to_boss_index(i));
+            assert(i);
             if (kmers_in_graph[i])
-                mask_bv[dbg_succ.kmer_to_boss_index(i)] = true;
+                mask_bv[i] = true;
         });
     }
     return bit_vector_stat(std::move(mask_bv));
@@ -113,7 +113,7 @@ void MaskedDeBruijnGraph::call_sequences(const CallPath &callback,
 
         dbg_succ->get_boss().call_sequences([&](std::string&& sequence, auto&& path) {
             for (auto &node : path) {
-                node = dbg_succ->boss_to_kmer_index(node);
+                node = dbg_succ->validate_edge(node);
             }
             callback(sequence, path);
 
@@ -134,7 +134,7 @@ void MaskedDeBruijnGraph::call_unitigs(const CallPath &callback,
 
         dbg_succ->get_boss().call_unitigs([&](std::string&& sequence, auto&& path) {
             for (auto &node : path) {
-                node = dbg_succ->boss_to_kmer_index(node);
+                node = dbg_succ->validate_edge(node);
             }
             callback(sequence, path);
 
diff --git a/metagraph/src/graph/representation/succinct/dbg_succinct.cpp b/metagraph/src/graph/representation/succinct/dbg_succinct.cpp
index 915c76af49..a700360aef 100644
--- a/metagraph/src/graph/representation/succinct/dbg_succinct.cpp
+++ b/metagraph/src/graph/representation/succinct/dbg_succinct.cpp
@@ -82,27 +82,27 @@ bool DBGSuccinct::find(std::string_view sequence,
 
 // Traverse the outgoing edge
 node_index DBGSuccinct::traverse(node_index node, char next_char) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));  // node must be in [1, max_index()] and not masked out
 
     // return npos if the character is invalid
     if (boss_graph_->encode(next_char) == boss_graph_->alph_size)
         return npos;
 
     // dbg node is a boss edge
-    BOSS::edge_index boss_edge = kmer_to_boss_index(node);
+    BOSS::edge_index boss_edge = node;  // the node index is used as the BOSS edge index as-is
     boss_edge = boss_graph_->fwd(boss_edge);
-    return boss_to_kmer_index(
+    return validate_edge(  // yields npos unless the picked edge passes is_valid()
         boss_graph_->pick_edge(boss_edge, boss_graph_->encode(next_char))
     );
 }
 
 // Traverse the incoming edge
 node_index DBGSuccinct::traverse_back(node_index node, char prev_char) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));  // node must be in [1, max_index()] and not masked out
 
     // dbg node is a boss edge
-    BOSS::edge_index edge = boss_graph_->bwd(kmer_to_boss_index(node));
-    return boss_to_kmer_index(
+    BOSS::edge_index edge = boss_graph_->bwd(node);  // the node index is passed to BOSS unchanged
+    return validate_edge(  // yields npos unless the picked incoming edge passes is_valid()
         boss_graph_->pick_incoming_edge(edge, boss_graph_->encode(prev_char))
     );
 }
@@ -128,11 +128,11 @@ inline void call_outgoing(const BOSS &boss,
 
 void DBGSuccinct::call_outgoing_kmers(node_index node,
                                       const OutgoingEdgeCallback &callback) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    call_outgoing(*boss_graph_, kmer_to_boss_index(node), [&](auto i) {
-        auto next = boss_to_kmer_index(i);
-        if (next != npos)
+    call_outgoing(*boss_graph_, node, [&](auto i) {
+        auto next = i;
+        if (is_valid(next))
             callback(next, boss_graph_->decode(boss_graph_->get_W(i)
                                 % boss_graph_->alph_size));
     });
@@ -140,9 +140,9 @@ void DBGSuccinct::call_outgoing_kmers(node_index node,
 
 void DBGSuccinct::call_incoming_kmers(node_index node,
                                       const IncomingEdgeCallback &callback) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto edge = kmer_to_boss_index(node);
+    auto edge = node;
 
     boss_graph_->call_incoming_to_target(boss_graph_->bwd(edge),
         boss_graph_->get_node_last_value(edge),
@@ -150,8 +150,8 @@ void DBGSuccinct::call_incoming_kmers(node_index node,
             assert(boss_graph_->get_W(incoming_boss_edge) % boss_graph_->alph_size
                     == boss_graph_->get_node_last_value(edge));
 
-            auto prev = boss_to_kmer_index(incoming_boss_edge);
-            if (prev != npos) {
+            auto prev = incoming_boss_edge;
+            if (is_valid(prev)) {
                 callback(prev,
                     boss_graph_->decode(
                         boss_graph_->get_minus_k_value(incoming_boss_edge, get_k() - 2).first
@@ -164,20 +164,20 @@ void DBGSuccinct::call_incoming_kmers(node_index node,
 
 void DBGSuccinct::adjacent_outgoing_nodes(node_index node,
                                           const std::function<void(node_index)> &callback) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));  // node must be in [1, max_index()] and not masked out
 
-    call_outgoing(*boss_graph_, kmer_to_boss_index(node), [&](auto i) {
-        auto next = boss_to_kmer_index(i);
-        if (next != npos)
+    call_outgoing(*boss_graph_, node, [&](auto i) {  // node doubles as the BOSS edge index
+        auto next = i;  // no index translation: the BOSS edge index is the node index
+        if (is_valid(next))  // skip edges that are out of range or masked out
             callback(next);
     });
 }
 
 void DBGSuccinct::adjacent_incoming_nodes(node_index node,
                                           const std::function<void(node_index)> &callback) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto edge = kmer_to_boss_index(node);
+    auto edge = node;
 
     boss_graph_->call_incoming_to_target(boss_graph_->bwd(edge),
         boss_graph_->get_node_last_value(edge),
@@ -185,13 +185,32 @@ void DBGSuccinct::adjacent_incoming_nodes(node_index node,
             assert(boss_graph_->get_W(incoming_boss_edge) % boss_graph_->alph_size
                     == boss_graph_->get_node_last_value(edge));
 
-            auto prev = boss_to_kmer_index(incoming_boss_edge);
-            if (prev != npos)
+            auto prev = incoming_boss_edge;
+            if (is_valid(prev))
                 callback(prev);
         }
     );
 }
 
+void DBGSuccinct::call_nodes(const std::function<void(node_index)> &callback,  // invoked once per valid node index
+                             const std::function<bool()> &terminate) const {  // polled before each callback; true stops the walk
+    if (valid_edges_) {  // a mask exists: visit only the indexes whose bit is set
+        try {
+            valid_edges_->call_ones([&](uint64_t i) {
+                if (terminate())  // poll first, so both branches agree when terminate() is already true
+                    throw early_term();  // the exception unwinds out of call_ones
+                callback(i);
+            });
+        } catch (early_term&) {}  // early termination is an expected outcome, not an error
+        return;
+    }
+    for (node_index i = 1; i <= max_index() && !terminate(); ++i) {  // no mask: scan indexes 1..max_index()
+        if (is_valid(i)) {
+            callback(i);
+        }
+    }
+}
+
 void DBGSuccinct::add_sequence(std::string_view sequence,
                                const std::function<void(node_index)> &on_insertion) {
     if (sequence.size() < get_k())
@@ -223,7 +242,7 @@ void DBGSuccinct::add_sequence(std::string_view sequence,
 
         // Call all new nodes inserted including the dummy ones, unless they
         // are masked out.
-        on_insertion(boss_to_kmer_index(new_boss_edge));
+        on_insertion(validate_edge(new_boss_edge));
     }
 
     assert(!valid_edges_.get() || !(*valid_edges_)[0]);
@@ -234,9 +253,9 @@ void DBGSuccinct::add_sequence(std::string_view sequence,
 }
 
 std::string DBGSuccinct::get_node_sequence(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     return boss_graph_->get_node_str(boss_edge)
             + boss_graph_->decode(boss_graph_->get_W(boss_edge) % boss_graph_->alph_size);
@@ -256,7 +275,7 @@ void DBGSuccinct::map_to_nodes_sequentially(std::string_view sequence,
 
     boss_graph_->map_to_edges(
         sequence,
-        [&](BOSS::edge_index i) { callback(boss_to_kmer_index(i)); },
+        [&](BOSS::edge_index i) { callback(validate_edge(i)); },
         terminate,
         [&]() {
             if (!is_missing())
@@ -297,8 +316,8 @@ ::call_nodes_with_suffix_matching_longest_prefix(
         assert(first == last);
         auto edge = boss_graph_->pick_edge(last, encoded.back());
         if (edge) {
-            auto kmer_index = boss_to_kmer_index(edge);
-            if (kmer_index != npos) {
+            auto kmer_index = edge;
+            if (is_valid(kmer_index)) {
                 assert(str.size() == get_k());
                 assert(get_node_sequence(kmer_index) == str);
                 callback(kmer_index, get_k());
@@ -322,8 +341,8 @@ ::call_nodes_with_suffix_matching_longest_prefix(
             boss_graph_->call_incoming_to_target(boss_graph_->bwd(e),
                 boss_graph_->get_node_last_value(e),
                 [&](BOSS::edge_index incoming_edge_idx) {
-                    auto kmer_index = boss_to_kmer_index(incoming_edge_idx);
-                    if (kmer_index != npos) {
+                    auto kmer_index = incoming_edge_idx;
+                    if (is_valid(kmer_index)) {
                         assert(get_node_sequence(kmer_index).substr(get_k() - match_size)
                             == str.substr(0, match_size));
                         nodes.emplace_back(kmer_index);
@@ -344,8 +363,8 @@ ::call_nodes_with_suffix_matching_longest_prefix(
             boss_graph_->call_incoming_to_target(boss_graph_->bwd(e),
                 boss_graph_->get_node_last_value(e),
                 [&](BOSS::edge_index incoming_edge_idx) {
-                    auto kmer_index = boss_to_kmer_index(incoming_edge_idx);
-                    if (kmer_index != npos) {
+                    auto kmer_index = incoming_edge_idx;
+                    if (is_valid(kmer_index)) {
                         assert(get_node_sequence(kmer_index).substr(get_k() - match_size)
                             == str.substr(0, match_size));
                         callback(kmer_index, match_size);
@@ -361,13 +380,13 @@ void DBGSuccinct::traverse(node_index start,
                            const char *end,
                            const std::function<void(node_index)> &callback,
                            const std::function<bool()> &terminate) const {
-    assert(start > 0 && start <= num_nodes());
+    assert(is_valid(start));
     assert(end >= begin);
 
     if (terminate())
         return;
 
-    auto edge = kmer_to_boss_index(start);
+    auto edge = start;
     assert(edge);
 
     BOSS::TAlphabet w;
@@ -379,8 +398,8 @@ void DBGSuccinct::traverse(node_index start,
         edge = boss_graph_->fwd(edge, w % boss_graph_->alph_size);
         edge = boss_graph_->pick_edge(edge, boss_graph_->encode(*begin));
 
-        start = boss_to_kmer_index(edge);
-        if (start == npos)
+        start = edge;
+        if (!is_valid(start))
             return;
 
         callback(start);
@@ -442,13 +461,13 @@ void DBGSuccinct::map_to_nodes(std::string_view sequence,
         for (size_t i = 0; i < boss_edges.size() && !terminate(); ++i) {
             // the definition of a canonical k-mer is redefined:
             //      use k-mer with smaller index in the BOSS table.
-            callback(boss_to_kmer_index(boss_edges[i]));
+            callback(validate_edge(boss_edges[i]));
         }
 
     } else {
         boss_graph_->map_to_edges(
             sequence,
-            [&](BOSS::edge_index i) { callback(boss_to_kmer_index(i)); },
+            [&](BOSS::edge_index i) { callback(validate_edge(i)); },
             terminate,
             [&]() {
                 if (!is_missing())
@@ -468,7 +487,7 @@ void DBGSuccinct::call_sequences(const CallPath &callback,
     boss_graph_->call_sequences(
         [&](std::string&& seq, auto&& path) {
             for (auto &node : path) {
-                node = boss_to_kmer_index(node);
+                node = validate_edge(node);
             }
             callback(std::move(seq), std::move(path));
         },
@@ -485,7 +504,7 @@ void DBGSuccinct::call_unitigs(const CallPath &callback,
     boss_graph_->call_unitigs(
         [&](std::string&& seq, auto&& path) {
             for (auto &node : path) {
-                node = boss_to_kmer_index(node);
+                node = validate_edge(node);
             }
             callback(std::move(seq), std::move(path));
         },
@@ -500,8 +519,8 @@ ::call_kmers(const std::function<void(node_index, const std::string&)> &callback
              const std::function<bool()> &stop_early) const {
     assert(boss_graph_.get());
     boss_graph_->call_kmers([&](auto index, const std::string &seq) {
-        auto node = boss_to_kmer_index(index);
-        assert(node != npos);
+        auto node = index;
+        assert(is_valid(node));
         callback(node, seq);
     }, stop_early);
 }
@@ -509,17 +528,17 @@ ::call_kmers(const std::function<void(node_index, const std::string&)> &callback
 void DBGSuccinct
 ::call_source_nodes(const std::function<void(node_index)> &callback) const {
     boss_graph_->call_start_edges([&](auto boss_edge) {
-        auto node = boss_to_kmer_index(boss_edge);
-        assert(node != npos);
+        auto node = boss_edge;
+        assert(is_valid(node));
         assert(!indegree(node));
         callback(node);
     });
 }
 
 size_t DBGSuccinct::outdegree(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     if (boss_edge == 1)
         return boss_graph_->succ_last(1) - 1;
@@ -543,9 +562,9 @@ size_t DBGSuccinct::outdegree(node_index node) const {
 }
 
 bool DBGSuccinct::has_single_outgoing(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     if (boss_edge == 1)
         return boss_graph_->succ_last(1) == 2;
@@ -569,9 +588,9 @@ bool DBGSuccinct::has_single_outgoing(node_index node) const {
 }
 
 bool DBGSuccinct::has_multiple_outgoing(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     if (boss_edge == 1)
         return boss_graph_->succ_last(1) > 2;
@@ -586,9 +605,9 @@ bool DBGSuccinct::has_multiple_outgoing(node_index node) const {
 }
 
 size_t DBGSuccinct::indegree(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     if (boss_edge == 1)
         return 1;
@@ -602,9 +621,9 @@ size_t DBGSuccinct::indegree(node_index node) const {
 }
 
 bool DBGSuccinct::has_no_incoming(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     if (boss_edge == 1)
         return false;
@@ -618,9 +637,9 @@ bool DBGSuccinct::has_no_incoming(node_index node) const {
 }
 
 bool DBGSuccinct::has_single_incoming(node_index node) const {
-    assert(node > 0 && node <= num_nodes());
+    assert(is_valid(node));
 
-    auto boss_edge = kmer_to_boss_index(node);
+    auto boss_edge = node;
 
     if (boss_edge == 1)
         return false;
@@ -645,6 +664,10 @@ uint64_t DBGSuccinct::num_nodes() const {
                 : boss_graph_->num_edges();
 }
 
+uint64_t DBGSuccinct::max_index() const {  // upper bound for node indexes, as checked by is_valid()
+    return boss_graph_->num_edges();  // the BOSS edge count; the valid_edges_ mask is not consulted
+}
+
 bool DBGSuccinct::load_without_mask(const std::string &filename) {
     // release the old mask
     valid_edges_.reset();
@@ -888,27 +911,31 @@ void DBGSuccinct::mask_dummy_kmers(size_t num_threads, bool with_pruning) {
     assert(!(*valid_edges_)[0]);
 }
 
-uint64_t DBGSuccinct::kmer_to_boss_index(node_index node) const {
-    assert(node > 0);
-    assert(node <= num_nodes());
+bool DBGSuccinct::is_valid(node_index node) const {  // true iff node is in [1, max_index()] and, when a mask exists, its bit is set
+    return 0 < node && node <= max_index() && (!valid_edges_ || (*valid_edges_)[node]);
+}
+node_index DBGSuccinct::validate_edge(node_index node) const {  // passes valid indexes through, maps all others to npos
+    return is_valid(node) ? node : npos;
+}
+node_index DBGSuccinct::select_node(uint64_t rank) const {  // maps a rank to a node index via select1 on the mask
+    assert(rank <= num_nodes());  // rank may not exceed the node count reported by num_nodes()
 
-    if (!valid_edges_.get())
-        return node;
+    if (!valid_edges_.get() || !rank)  // no mask, or rank 0: the value is returned unchanged
+        return rank;
 
-    return valid_edges_->select1(node);
+    return valid_edges_->select1(rank);  // index of the rank-th set bit in the mask
 }
 
-DBGSuccinct::node_index DBGSuccinct::boss_to_kmer_index(uint64_t boss_index) const {
-    assert(boss_index <= boss_graph_->num_edges());
-    assert(!valid_edges_.get() || boss_index < valid_edges_->size());
+uint64_t DBGSuccinct::rank_node(node_index node) const {  // counterpart of select_node: node index -> rank1 in the mask
+    assert(node <= max_index());  // node 0 is tolerated here and returned unchanged below
 
-    if (!valid_edges_.get() || !boss_index)
-        return boss_index;
+    if (!valid_edges_.get() || !node)  // no mask, or node 0: the value is returned unchanged
+        return node;
 
-    if (!(*valid_edges_)[boss_index])
+    if (!(*valid_edges_)[node])  // masked-out nodes have no rank
         return npos;
 
-    return valid_edges_->rank1(boss_index);
+    return valid_edges_->rank1(node);  // rank1 of the node's position in the mask
 }
 
 void DBGSuccinct
diff --git a/metagraph/src/graph/representation/succinct/dbg_succinct.hpp b/metagraph/src/graph/representation/succinct/dbg_succinct.hpp
index bdbabe3104..8e92510ba9 100644
--- a/metagraph/src/graph/representation/succinct/dbg_succinct.hpp
+++ b/metagraph/src/graph/representation/succinct/dbg_succinct.hpp
@@ -37,6 +37,9 @@ class DBGSuccinct : public DeBruijnGraph {
     virtual void adjacent_incoming_nodes(node_index node,
                                          const std::function<void(node_index)> &callback) const override final;
 
+    virtual void call_nodes(const std::function<void(node_index)> &callback,  // called for every valid node index
+                            const std::function<bool()> &terminate = [](){ return false; }) const override final;  // iteration stops once this returns true
+
     // Insert sequence to graph and invoke callback |on_insertion| for each new
     // node index augmenting the range [1,...,max_index], including those not
     // pointing to any real node in graph. That is, the callback is invoked for
@@ -110,6 +113,7 @@ class DBGSuccinct : public DeBruijnGraph {
      * edges in the BOSS graph (because an edge in the BOSS graph represents a k-mer).
      */
     virtual uint64_t num_nodes() const override final;
+    virtual uint64_t max_index() const override final;
 
     virtual void mask_dummy_kmers(size_t num_threads, bool with_pruning) final;
 
@@ -174,8 +178,10 @@ class DBGSuccinct : public DeBruijnGraph {
 
     virtual void call_source_nodes(const std::function<void(node_index)> &callback) const override final;
 
-    uint64_t kmer_to_boss_index(node_index kmer_index) const;
-    node_index boss_to_kmer_index(uint64_t boss_index) const;
+    bool is_valid(node_index node) const;  // node is in [1, max_index()] and not masked out
+    node_index validate_edge(node_index node) const;  // returns node if is_valid(node), npos otherwise
+    node_index select_node(uint64_t rank) const;  // rank -> node index (select1 on the mask; identity without a mask)
+    uint64_t rank_node(node_index node) const;  // node index -> rank (rank1 on the mask; npos if masked out)
 
     void initialize_bloom_filter_from_fpr(double false_positive_rate,
                                           uint32_t max_num_hash_functions = -1);
diff --git a/metagraph/tests/annotation/row_diff/test_row_diff.cpp b/metagraph/tests/annotation/row_diff/test_row_diff.cpp
index 158171a44e..5bf9d0416b 100644
--- a/metagraph/tests/annotation/row_diff/test_row_diff.cpp
+++ b/metagraph/tests/annotation/row_diff/test_row_diff.cpp
@@ -17,6 +17,10 @@ using ::testing::_;
 using mtg::annot::matrix::RowDiff;
 using mtg::annot::matrix::ColumnMajor;
 
+static auto graph_to_anno_index(graph::DeBruijnGraph::node_index node) {  // file-local shorthand used by the tests below
+    return graph::AnnotatedDBG::graph_to_anno_index(node);  // forwards to the AnnotatedDBG static helper
+}
+
 typedef RowDiff<ColumnMajor>::anchor_bv_type anchor_bv_type;
 
 TEST(RowDiff, Empty) {
@@ -95,28 +99,28 @@ TEST(RowDiff, GetRows) {
     annot.load_anchor(fterm_temp.name());
 
     auto rows = annot.get_rows({ 3, 3, 3, 3, 5, 5, 6, 7, 8, 9, 10, 11 });
-    EXPECT_EQ("CTAG", graph.get_node_sequence(4));
+    EXPECT_EQ("CTAG", graph.get_node_sequence(graph.select_node(4)));
     ASSERT_THAT(rows[3], ElementsAre(0, 1));
 
-    EXPECT_EQ("AGCT", graph.get_node_sequence(6));
+    EXPECT_EQ("AGCT", graph.get_node_sequence(graph.select_node(6)));
     ASSERT_THAT(rows[5], ElementsAre(1));
 
-    EXPECT_EQ("CTCT", graph.get_node_sequence(7));
+    EXPECT_EQ("CTCT", graph.get_node_sequence(graph.select_node(7)));
     ASSERT_THAT(rows[6], ElementsAre(0));
 
-    EXPECT_EQ("TAGC", graph.get_node_sequence(8));
+    EXPECT_EQ("TAGC", graph.get_node_sequence(graph.select_node(8)));
     ASSERT_THAT(rows[7], ElementsAre(1));
 
-    EXPECT_EQ("ACTA", graph.get_node_sequence(9));
+    EXPECT_EQ("ACTA", graph.get_node_sequence(graph.select_node(9)));
     ASSERT_THAT(rows[8], ElementsAre(1));
 
-    EXPECT_EQ("ACTC", graph.get_node_sequence(10));
+    EXPECT_EQ("ACTC", graph.get_node_sequence(graph.select_node(10)));
     ASSERT_THAT(rows[9], ElementsAre(0));
 
-    EXPECT_EQ("GCTA", graph.get_node_sequence(11));
+    EXPECT_EQ("GCTA", graph.get_node_sequence(graph.select_node(11)));
     ASSERT_THAT(rows[10], ElementsAre(1));
 
-    EXPECT_EQ("TCTA", graph.get_node_sequence(12));
+    EXPECT_EQ("TCTA", graph.get_node_sequence(graph.select_node(12)));
     ASSERT_THAT(rows[11], ElementsAre(0));
 }
 
@@ -149,28 +153,28 @@ TEST(RowDiff, GetAnnotation) {
     RowDiff<ColumnMajor> annot(&graph, std::move(mat));
     annot.load_anchor(fterm_temp.name());
 
-    EXPECT_EQ("CTAG", graph.get_node_sequence(4));
+    EXPECT_EQ("CTAG", graph.get_node_sequence(graph.select_node(4)));
     ASSERT_THAT(annot.get_rows({3})[0], ElementsAre(0, 1));
 
-    EXPECT_EQ("AGCT", graph.get_node_sequence(6));
+    EXPECT_EQ("AGCT", graph.get_node_sequence(graph.select_node(6)));
     ASSERT_THAT(annot.get_rows({5})[0], ElementsAre(1));
 
-    EXPECT_EQ("CTCT", graph.get_node_sequence(7));
+    EXPECT_EQ("CTCT", graph.get_node_sequence(graph.select_node(7)));
     ASSERT_THAT(annot.get_rows({6})[0], ElementsAre(0));
 
-    EXPECT_EQ("TAGC", graph.get_node_sequence(8));
+    EXPECT_EQ("TAGC", graph.get_node_sequence(graph.select_node(8)));
     ASSERT_THAT(annot.get_rows({7})[0], ElementsAre(1));
 
-    EXPECT_EQ("ACTA", graph.get_node_sequence(9));
+    EXPECT_EQ("ACTA", graph.get_node_sequence(graph.select_node(9)));
     ASSERT_THAT(annot.get_rows({8})[0], ElementsAre(1));
 
-    EXPECT_EQ("ACTC", graph.get_node_sequence(10));
+    EXPECT_EQ("ACTC", graph.get_node_sequence(graph.select_node(10)));
     ASSERT_THAT(annot.get_rows({9})[0], ElementsAre(0));
 
-    EXPECT_EQ("GCTA", graph.get_node_sequence(11));
+    EXPECT_EQ("GCTA", graph.get_node_sequence(graph.select_node(11)));
     ASSERT_THAT(annot.get_rows({10})[0], ElementsAre(1));
 
-    EXPECT_EQ("TCTA", graph.get_node_sequence(12));
+    EXPECT_EQ("TCTA", graph.get_node_sequence(graph.select_node(12)));
     ASSERT_THAT(annot.get_rows({11})[0], ElementsAre(0));
 }
 
@@ -187,47 +191,66 @@ TEST(RowDiff, GetAnnotationMasked) {
     graph.mask_dummy_kmers(1, false);
 
     // build annotation
-    sdsl::bit_vector bterminal = { 0, 0, 0, 0, 1, 0, 1, 0 };
+    sdsl::bit_vector bterminal_masked = { 0, 0, 0, 0, 1, 0, 1, 0 };
+    sdsl::bit_vector bterminal(graph.max_index() + 1);
+    sdsl::bit_vector cols_masked[2] = {
+        { 1, 0, 0, 0, 0, 0, 0, 0 },
+        { 0, 0, 0, 0, 1, 0, 1, 1 }
+    };
+    sdsl::bit_vector cols_concrete[2];
+    cols_concrete[0].resize(graph.max_index() + 1);
+    cols_concrete[1].resize(graph.max_index() + 1);
+    graph.call_nodes([&](auto i) {
+        auto rank = graph_to_anno_index(graph.rank_node(i));
+        bterminal[graph_to_anno_index(i)] = bterminal_masked[rank];
+        cols_concrete[0][graph_to_anno_index(i)] = cols_masked[0][rank];
+        cols_concrete[1][graph_to_anno_index(i)] = cols_masked[1][rank];
+    });
     anchor_bv_type terminal(bterminal);
     utils::TempFile fterm_temp;
     std::ofstream fterm(fterm_temp.name(), ios::binary);
     terminal.serialize(fterm);
     fterm.flush();
-
+    
     std::vector<std::unique_ptr<bit_vector>> cols(2);
-    cols[0] = std::make_unique<bit_vector_sd>(
-            std::initializer_list<bool>({ 1, 0, 0, 0, 0, 0, 0, 0 }));
-    cols[1] = std::make_unique<bit_vector_sd>(
-            std::initializer_list<bool>({ 0, 0, 0, 0, 1, 0, 1, 1 }));
+    cols[0] = std::make_unique<bit_vector_sd>(std::move(cols_concrete[0]));
+    cols[1] = std::make_unique<bit_vector_sd>(std::move(cols_concrete[1]));
 
     ColumnMajor mat(std::move(cols));
 
     RowDiff<ColumnMajor> annot(&graph, std::move(mat));
     annot.load_anchor(fterm_temp.name());
+    EXPECT_EQ("CTAG", graph.get_node_sequence(graph.select_node(1)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(1))})[0],
+                ElementsAre(0, 1));
 
-    EXPECT_EQ("CTAG", graph.get_node_sequence(1));
-    ASSERT_THAT(annot.get_rows({0})[0], ElementsAre(0, 1));
+    EXPECT_EQ("AGCT", graph.get_node_sequence(graph.select_node(2)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(2))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("AGCT", graph.get_node_sequence(2));
-    ASSERT_THAT(annot.get_rows({1})[0], ElementsAre(1));
+    EXPECT_EQ("CTCT", graph.get_node_sequence(graph.select_node(3)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(3))})[0],
+                ElementsAre(0));
 
-    EXPECT_EQ("CTCT", graph.get_node_sequence(3));
-    ASSERT_THAT(annot.get_rows({2})[0], ElementsAre(0));
+    EXPECT_EQ("TAGC", graph.get_node_sequence(graph.select_node(4)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(4))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("TAGC", graph.get_node_sequence(4));
-    ASSERT_THAT(annot.get_rows({3})[0], ElementsAre(1));
+    EXPECT_EQ("ACTA", graph.get_node_sequence(graph.select_node(5)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(5))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("ACTA", graph.get_node_sequence(5));
-    ASSERT_THAT(annot.get_rows({4})[0], ElementsAre(1));
-
-    EXPECT_EQ("ACTC", graph.get_node_sequence(6));
-    ASSERT_THAT(annot.get_rows({5})[0], ElementsAre(0));
+    EXPECT_EQ("ACTC", graph.get_node_sequence(graph.select_node(6)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(6))})[0],
+                ElementsAre(0));
 
-    EXPECT_EQ("GCTA", graph.get_node_sequence(7));
-    ASSERT_THAT(annot.get_rows({6})[0], ElementsAre(1));
+    EXPECT_EQ("GCTA", graph.get_node_sequence(graph.select_node(7)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(7))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("TCTA", graph.get_node_sequence(8));
-    ASSERT_THAT(annot.get_rows({7})[0], ElementsAre(0));
+    EXPECT_EQ("TCTA", graph.get_node_sequence(graph.select_node(8)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(8))})[0],
+                ElementsAre(0));
 }
 
 /**
@@ -260,34 +283,34 @@ TEST(RowDiff, GetAnnotationBifurcation) {
     RowDiff<ColumnMajor> annot(&graph, std::move(mat));
     annot.load_anchor(fterm_temp.name());
 
-    EXPECT_EQ("CTAG", graph.get_node_sequence(4));
+    EXPECT_EQ("CTAG", graph.get_node_sequence(graph.select_node(4)));
     ASSERT_THAT(annot.get_rows({3})[0], ElementsAre(0, 1));
 
-    EXPECT_EQ("CTAT", graph.get_node_sequence(5));
+    EXPECT_EQ("CTAT", graph.get_node_sequence(graph.select_node(5)));
     ASSERT_THAT(annot.get_rows({4})[0], ElementsAre(1));
 
-    EXPECT_EQ("TACT", graph.get_node_sequence(6));
+    EXPECT_EQ("TACT", graph.get_node_sequence(graph.select_node(6)));
     ASSERT_THAT(annot.get_rows({5})[0], ElementsAre(0));
 
-    EXPECT_EQ("AGCT", graph.get_node_sequence(7));
+    EXPECT_EQ("AGCT", graph.get_node_sequence(graph.select_node(7)));
     ASSERT_THAT(annot.get_rows({6})[0], ElementsAre(0, 1));
 
-    EXPECT_EQ("CTCT", graph.get_node_sequence(8));
+    EXPECT_EQ("CTCT", graph.get_node_sequence(graph.select_node(8)));
     ASSERT_THAT(annot.get_rows({7})[0], ElementsAre(1));
 
-    EXPECT_EQ("TAGC", graph.get_node_sequence(9));
+    EXPECT_EQ("TAGC", graph.get_node_sequence(graph.select_node(9)));
     ASSERT_THAT(annot.get_rows({8})[0], ElementsAre(0, 1));
 
-    EXPECT_EQ("ACTA", graph.get_node_sequence(12));
+    EXPECT_EQ("ACTA", graph.get_node_sequence(graph.select_node(12)));
     ASSERT_THAT(annot.get_rows({11})[0], ElementsAre(0));
 
-    EXPECT_EQ("ACTC", graph.get_node_sequence(13));
+    EXPECT_EQ("ACTC", graph.get_node_sequence(graph.select_node(13)));
     ASSERT_THAT(annot.get_rows({12})[0], ElementsAre(1));
 
-    EXPECT_EQ("GCTA", graph.get_node_sequence(14));
+    EXPECT_EQ("GCTA", graph.get_node_sequence(graph.select_node(14)));
     ASSERT_THAT(annot.get_rows({13})[0], ElementsAre(0, 1));
 
-    EXPECT_EQ("TCTA", graph.get_node_sequence(15));
+    EXPECT_EQ("TCTA", graph.get_node_sequence(graph.select_node(15)));
     ASSERT_THAT(annot.get_rows({14})[0], ElementsAre(1));
 }
 
@@ -299,57 +322,77 @@ TEST(RowDiff, GetAnnotationBifurcationMasked) {
     graph.mask_dummy_kmers(1, false);
 
     // build annotation
-    sdsl::bit_vector bterminal = { 0, 1, 0, 0, 0, 0, 1, 0, 1, 0 };
+    sdsl::bit_vector bterminal_masked = { 0, 1, 0, 0, 0, 0, 1, 0, 1, 0 };
+    sdsl::bit_vector bterminal(graph.max_index() + 1);
+    sdsl::bit_vector cols_masked[2] = {
+        {0, 0, 1, 0, 0, 0, 1, 0, 1, 0 },
+        {0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }
+    };
+    sdsl::bit_vector cols_concrete[2];
+    cols_concrete[0].resize(graph.max_index() + 1);
+    cols_concrete[1].resize(graph.max_index() + 1);
+    graph.call_nodes([&](auto i) {
+        auto rank = graph_to_anno_index(graph.rank_node(i));
+        bterminal[graph_to_anno_index(i)] = bterminal_masked[rank];
+        cols_concrete[0][graph_to_anno_index(i)] = cols_masked[0][rank];
+        cols_concrete[1][graph_to_anno_index(i)] = cols_masked[1][rank];
+    });
     anchor_bv_type terminal(bterminal);
     utils::TempFile fterm_temp;
     std::ofstream fterm(fterm_temp.name(), ios::binary);
     terminal.serialize(fterm);
     fterm.flush();
+    
+    std::vector<std::unique_ptr<bit_vector>> cols(2);
+    cols[0] = std::make_unique<bit_vector_sd>(std::move(cols_concrete[0]));
+    cols[1] = std::make_unique<bit_vector_sd>(std::move(cols_concrete[1]));
 
     Vector<uint64_t> diffs = { 1, 0, 1, 0, 0, 1 };
     sdsl::bit_vector boundary = { 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1 };
 
-
-    std::vector<std::unique_ptr<bit_vector>> cols(2);
-    cols[0] = std::make_unique<bit_vector_sd>(
-            std::initializer_list<bool>({0, 0, 1, 0, 0, 0, 1, 0, 1, 0 }));
-    cols[1] = std::make_unique<bit_vector_sd>(
-            std::initializer_list<bool>({0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }));
-
     ColumnMajor mat(std::move(cols));
 
     RowDiff<ColumnMajor> annot(&graph, std::move(mat));
     annot.load_anchor(fterm_temp.name());
 
-    EXPECT_EQ("CTAG", graph.get_node_sequence(1));
-    ASSERT_THAT(annot.get_rows({0})[0], ElementsAre(0, 1));
-
-    EXPECT_EQ("CTAT", graph.get_node_sequence(2));
-    ASSERT_THAT(annot.get_rows({1})[0], ElementsAre(1));
+    EXPECT_EQ("CTAG", graph.get_node_sequence(graph.select_node(1)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(1))})[0],
+                ElementsAre(0, 1));
+    EXPECT_EQ("CTAT", graph.get_node_sequence(graph.select_node(2)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(2))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("TACT", graph.get_node_sequence(3));
-    ASSERT_THAT(annot.get_rows({2})[0], ElementsAre(0));
+    EXPECT_EQ("TACT", graph.get_node_sequence(graph.select_node(3)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(3))})[0],
+                ElementsAre(0));
 
-    EXPECT_EQ("AGCT", graph.get_node_sequence(4));
-    ASSERT_THAT(annot.get_rows({3})[0], ElementsAre(0, 1));
+    EXPECT_EQ("AGCT", graph.get_node_sequence(graph.select_node(4)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(4))})[0],
+                ElementsAre(0, 1));
 
-    EXPECT_EQ("CTCT", graph.get_node_sequence(5));
-    ASSERT_THAT(annot.get_rows({4})[0], ElementsAre(1));
+    EXPECT_EQ("CTCT", graph.get_node_sequence(graph.select_node(5)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(5))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("TAGC", graph.get_node_sequence(6));
-    ASSERT_THAT(annot.get_rows({5})[0], ElementsAre(0, 1));
+    EXPECT_EQ("TAGC", graph.get_node_sequence(graph.select_node(6)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(6))})[0],
+                ElementsAre(0, 1));
 
-    EXPECT_EQ("ACTA", graph.get_node_sequence(7));
-    ASSERT_THAT(annot.get_rows({6})[0], ElementsAre(0));
+    EXPECT_EQ("ACTA", graph.get_node_sequence(graph.select_node(7)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(7))})[0],
+                ElementsAre(0));
 
-    EXPECT_EQ("ACTC", graph.get_node_sequence(8));
-    ASSERT_THAT(annot.get_rows({7})[0], ElementsAre(1));
+    EXPECT_EQ("ACTC", graph.get_node_sequence(graph.select_node(8)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(8))})[0],
+                ElementsAre(1));
 
-    EXPECT_EQ("GCTA", graph.get_node_sequence(9));
-    ASSERT_THAT(annot.get_rows({8})[0], ElementsAre(0, 1));
+    EXPECT_EQ("GCTA", graph.get_node_sequence(graph.select_node(9)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(9))})[0],
+                ElementsAre(0, 1));
 
-    EXPECT_EQ("TCTA", graph.get_node_sequence(10));
-    ASSERT_THAT(annot.get_rows({9})[0], ElementsAre(1));
+    EXPECT_EQ("TCTA", graph.get_node_sequence(graph.select_node(10)));
+    ASSERT_THAT(annot.get_rows({graph_to_anno_index(graph.select_node(10))})[0],
+                ElementsAre(1));
 }
 
 } // namespace
diff --git a/metagraph/tests/annotation/test_annotated_dbg.cpp b/metagraph/tests/annotation/test_annotated_dbg.cpp
index d1437aa725..52ad5bd077 100644
--- a/metagraph/tests/annotation/test_annotated_dbg.cpp
+++ b/metagraph/tests/annotation/test_annotated_dbg.cpp
@@ -509,8 +509,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsWithoutDummy) {
         );
         EXPECT_EQ(num_nodes, anno_graph.get_graph().num_nodes());
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + k
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         EXPECT_FALSE(anno_graph.label_exists("First"));
@@ -537,7 +537,7 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsWithoutDummy) {
         );
 
         anno_graph.annotator_->insert_rows(edge_to_row_idx(inserted_nodes));
-        EXPECT_EQ(anno_graph.get_graph().num_nodes() + 1, inserted_nodes.size());
+        EXPECT_EQ(anno_graph.get_graph().max_index() + 1, inserted_nodes.size());
 
         ASSERT_EQ(std::vector<std::string> { "First" },
                   anno_graph.get_labels(seq_first, 1));
@@ -556,8 +556,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsWithoutDummy) {
         EXPECT_TRUE(anno_graph.label_exists("Third"));
         EXPECT_FALSE(anno_graph.label_exists("Fourth"));
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + k
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         EXPECT_EQ(std::vector<std::string> { "First" },
@@ -627,8 +627,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsWithoutDummyParallel) {
             std::make_unique<annot::ColumnCompressed<>>(graph->max_index())
         );
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + k
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         EXPECT_FALSE(anno_graph.label_exists("First"));
@@ -661,7 +661,7 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsWithoutDummyParallel) {
         );
 
         anno_graph.annotator_->insert_rows(edge_to_row_idx(inserted_nodes));
-        EXPECT_EQ(anno_graph.get_graph().num_nodes() + 1, inserted_nodes.size());
+        EXPECT_EQ(anno_graph.get_graph().max_index() + 1, inserted_nodes.size());
 
         ASSERT_EQ(std::vector<std::string> { "First" },
                   anno_graph.get_labels(seq_first, 1));
@@ -685,8 +685,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsWithoutDummyParallel) {
         EXPECT_TRUE(anno_graph.label_exists("Third"));
         EXPECT_FALSE(anno_graph.label_exists("Fourth"));
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + k
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         EXPECT_EQ(std::vector<std::string> { "First" },
@@ -767,8 +767,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsPruneDummy) {
         EXPECT_FALSE(anno_graph.label_exists("Third"));
         EXPECT_FALSE(anno_graph.label_exists("Fourth"));
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + 1
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         ASSERT_EQ(std::vector<std::string> { "First" },
@@ -783,7 +783,7 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsPruneDummy) {
         );
 
         anno_graph.annotator_->insert_rows(edge_to_row_idx(inserted_nodes));
-        EXPECT_EQ(anno_graph.get_graph().num_nodes() + 1, inserted_nodes.size());
+        EXPECT_EQ(anno_graph.get_graph().max_index() + 1, inserted_nodes.size());
 
         ASSERT_EQ(std::vector<std::string> { "First" },
                   anno_graph.get_labels(seq_first, 1));
@@ -802,8 +802,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsPruneDummy) {
         EXPECT_TRUE(anno_graph.label_exists("Third"));
         EXPECT_FALSE(anno_graph.label_exists("Fourth"));
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + 1
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         EXPECT_EQ(std::vector<std::string> { "First" },
@@ -890,8 +890,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsPruneDummyParallel) {
         EXPECT_FALSE(anno_graph.label_exists("Third"));
         EXPECT_FALSE(anno_graph.label_exists("Fourth"));
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + 1
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         ASSERT_EQ(std::vector<std::string> { "First" },
@@ -906,7 +906,7 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsPruneDummyParallel) {
         );
 
         anno_graph.annotator_->insert_rows(edge_to_row_idx(inserted_nodes));
-        EXPECT_EQ(anno_graph.get_graph().num_nodes() + 1, inserted_nodes.size());
+        EXPECT_EQ(anno_graph.get_graph().max_index() + 1, inserted_nodes.size());
 
         ASSERT_EQ(std::vector<std::string> { "First" },
                   anno_graph.get_labels(seq_first, 1));
@@ -930,8 +930,8 @@ TEST(AnnotatedDBG, ExtendGraphAddTwoPathsPruneDummyParallel) {
         EXPECT_TRUE(anno_graph.label_exists("Third"));
         EXPECT_FALSE(anno_graph.label_exists("Fourth"));
 
-        EXPECT_TRUE(anno_graph.get_annotator().num_objects() + 1
-                        < dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
+        EXPECT_EQ(anno_graph.get_annotator().num_objects(),
+                  dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss().num_edges())
             << dynamic_cast<const DBGSuccinct&>(anno_graph.get_graph()).get_boss();
 
         EXPECT_EQ(std::vector<std::string> { "First" },
diff --git a/metagraph/tests/annotation/test_converters.cpp b/metagraph/tests/annotation/test_converters.cpp
index cd39b07b47..20900179aa 100644
--- a/metagraph/tests/annotation/test_converters.cpp
+++ b/metagraph/tests/annotation/test_converters.cpp
@@ -19,6 +19,13 @@ using namespace mtg;
 using namespace mtg::annot;
 using namespace ::testing;
 
+static auto graph_to_anno_index(graph::DeBruijnGraph::node_index node) { // file-local shorthand used by the tests below
+    return graph::AnnotatedDBG::graph_to_anno_index(node); // forwards unchanged to the AnnotatedDBG static of the same name
+}
+static auto anno_to_graph_index(graph::AnnotatedDBG::row_index row) { // file-local shorthand used by the tests below
+    return graph::AnnotatedDBG::anno_to_graph_index(row); // forwards unchanged to the AnnotatedDBG static of the same name
+}
+
 const std::string test_data_dir = "../tests/data";
 const std::string test_dump_basename = test_data_dir + "/dump_test";
 const std::string test_dump_basename_row_compressed_merge = test_dump_basename + "_row_compressed_merge";
@@ -189,9 +196,9 @@ TEST(RowDiff, succ) {
      */
 
     const std::vector<uint64_t> expected_succ = { 3, 0, 4, 2 };
-    const std::vector<bool> expected_succ_boundary = { 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+    const std::vector<bool> expected_succ_boundary = { 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1 };
     const std::vector<uint64_t> expected_pred = { 2, 4, 1, 3 };
-    const std::vector<bool> expected_pred_boundary = { 0, 1, 1, 0, 1, 0, 1, 0, 1 };
+    const std::vector<bool> expected_pred_boundary = { 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1 };
 
     for (uint32_t max_depth : { 1, 3, 5 }) {
         std::filesystem::remove_all(dst_dir);
@@ -211,7 +218,10 @@ TEST(RowDiff, succ) {
         sdsl::int_vector_buffer succ(succ_file, std::ios::in);
         ASSERT_EQ(expected_succ.size(), succ.size());
         for (uint32_t i = 0; i < succ.size(); ++i) {
-            EXPECT_EQ(expected_succ[i], succ[i]) << max_depth << " " << i;
+            EXPECT_EQ(
+                anno_to_graph_index(expected_succ[i]),
+                graph->rank_node(anno_to_graph_index(succ[i]))
+            ) << max_depth << " " << i;
         }
 
         sdsl::int_vector_buffer<1> succ_boundary(succ_boundary_file, std::ios::in);
@@ -223,7 +233,10 @@ TEST(RowDiff, succ) {
         sdsl::int_vector_buffer pred(pred_file, std::ios::in);
         EXPECT_EQ(expected_pred.size(), pred.size());
         for (uint32_t i = 0; i < pred.size(); ++i) {
-            EXPECT_EQ(expected_pred[i], pred[i]) << max_depth << " " << i;
+            EXPECT_EQ(
+                anno_to_graph_index(expected_pred[i]),
+                graph->rank_node(anno_to_graph_index(pred[i]))
+            ) << max_depth << " " << i;
         }
 
         sdsl::int_vector_buffer<1> pred_boundary(pred_boundary_file, std::ios::in);
@@ -279,8 +292,10 @@ TEST(RowDiff, ConvertFromColumnCompressedSameLabels) {
             std::unique_ptr<graph::DBGSuccinct> graph = create_graph(3, { "ACGTCAC" });
             graph->serialize(graph_fname);
 
-            ColumnCompressed source_annot(5);
-            source_annot.add_labels({ 0, 1, 2, 3, 4 }, labels);
+            ColumnCompressed source_annot(graph->max_index());
+            std::vector<uint64_t> edges(graph->max_index());
+            std::iota(begin(edges), end(edges), 0);
+            source_annot.add_labels(edges, labels);
             source_annot.serialize(annot_fname);
 
             convert_to_row_diff({ annot_fname }, graph_fname, 1e9, max_depth, dst_dir, dst_dir, RowDiffStage::COMPUTE_REDUCTION);
@@ -293,7 +308,7 @@ TEST(RowDiff, ConvertFromColumnCompressedSameLabels) {
                     .load_anchor(graph_fname + matrix::kRowDiffAnchorExt);
 
             ASSERT_EQ(labels.size(), annotator.num_labels());
-            ASSERT_EQ(5u, annotator.num_objects());
+            ASSERT_EQ(graph->max_index(), annotator.num_objects());
             EXPECT_EQ(labels.size() * expected_relations[max_depth - 1],
                       annotator.num_relations());
 
@@ -326,8 +341,10 @@ TEST(RowDiff, ConvertFromColumnCompressedSameLabelsMultipleColumns) {
 
             std::vector<std::string> sources;
             for (const std::string &label : labels) {
-                ColumnCompressed source_annot(5);
-                source_annot.add_labels({ 0, 1, 2, 3, 4 }, { label });
+                ColumnCompressed source_annot(graph->max_index());
+                std::vector<uint64_t> edges(graph->max_index());
+                std::iota(begin(edges), end(edges), 0);
+                source_annot.add_labels(edges, { label });
                 const std::string annot_fname
                         = dst_dir/(label + ColumnCompressed<>::kExtension);
                 source_annot.serialize(annot_fname);
@@ -346,7 +363,7 @@ TEST(RowDiff, ConvertFromColumnCompressedSameLabelsMultipleColumns) {
                         .load_anchor(graph_fname + matrix::kRowDiffAnchorExt);
 
                 ASSERT_EQ(1, annotator.num_labels());
-                ASSERT_EQ(5u, annotator.num_objects());
+                ASSERT_EQ(graph->max_index(), annotator.num_objects());
                 EXPECT_EQ(expected_relations[max_depth - 1], annotator.num_relations());
 
                 for (uint32 idx = 0; idx < annotator.num_objects(); ++idx) {
@@ -382,13 +399,13 @@ void test_row_diff(uint32_t k,
     graph->mask_dummy_kmers(1, false);
     graph->serialize(graph_fname);
 
-    ColumnCompressed initial_annotation(graph->num_nodes());
+    ColumnCompressed initial_annotation(graph->max_index());
     std::unordered_set<std::string> all_labels;
-    for (uint32_t anno_idx = 0; anno_idx < graph->num_nodes(); ++anno_idx) {
-        const std::vector<std::string> &labels = annotations[anno_idx];
-        initial_annotation.add_labels({anno_idx}, labels);
+    graph->call_nodes([&](uint32_t node_idx) {
+        const auto &labels = annotations[graph_to_anno_index(graph->rank_node(node_idx))];
+        initial_annotation.add_labels({graph_to_anno_index(node_idx)}, labels);
         std::for_each(labels.begin(), labels.end(), [&](auto l) { all_labels.insert(l); });
-    }
+    });
 
     initial_annotation.serialize(annot_fname);
 
@@ -402,12 +419,12 @@ void test_row_diff(uint32_t k,
             .load_anchor(graph_fname + matrix::kRowDiffAnchorExt);
 
     ASSERT_EQ(all_labels.size(), annotator.num_labels());
-    ASSERT_EQ(graph->num_nodes(), annotator.num_objects());
+    ASSERT_EQ(graph->max_index(), annotator.num_objects());
 
-    for (uint32_t anno_idx = 0; anno_idx < graph->num_nodes(); ++anno_idx) {
-        ASSERT_THAT(annotator.get_labels(anno_idx),
-                    UnorderedElementsAreArray(annotations[anno_idx]));
-    }
+    graph->call_nodes([&](uint32_t node_idx) {
+        ASSERT_THAT(annotator.get_labels(graph_to_anno_index(node_idx)),
+                    UnorderedElementsAreArray(annotations[graph_to_anno_index(graph->rank_node(node_idx))]));
+    });
 
     std::filesystem::remove_all(dst_dir);
 }
@@ -433,14 +450,14 @@ void test_row_diff_separate_columns(uint32_t k,
     graph->serialize(graph_fname);
 
     std::map<std::string, std::vector<uint64_t>> col_annotations;
-    for (uint32_t anno_idx = 0; anno_idx < graph->num_nodes(); ++anno_idx) {
-        for (const auto &label : annotations[anno_idx]) {
-            col_annotations[label].push_back(anno_idx);
+    graph->call_nodes([&](auto node_idx) {
+        for (const auto &label : annotations[graph_to_anno_index(graph->rank_node(node_idx))]) {
+            col_annotations[label].push_back(graph_to_anno_index(node_idx));
         }
-    }
+    });
 
     for (const auto& [label, indices] : col_annotations) {
-        ColumnCompressed initial_annotation(graph->num_nodes());
+        ColumnCompressed initial_annotation(graph->max_index());
         initial_annotation.add_labels(indices, {label});
         std::string annot_fname
                 = dst_dir/("anno_" + label + ColumnCompressed<>::kExtension);
@@ -460,7 +477,7 @@ void test_row_diff_separate_columns(uint32_t k,
         const_cast<matrix::RowDiff<matrix::ColumnMajor> &>(annotator.get_matrix())
                 .load_anchor(graph_fname + matrix::kRowDiffAnchorExt);
 
-        ASSERT_EQ(graph->num_nodes(), annotator.num_objects());
+        ASSERT_EQ(graph->max_index(), annotator.num_objects());
 
         std::vector<uint64_t> actual_indices;
         annotator.call_objects(label,
diff --git a/metagraph/tests/graph/succinct/test_dbg_succinct.cpp b/metagraph/tests/graph/succinct/test_dbg_succinct.cpp
index 88a25e2716..1bc51ffd59 100644
--- a/metagraph/tests/graph/succinct/test_dbg_succinct.cpp
+++ b/metagraph/tests/graph/succinct/test_dbg_succinct.cpp
@@ -20,9 +20,9 @@ TEST(DBGSuccinct, get_degree_with_source_dummy) {
                                 + std::string(k, 'T'));
 
         // dummy source k-mer: '$$$$$'
-        EXPECT_EQ(std::string(k, '$'), graph->get_node_sequence(1));
-        EXPECT_EQ(1ull, graph->outdegree(1));
-        EXPECT_EQ(1ull, graph->indegree(1));
+        EXPECT_EQ(std::string(k, '$'), graph->get_node_sequence(graph->select_node(1)));
+        EXPECT_EQ(1ull, graph->outdegree(graph->select_node(1)));
+        EXPECT_EQ(1ull, graph->indegree(graph->select_node(1)));
 
         // 'AAAAA'
         auto node_A = graph->kmer_to_node(std::string(k, 'A'));
@@ -40,7 +40,7 @@ TEST(DBGSuccinct, get_degree_with_source_dummy) {
 
         graph->mask_dummy_kmers(1, false);
         // dummy source k-mer: '$$$$$'
-        EXPECT_NE(std::string(k, '$'), graph->get_node_sequence(1));
+        EXPECT_NE(std::string(k, '$'), graph->get_node_sequence(graph->select_node(1)));
 
         // 'AAAAA'
         node_A = graph->kmer_to_node(std::string(k, 'A'));
@@ -65,9 +65,9 @@ TEST(DBGSuccinct, get_degree_with_source_and_sink_dummy) {
                                 + std::string(k - 1, 'T'));
 
         // dummy source k-mer: '$$$$$'
-        EXPECT_EQ(std::string(k, '$'), graph->get_node_sequence(1));
-        EXPECT_EQ(1ull, graph->outdegree(1));
-        EXPECT_EQ(1ull, graph->indegree(1));
+        EXPECT_EQ(std::string(k, '$'), graph->get_node_sequence(graph->select_node(1)));
+        EXPECT_EQ(1ull, graph->outdegree(graph->select_node(1)));
+        EXPECT_EQ(1ull, graph->indegree(graph->select_node(1)));
 
         // 'AAAAA'
         auto node_A = graph->kmer_to_node(std::string(k, 'A'));
@@ -85,7 +85,7 @@ TEST(DBGSuccinct, get_degree_with_source_and_sink_dummy) {
 
         graph->mask_dummy_kmers(1, false);
         // dummy source k-mer: '$$$$$'
-        EXPECT_NE(std::string(k, '$'), graph->get_node_sequence(1));
+        EXPECT_NE(std::string(k, '$'), graph->get_node_sequence(graph->select_node(1)));
 
         // 'AAAAA'
         node_A = graph->kmer_to_node(std::string(k, 'A'));
@@ -109,7 +109,7 @@ TEST(DBGSuccinct, is_single_outgoing_simple) {
 
     uint64_t single_outgoing_counter = 0;
     for (DBGSuccinct::node_index i = 1; i <= graph->num_nodes(); ++i) {
-        if (graph->outdegree(i) == 1)
+        if (graph->outdegree(graph->select_node(i)) == 1)
             single_outgoing_counter++;
     }
 
@@ -126,7 +126,7 @@ TEST(DBGSuccinct, is_single_outgoing_for_multiple_valid_edges) {
 
     uint64_t single_outgoing_counter = 0;
     for (DBGSuccinct::node_index i = 1; i <= graph->num_nodes(); ++i) {
-        if (graph->outdegree(i) == 1)
+        if (graph->outdegree(graph->select_node(i)) == 1)
             single_outgoing_counter++;
     }