Merge pull request #28 from dyvenia/dev
Dev
acivitillo authored Feb 12, 2025
2 parents 62f6a81 + b771900 commit a568ed8
Showing 18 changed files with 1,224 additions and 76 deletions.
1,081 changes: 1,077 additions & 4 deletions package-lock.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion package.json
@@ -56,6 +56,7 @@
"markdown-it-link-attributes": "^4.0.1",
"markdown-it-mark": "^4.0.0",
"markdown-it-prism": "^2.3.0",
"markdown-it-toc-done-right": "^4.2.0",
"netlify-plugin-cache": "^1.0.3",
"pagefind": "^1.2.0",
"postcss": "^8.4.8",
@@ -72,4 +73,4 @@
"slugify": "^1.6.5",
"svgo": "^3.2.0"
}
}
}
21 changes: 20 additions & 1 deletion src/_config/plugins/markdown.js
@@ -8,6 +8,7 @@ import markdownItEleventyImg from 'markdown-it-eleventy-img';
import markdownItFootnote from 'markdown-it-footnote';
import markdownitMark from 'markdown-it-mark';
import markdownitAbbr from 'markdown-it-abbr';
import markdownItTocDoneRight from 'markdown-it-toc-done-right';
import {slugifyString} from '../filters/slugify.js';
import {optimize} from 'svgo';
import {readFileSync} from 'node:fs';
@@ -112,4 +113,22 @@ export const markdownLib = markdownIt({
})
.use(markdownItFootnote)
.use(markdownitMark)
.use(markdownitAbbr);
.use(markdownitAbbr)
.use(markdownItTocDoneRight, {
placeholder: `{:toc}`,
slugify: slugifyString,
containerId: 'toc',
itemClass: 'flow',
listType: 'ol'
});

const originalRender = markdownLib.render.bind(markdownLib);

markdownLib.render = (content, env = {}) => {
const shouldAddToc = env.toc !== false;
const tocBeforeContent = shouldAddToc
? `{:toc}\n<span class="visually-hidden" id="toc-skipped"></span>\n${content}`
: content;

return originalRender(tocBeforeContent, env);
};
1 change: 1 addition & 0 deletions src/_data/meta.js
@@ -56,6 +56,7 @@ export const details = {
expand: 'expand all',
collapse: 'collapse all'
};
export const toc = {title: 'Table of contents', skipLink: 'Skip table of contents'};
export const navigation = {
navLabel: 'Menu',
ariaTop: 'Main',
8 changes: 7 additions & 1 deletion src/_layouts/post.njk
@@ -8,7 +8,6 @@ schema: blog
<h1>{{ title }}</h1>

{% if image %}
<!-- image -->
{% image image, alt or title, credit, "eager", "feature" %}
{% endif %}

@@ -26,6 +25,13 @@ schema: blog
{% endif %}
</p>

{% if toc %}
<div class="toc-container | relative">
<a href="#toc-skipped" id="skip-toc" class="visually-hidden skip-link">{{ meta.toc.skipLink }}</a>
<h2 id="table-of-contents">{{ meta.toc.title }}</h2>
</div>
{% endif %}

{{ content | safe }}
{% include 'partials/edit-on.njk' %}
</div>
43 changes: 43 additions & 0 deletions src/assets/css/bundle/post.css
@@ -26,3 +26,46 @@ time {
.post .meta {
font-size: var(--size-step-min-1);
}

.post .table-of-contents {
padding: var(--space-2xs) var(--space-s);
background: var(--color-bg-accent);
background: color-mix(in oklab, var(--color-bg) 97%, var(--color-text));
border-radius: var(--border-radius-small);
}

.post .table-of-contents a {
text-decoration: none;
}

.post .table-of-contents a:hover {
text-decoration: underline;
text-decoration-thickness: 0.08em;
}

.post .toc-container {
--flow-space: var(--space-xl);
}

.post .toc-container h2 {
font-size: var(--size-step-1);
text-transform: uppercase;
}

.post ol {
line-height: 1.5em;
}

.post ol ol {
list-style: lower-latin;
padding-inline-start: 1.8ch;
font-size: var(--size-step-min-1);
}

.post ol > li > ol {
--flow-space: var(--space-3xs);
}

.post ol > li > ol > li + li {
padding-block-start: var(--space-3xs);
}
118 changes: 62 additions & 56 deletions src/assets/css/global/blocks/prose.css
@@ -1,73 +1,79 @@
/* Based on Andy Bell, https://github.com/Andy-set-studio/personal-site-eleventy */

.prose {
--flow-space: var(--space-s-m);
}
@layer prose {
.prose {
--flow-space: var(--space-s-m);
}

.prose :where(pre, pre + *, figure, figure + *) {
--flow-space: var(--space-m-l);
}
.prose :where(pre, pre + *, figure, figure + *) {
--flow-space: var(--space-m-l);
}

.prose figcaption {
font-size: var(--size-step-min-1);
text-align: center;
padding-block-end: var(--space-xs);
margin-block-end: var(--space-s);
border-bottom: 1px solid var(--color-bg-accent);
}
.prose figcaption {
font-size: var(--size-step-min-1);
text-align: center;
padding-block-end: var(--space-xs);
margin-block-end: var(--space-s);
border-bottom: 1px solid var(--color-bg-accent);
}

:where(.prose :is(h2, h3, h4)) {
--flow-space: var(--space-xl);
}
:where(.prose :is(h2, h3, h4)) {
--flow-space: var(--space-xl);
}

:where(.prose :is(h2 + *, h3 + *, h4 + *):not(figure)) {
--flow-space: var(--space-m);
}
:where(.prose :is(h2 + *, h3 + *, h4 + *):not(figure)) {
--flow-space: var(--space-m);
}

.prose .heading-anchor:is(:hover, :focus) {
text-decoration: none;
}
.prose .heading-anchor:is(:hover, :focus) {
text-decoration: none;
}

.heading-anchor {
text-decoration: none;
}
.heading-anchor {
text-decoration: none;
}

.prose mark {
background-color: var(--color-gold);
color: var(--color-base-dark);
}
.prose mark {
background-color: var(--color-gold);
color: var(--color-base-dark);
}

/* block space only for "regular lists" */
.prose :not(.cluster):not(.grid) > li + li {
padding-block-start: var(--space-s-m);
}
/* block space only for "regular lists" */
.prose :not(.cluster):not(.grid) > li + li {
padding-block-start: var(--space-s);
}

/* inline space only for "regular lists" */
.prose :where(ul:not(.grid), ol:not(.grid)) {
padding-inline-start: 1.2ch;
}
/* inline space only for "regular lists" */
.prose ul:not(.grid) {
padding-inline-start: 1.3ch;
}

/* marker only for "regular lists" */
.prose :where(ul:not(.grid):not([role='list'])) li::marker {
color: var(--color-blue);
content: '– ';
}
.prose ol:not(.grid) {
padding-inline-start: 2ch;
}

.prose ol li::marker {
color: var(--color-blue);
}
/* marker only for "regular lists" */
.prose :where(ul:not(.grid):not([role='list'])) li::marker {
color: var(--color-blue);
content: '– ';
}

.prose img {
border-radius: var(--border-radius-small);
}
.prose ol li::marker {
color: var(--color-blue);
}

.prose img {
border-radius: var(--border-radius-small);
}

@media screen(ltnav) {
.prose > *,
.prose a {
overflow-wrap: break-word;
word-wrap: break-word;
word-break: break-word;
/* Adds a hyphen where the word breaks, if supported (No Blink) */
hyphens: auto;
@media screen(ltnav) {
.prose > *,
.prose a {
overflow-wrap: break-word;
word-wrap: break-word;
word-break: break-word;
/* Adds a hyphen where the word breaks, if supported (No Blink) */
hyphens: auto;
}
}
}
Binary file added src/assets/images/elt_patterns-1.png
2 changes: 1 addition & 1 deletion src/pages/index.njk
@@ -207,7 +207,7 @@ title: The Scalable Way
<div class="wrapper">
<section class="full | region">
<div class="wrapper flow prose">
<h2>{{ blog.title }}</h2>
<h2><a href="/insights/" class="no-underline hover:underline">{{ blog.title }}</a></h2>

{% if blog.intro %}
<p>{{ blog.intro }}</p>
@@ -11,8 +11,6 @@ internal_notes: |-
**Purpose:** To position TSW as a credible resource for Prefect best practices. To make the potential customer think about potential problems they might be having and trigger them to consider having a professional code review done.
---
## Intro

When building data platforms, it’s tempting to focus entirely on the technology stack—choosing shiny tools, debating between bulk loads or streaming, and designing storage and infrastructure to meet current needs. Yet, the rush to get data flowing often overshadows a crucial question: **How will we monitor and operate all of this effectively?**       

In the early stages, data projects typically start small: an MVP, one or two data sources, and a couple of flow runs per day. At this scale, operations often feel secondary—issues can be solved on the spot, and data engineering teams are under pressure to deliver data to end users. But as the platform scales, this oversight catches up. Within months, many teams find themselves struggling to manage DataOps, with operational gaps threatening their progress.
@@ -15,7 +15,7 @@ internal_notes: |-
- Prefect users looking into improving their data ingestion workflow, technology and process
- dlt users looking into improving their scheduling, orchestration and monitoring
**Purpose:**
**Purpose:**
Show that dyvenia knows both technologies as well as how to do data ingestion professionally and thus can help clients that struggle with it or with one of those tools.
@@ -44,7 +44,7 @@ internal_notes: |-
- Features
- Readiness for AI era
---
# Doing data ingestion right is hard…
#### Doing data ingestion right is hard…

Despite advances in data engineering, data ingestion—the Extract and Load (EL) steps of the [ELT](https://dyvenia.com/resources/data-glossary/) process—remains a persistent challenge for many data teams.

@@ -53,7 +53,7 @@ In the era of AI, UI-based tools face one more limitation: they miss out on most

Even if teams do decide to use open-source solutions, they often end up creating volumes of low-quality glue code. This in-house software, typically written in a rush by non-professional engineers, often fails to meet essential requirements for modern data platforms, such as EaC (Everything as Code), security, monitoring & alerting, reliability, or extensibility. Such code is also far more brittle and much harder to maintain and modify. Consequently, all modifications to the code, such as adding new features or fixing bugs, take much more time and are far riskier than they should be.

## …but there is light at the end of the tunnel
#### …but there is light at the end of the tunnel

Luckily, in recent years, with the growing adoption of software engineering practices, we’ve seen a professionalization of the data engineering field. This has resulted in the creation of a number of high-quality, open-source tools that simplify and improve the quality of data engineering work, such as [dlt](https://dlthub.com/) and [Prefect](https://www.prefect.io/).

@@ -153,11 +153,11 @@ Data anonymization and/or pseudonymization are crucial to ensure compliance with

1. During the ingestion phase (in which case the original data never enters the destination system)

![](/src/assets/images/data_masking_ingestion.jpg)
![](/src/assets/images/data_masking_ingestion_phase.png)

2. During the transformation phase (in which case private data is stored in one or more layers in the destination system but hidden from the eyes of end users)

![](/src/assets/images/data_masking_transformation.jpg)
![](/src/assets/images/data_masking_transformation_phase.png)

While dlt doesn’t provide built-in anonymization features, it provides the necessary tools to implement the first option effectively.
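As an illustration of the first option, here is a minimal, hypothetical sketch of ingestion-phase pseudonymization. The column names, salt, and resource name are illustrative assumptions, not from the post; what is real is that dlt resources accept row transforms via `add_map`, so a function like this can run before any raw value reaches the destination:

```python
import hashlib

# Hypothetical PII columns and salt -- adjust to your schema and secrets setup.
PII_COLUMNS = ("email", "phone")
SALT = "replace-with-a-secret-salt"

def pseudonymize(row: dict) -> dict:
    """Replace PII values with a deterministic salted SHA-256 digest."""
    for col in PII_COLUMNS:
        if row.get(col) is not None:
            digest = hashlib.sha256((SALT + str(row[col])).encode()).hexdigest()
            row[col] = digest
    return row

# With dlt, a transform like this is attached to a resource, e.g.:
#   users_resource.add_map(pseudonymize)
# so the original values never enter the destination system.

row = pseudonymize({"id": 1, "email": "jane@example.com"})
```

Because the digest is deterministic, joins on the pseudonymized column still work across tables, while the raw value is unrecoverable without the salt.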

@@ -175,19 +175,19 @@ Incremental extraction allows us to download only new or modified data.
- [Parallelization](https://dlthub.com/docs/reference/performance#parallelism): dlt allows parallelizing each stage of the pipeline utilizing multithreading and multiprocessing (depending on the stage).
In cases where further parallelization is needed (i.e., the workload exceeds the capacity of a single machine), utilizing orchestrator-layer parallelization may be required. However, this scenario is now rare, as large virtual machines capable of processing petabytes of data are widely available, and dlt can leverage the machine’s resources more efficiently than older tools or typical in-house Python code.
- **[Various other optimizations](https://dlthub.com/docs/reference/performance)**
- [**Various other optimizations**](https://dlthub.com/docs/reference/performance)
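To make the incremental-extraction idea concrete, here is a minimal, dlt-independent sketch of cursor-based extraction. The `updated_at` field and the explicit cursor handling are illustrative assumptions; dlt tracks this state for you (via `dlt.sources.incremental`), but the underlying mechanics look like this:

```python
from datetime import datetime, timezone
from typing import Optional

# Hypothetical source data; in practice this would come from an API or database.
ROWS = [
    {"id": 1, "updated_at": "2025-01-01T00:00:00+00:00"},
    {"id": 2, "updated_at": "2025-02-01T00:00:00+00:00"},
    {"id": 3, "updated_at": "2025-03-01T00:00:00+00:00"},
]

def extract_incremental(rows, last_cursor: Optional[str]):
    """Return only rows newer than the stored cursor, plus the new cursor value."""
    floor = (
        datetime.fromisoformat(last_cursor)
        if last_cursor
        else datetime.min.replace(tzinfo=timezone.utc)
    )
    new_rows = [r for r in rows if datetime.fromisoformat(r["updated_at"]) > floor]
    new_cursor = max((r["updated_at"] for r in new_rows), default=last_cursor)
    return new_rows, new_cursor

# The first run loads everything; subsequent runs only see newer records.
batch1, cursor = extract_incremental(ROWS, None)
batch2, cursor = extract_incremental(ROWS, cursor)
```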

As the topic of incremental loading can be complex even for seasoned data engineers, we’ve prepared a diagram of all the viable ELT patterns:

![](/src/assets/images/elt_patterns.jpg)
![](/src/assets/images/elt_patterns-1.png)

**NOTE:** dlt also provides sub-types of the “merge” disposition, including [SCD type 2](https://dlthub.com/blog/scd2-and-incremental-loading); however, for clarity, we did not include these in the diagram. For more information on these subtypes, see [relevant documentation](https://dlthub.com/docs/general-usage/incremental-loading#merge-incremental-loading).

The choice of a specific implementation depends on what is supported by the source and destination systems as well as on how the source data is generated. Ideally, incremental extract should be used whenever possible. Then, whether you choose the “append” or “merge” write disposition depends on how the data is generated: if you can guarantee that only new records are produced and no existing data is ever modified, you can safely use the “append” disposition. Next, you need to check if the destination system handles the disposition you intend to use (e.g., some systems don’t support the “merge” disposition).

The following diagram from [dlt’s official documentation](https://dlthub.com/docs/general-usage/incremental-loading#two-simple-questions-determine-the-write-disposition-you-use) also provides a good overview of when to choose which write disposition:

![](/src/assets/images/write_disposition.jpg)
![](/src/assets/images/dlt_choosing_write_disposition-1.png)
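The decision logic in that diagram can be sketched as a small helper. This is a paraphrase of dlt's two questions, not an official API, and it omits the “merge” sub-types mentioned above:

```python
def choose_write_disposition(is_stateful: bool, can_extract_incrementally: bool) -> str:
    """Sketch of the two-question decision tree for write dispositions.

    1. Is the data stateful (can existing records change)?
       - No  -> only new records ever arrive, so "append" is safe.
    2. If stateful, can we extract only the new/changed records?
       - Yes -> "merge" upserts the changes into the destination.
       - No  -> "replace" reloads the full table each run.
    """
    if not is_stateful:
        return "append"
    return "merge" if can_extract_incrementally else "replace"

choose_write_disposition(False, False)  # immutable event logs -> "append"
choose_write_disposition(True, True)    # mutable rows, incremental source -> "merge"
choose_write_disposition(True, False)   # mutable rows, full extracts only -> "replace"
```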

## Orchestrating data pipelines with Prefect

@@ -228,7 +228,7 @@ Now that we’ve outlined the essential features of a production-grade dlt pipeline

The diagram below illustrates the key steps in this production workflow.

![](/src/assets/images/production_workflow.jpg)
![](/src/assets/images/ingestion_pipeline_workflow_overview-1.png)

1. **Create a dlt pipeline:** We start by creating a dlt pipeline (if the one we need doesn’t exist yet). Once the pipeline is finished and tests pass, we can move on to the next step.
2. **Create Prefect deployment**: We create a Prefect deployment for the pipeline. Notice we utilize Prefect’s `prefect.yaml` file together with a single `extract_and_load()` flow capable of executing any dlt pipeline to drastically simplify this process.
3 changes: 2 additions & 1 deletion src/posts/posts.json
@@ -1,5 +1,6 @@
{
"layout": "post",
"tags": "posts",
"permalink": "/blog/{{ title | slugify }}/index.html"
"permalink": "/insights/{{ title | slugify }}/index.html",
"toc": true
}
