From 11e1b41039eac97d48400a138880dddb12ca8722 Mon Sep 17 00:00:00 2001 From: Hande Celikkanat <7702228+handecelikkanat@users.noreply.github.com> Date: Mon, 17 Nov 2025 13:56:12 +0200 Subject: [PATCH] fix: re-add --to and --from args to cdxt --- aws-ccf-dataset.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws-ccf-dataset.ipynb b/aws-ccf-dataset.ipynb index a0ee0d8..337df26 100644 --- a/aws-ccf-dataset.ipynb +++ b/aws-ccf-dataset.ipynb @@ -380,8 +380,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Lookup captures for the given url in the commoncrawl cdx index for CC-MAIN-2024-22, returning only the first match\n", - "!cdxt --limit 1 --crawl CC-MAIN-2024-22 iter an.wikipedia.org/wiki/Escopete\n", + "# Look up this capture in the commoncrawl cdx index\n", + "!cdxt --limit 1 --crawl CC-MAIN-2024-22 --from 20240518015810 --to 20240518015810 iter an.wikipedia.org/wiki/Escopete\n", "\n", "# Cleanup previous work\n", "!rm -f TEST-000000.extracted.warc.gz\n",