Created using Colab

sualeh · May 7, 2024 · 3426cb5 · 3426cb5
1 parent a7ffd94
commit 3426cb5
Showing 1 changed file with 197 additions and 168 deletions.
diff --git a/Notebooks/5_javascript_unicode_pattern_matching.ipynb b/Notebooks/5_javascript_unicode_pattern_matching.ipynb
@@ -1,170 +1,199 @@
 {
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Unicode Pattern Matching"
-   ]
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/sualeh/What-a-Character/blob/go/Notebooks/5_javascript_unicode_pattern_matching.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PHrPaZuXetrR"
+      },
+      "source": [
+        "# Unicode Pattern Matching"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "l35sZ-DdetrS"
+      },
+      "source": [
+        "## Case Insensitive Matching"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "4bpKPj0yetrT"
+      },
+      "source": [
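+        "In Greek, the word for dog in lowercase is \"σκύλος\". Notice that the first and last letters are both sigma: the first is written \"σ\" and the word-final one is written \"ς\", but both have the same uppercase form, \"Σ\"."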
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "b0D8XmmUetrT"
+      },
+      "outputs": [],
+      "source": [
+        "%%script node\n",
+        "\n",
+        "const lower_greek = /σκύλος/iu\n",
+        "const upper_greek = \"ΣΚΎΛΟΣ\";\n",
+        "matches = lower_greek.test(upper_greek);\n",
+        "\n",
+        "console.log(matches);"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "e4kztbf8etrT"
+      },
+      "source": [
+        "When a lowercase character maps to more than one uppercase character (for example, \"ß\" uppercases to \"SS\"), there is no match."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "IBI6YatUetrT"
+      },
+      "outputs": [],
+      "source": [
+        "%%script node\n",
+        "\n",
+        "  const lower_german = /straße/iu\n",
+        "  const upper_german = \"STRASSE\";\n",
+        "  matches = lower_german.test(upper_german);\n",
+        "\n",
+        "  console.log(matches);"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "N8U7JfYyetrT"
+      },
+      "source": [
+        "## Matching Numbers"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kEyK5CxBetrU"
+      },
+      "source": [
+        "A naive match with a range of digits `[0-9]` does not work, because the range covers only the ASCII digits and not digits from other scripts."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "3hm1wMm7etrU"
+      },
+      "outputs": [],
+      "source": [
+        "%%script node\n",
+        "\n",
+        "hindiNumber = \"१२३४५६७८९०\";\n",
+        "\n",
+        "digit = /[0-9]+/\n",
+        "matches = digit.test(hindiNumber);\n",
+        "\n",
+        "console.log(matches);"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "y9AJrxKFetrU"
+      },
+      "source": [
+        "A regular expression with the `\\d` pattern does not work either: in JavaScript, `\\d` is equivalent to `[0-9]` and matches only ASCII digits, even with the `u` flag."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "dR9XOTsTetrU"
+      },
+      "outputs": [],
+      "source": [
+        "%%script node\n",
+        "\n",
+        "hindiNumber = \"१२३४५६७८९०\";\n",
+        "\n",
+        "standard_digit = /\\d+/\n",
+        "matches = standard_digit.test(hindiNumber);\n",
+        "\n",
+        "console.log(matches);"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bFFCgcWKetrU"
+      },
+      "source": [
+        "The best way to match digits is by matching against the Unicode Decimal Number category (Nd), using the Unicode property escape `\\p{Nd}`. Property escapes require the `u` flag on the regular expression."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "EHmWCZjyetrU"
+      },
+      "outputs": [],
+      "source": [
+        "%%script node\n",
+        "\n",
+        "hindiNumber = \"१२३४५६७८९०\";\n",
+        "\n",
+        "unicode_digit = /\\p{Nd}+/u\n",
+        "matches = unicode_digit.test(hindiNumber);\n",
+        "\n",
+        "console.log(matches);"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "base",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.11.5"
+    },
+    "colab": {
+      "provenance": [],
+      "include_colab_link": true
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script node\n",
-    "\n",
-    "matches = false;"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Case Insensitive Matching"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In Greek, the word for dog in lowercase is \"σκύλος\". Notice that the first and last letter are both sigma."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script node\n",
-    "\n",
-    "const lower_greek = /σκύλος/iu\n",
-    "const upper_greek = \"ΣΚΎΛΟΣ\";\n",
-    "matches = lower_greek.test(upper_greek);\n",
-    "\n",
-    "console.log(matches);"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "When a lower -case character results in more than one uppercase character, there is no match."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script node\n",
-    "\n",
-    "  const lower_german = /\"straße\"/iu\n",
-    "  const upper_german = \"STRASSE\";\n",
-    "  matches = lower_german.test(upper_german);\n",
-    "\n",
-    "  console.log(matches);"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Matching Numbers"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "A naive match with a range of digits `[0-9]` does not work."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script node\n",
-    "\n",
-    "hindiNumber = \"१२३४५६७८९०\";\n",
-    "\n",
-    "digit = /[0-9]+/\n",
-    "matches = digit.test(hindiNumber);\n",
-    "\n",
-    "console.log(matches);"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "A slightly better regular expression with a `\\d` pattern does not work either."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script node\n",
-    "\n",
-    "hindiNumber = \"१२३४५६७८९०\";\n",
-    "\n",
-    "standard_digit = /\\d+/\n",
-    "matches = standard_digit.test(hindiNumber);\n",
-    "\n",
-    "console.log(matches);"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The best way to match digits is by matching against the Unicode Decimal Number Category (Nd), using a Unicode Category pattern `\\p{Nd}`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%script node\n",
-    "\n",
-    "hindiNumber = \"१२३४५६७८९०\";\n",
-    "\n",
-    "unicode_digit = /\\p{Nd}+/u\n",
-    "matches = unicode_digit.test(hindiNumber);\n",
-    "\n",
-    "console.log(matches);"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
+  "nbformat": 4,
+  "nbformat_minor": 0
+}