Skip to content

Typed seq combine alternative #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -5,3 +5,7 @@
src/parsy.egg-info
docs/_build
.cache
__pycache__
.python-version
.venv
.vscode
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -4,8 +4,8 @@ repos:
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
- repo: https://github.com/pycqa/flake8.git
rev: 3.9.2
hooks:
- id: flake8
language_version: python3.9
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
@@ -13,10 +13,12 @@ incompatible** version of parsy that has strong type guarantees, with no
This means removing anything that cannot be typed fully, and providing
alternatives. Main changes:

* Removed ``seq``, and replaced it with ``&`` operator support which returns a 2-tuple
* Removed ``seq``, and replaced it with ``join`` which creates a 2-tuple result, and
``append`` which takes an ``n``-tuple result and adds the result of another parser to
the end, producing an ``n+1``-tuple result.
* Removed ``alt`` - you can use only ``|`` operator.
* Removed ``.combine`` and ``.combine_dict`` - you have to use ``.map`` instead,
which is type-safe but much trickier, especially once you have nested tuples.
* Removed ``.combine_dict`` - you have to use ``.map`` or ``.combine`` instead,
which is type-safe but loses the benefit of keyword sequence parsers.

The docs have not been updated, you’ll need to look at the source code
if you are interested.
3 changes: 2 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
from typing import List

collect_ignore: list[str] = []
collect_ignore: List[str] = []

if sys.version_info < (3, 7):
# Python 3.6 and below don't have `dataclasses`
7 changes: 2 additions & 5 deletions docs/ref/methods_and_combinators.rst
Original file line number Diff line number Diff line change
@@ -111,22 +111,19 @@ can be used and manipulated as below.
Returns a parser that expects the initial parser at least ``n`` times, and
produces a list of the results.

.. method:: until(other_parser, [min=0, max=inf, consume_other=False])
.. method:: until(other_parser, [min=0, max=inf])

Returns a parser that expects the initial parser followed by ``other_parser``.
The initial parser is expected at least ``min`` times and at most ``max`` times.
By default, it does not consume ``other_parser`` and it produces a list of the
results excluding ``other_parser``. If ``consume_other`` is ``True`` then
``other_parser`` is consumed and its result is included in the list of results.
results excluding ``other_parser``.

.. code:: python

>>> seq(string('A').until(string('B')), string('BC')).parse('AAABC')
[['A','A','A'], 'BC']
>>> string('A').until(string('B')).then(string('BC')).parse('AAABC')
'BC'
>>> string('A').until(string('BC'), consume_other=True).parse('AAABC')
['A', 'A', 'A', 'BC']

.. versionadded:: 2.0

114 changes: 114 additions & 0 deletions examples/dataclass_parser_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from typing import List

from parsy import dataclass_parser, parser_field, regex, string

# Sample input: a free-text header followed by two schools, each containing
# per-grade "Student number, Name" and "Student number, Score" tables.
text = """Sample text

A selection of students from Riverdale High and Hogwarts took part in a quiz. This is a record of their scores.

School = Riverdale High
Grade = 1
Student number, Name
0, Phoebe
1, Rachel

Student number, Score
0, 3
1, 7

Grade = 2
Student number, Name
0, Angela
1, Tristan
2, Aurora

Student number, Score
0, 6
1, 3
2, 9

School = Hogwarts
Grade = 1
Student number, Name
0, Ginny
1, Luna

Student number, Score
0, 8
1, 7

Grade = 2
Student number, Name
0, Harry
1, Hermione

Student number, Score
0, 5
1, 10

Grade = 3
Student number, Name
0, Fred
1, George

Student number, Score
0, 0
1, 0
"""


# Basic lexemes shared by the dataclass parsers below.
integer = regex(r"\d+").map(int)  # one or more digits, converted to int
any_text = regex(r"[^\n]+")  # everything up to (but excluding) the next newline


@dataclass
class Student:
    """One row of a "Student number, Name" table, e.g. the line "0, Phoebe"."""

    number: int = parser_field(integer << string(", "))  # student number before the comma
    name: str = parser_field(any_text << string("\n"))  # rest of the line, newline consumed


@dataclass
class Score:
    """One row of a "Student number, Score" table, e.g. the line "0, 3"."""

    number: int = parser_field(integer << string(", "))  # student number before the comma
    score: int = parser_field(integer << string("\n"))  # numeric score, newline consumed


@dataclass
class StudentWithScore:
    """Joined record combining a student's name with their score.

    Plain data holder — built in ``Grade.students_with_scores``, not parsed.
    """

    name: str
    number: int
    score: int


@dataclass
class Grade:
    """A "Grade = N" section: the grade number plus its name and score tables."""

    # "Grade = N" header line; the trailing newline is consumed.
    grade: int = parser_field(string("Grade = ") >> integer << string("\n"))
    # Name table: a header line, then one Student per row; trailing blank lines skipped.
    students: List[Student] = parser_field(
        string("Student number, Name\n") >> dataclass_parser(Student).many() << regex(r"\n*")
    )
    # Score table: same shape as the name table, one Score per row.
    scores: List[Score] = parser_field(
        string("Student number, Score\n") >> dataclass_parser(Score).many() << regex(r"\n*")
    )

    @property
    def students_with_scores(self) -> List[StudentWithScore]:
        """Join scores to student names by student number.

        NOTE(review): a score whose number has no matching student raises
        KeyError — assumes the two tables cover the same student numbers.
        """
        names = {st.number: st.name for st in self.students}
        return [StudentWithScore(names[score.number], score.number, score.score) for score in self.scores]


@dataclass
class School:
    """A "School = X" section: the school name plus all of its grade sections."""

    name: str = parser_field(string("School = ") >> any_text << string("\n"))  # header line
    grades: List[Grade] = parser_field(dataclass_parser(Grade).many())  # zero or more Grade sections


@dataclass
class File:
    """The whole document: free-text preamble, then every School section."""

    # Non-greedy match of everything before the first "School =" (lookahead keeps it unconsumed).
    header: str = parser_field(regex(r"[\s\S]*?(?=School =)"))
    schools: List[School] = parser_field(dataclass_parser(School).many())


if __name__ == "__main__":
    # Parse the sample document above and show the structured result.
    file = dataclass_parser(File).parse(text)
    print(file.schools)
82 changes: 82 additions & 0 deletions examples/dataclass_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from dataclasses import dataclass
from typing import Optional

from parsy import dataclass_parser, parser_field, regex, string, whitespace


@dataclass
class Person:
    """Demo record parsed from "<name> <age> <note>", e.g. "Rob 2000 how time flies"."""

    name: str = parser_field(regex(r"\w+") << whitespace)  # word characters, trailing whitespace consumed
    age: int = parser_field(regex(r"\d+").map(int) << whitespace)  # digits converted to int
    note: str = parser_field(regex(".+"))  # remainder of the line


# Build a parser from the dataclass field parsers and check a round trip.
person_parser = dataclass_parser(Person)
person = person_parser.parse("Rob 2000 how time flies")
print(person)
assert person == Person(name="Rob", age=2000, note="how time flies")


# Nesting dataclass parsers


@dataclass
class Id:
    """An identifier token with an optional numeric "from year" after it."""

    id: str = parser_field(regex(r"[^\s]+") << whitespace.optional())  # any non-whitespace run
    # Optional year; .desc("Numeric") names the expectation in error messages.
    from_year: Optional[int] = parser_field(
        regex("[0-9]+").map(int).desc("Numeric").optional() << whitespace.optional()
    )


@dataclass
class Name:
    """A name with an optional "T"/"F" abbreviation flag following it."""

    name: str = parser_field(regex(r"[a-zA-Z]+") << whitespace.optional())  # alphabetic name
    # "T" -> True, "F" -> False; None when the flag is absent.
    abbreviated: Optional[bool] = parser_field(
        (string("T") | string("F")).map(lambda x: x == "T").optional() << whitespace.optional()
    )


@dataclass
class PersonDetail:
    """Nested dataclass parsers: an Id, a forename, and an optional surname."""

    id: Id = parser_field(dataclass_parser(Id))
    forename: Name = parser_field(dataclass_parser(Name))
    surname: Optional[Name] = parser_field(dataclass_parser(Name).optional())  # may be absent


# Parse a sequence of PersonDetail records and verify the structured result.
out_parser = dataclass_parser(PersonDetail).many()

new_person = out_parser.parse("007 2023 Rob T John 123 2004 Bob")
print(new_person)

# Expected value for the parse above.
res = [
    PersonDetail(
        id=Id(id="007", from_year=2023),
        forename=Name(name="Rob", abbreviated=True),
        surname=Name(name="John", abbreviated=None),
    ),
    PersonDetail(id=Id(id="123", from_year=2004), forename=Name(name="Bob", abbreviated=None), surname=None),
]
# Fix: `res` was built but never checked, unlike every other example in this
# file — compare it against the parsed output so the example actually verifies.
assert new_person == res

# Dataclass parsing where not all fields have a parsy parser


@dataclass
class PersonWithRarity:
    """Like Person, but with a non-parsed field derived in __post_init__."""

    name: str = parser_field(regex(r"\w+") << whitespace)
    age: int = parser_field(regex(r"\d+").map(int) << whitespace)
    note: str = parser_field(regex(".+"))
    # Not parsed from input: defaults to False and is derived from `age` below.
    rare: bool = False

    def __post_init__(self):
        # Anyone older than 70 is flagged as rare after parsing completes.
        if self.age > 70:
            self.rare = True


# Under 70: `rare` keeps its default of False.
person_parser = dataclass_parser(PersonWithRarity)
person = person_parser.parse("Rob 20 whippersnapper")
print(person)
assert person == PersonWithRarity(name="Rob", age=20, note="whippersnapper", rare=False)

# Over 70: __post_init__ flips `rare` to True.
person = person_parser.parse("Rob 2000 how time flies")
print(person)
assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True)
39 changes: 39 additions & 0 deletions examples/generator_typed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from dataclasses import dataclass
from typing import Generator

from parsy import Parser, generate, regex, success, whitespace


@dataclass
class Person:
    """Plain record produced by the generator-based parser below."""

    name: str
    age: int
    note: str


def person_parser():
    """Build and return a generator-based Person parser.

    NOTE(review): the inner @generate function deliberately reuses the outer
    name `person_parser`; the outer call returns the decorated parser object.
    """

    @generate
    def person_parser() -> Generator[Parser[str], str, Person]:
        # By yielding parsers of a single type, the type system works.
        # Homogeneous generator types don't exist.
        name = yield regex(r"\w+") << whitespace

        # But every parser starts by matching a string anyway: other types only come
        # from further function logic, which doesn't need to be part of the parser when
        # using a generator:
        age_text = yield regex(r"\d+") << whitespace
        age = int(age_text)
        if age > 20:
            # Parsing depends on previously parsed values
            note = yield regex(".+") >> success("Older than a score")
        else:
            note = yield regex(".+")

        return Person(name, age, note)

    return person_parser


# age 21 > 20, so the note is replaced by "Older than a score".
person = person_parser().parse("Rob 21 once upon a time")

print(person)
53 changes: 30 additions & 23 deletions examples/json.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import TypeVar
from parsy import Parser, forward_declaration, regex, string
from typing import Dict, List, TypeVar, Union

from parsy import Parser, ParserReference, generate, regex, string

# Utilities
whitespace = regex(r"\s*")
@@ -39,20 +40,27 @@ def lexeme(p: Parser[T]) -> Parser[T]:
quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"'))

# Data structures
json_value = forward_declaration()
object_pair = (quoted << colon) & json_value
json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace
array = lbrack >> json_value.sep_by(comma) << rbrack
JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]


@generate
def _json_parser() -> ParserReference[JSON]:
return (yield json_parser)


object_pair = (quoted << colon) & _json_parser
json_object = lbrace >> object_pair.sep_by(comma).map(lambda a: {g[0]: g[1] for g in a}) << rbrace
array = lbrack >> _json_parser.sep_by(comma) << rbrack

# Everything
json_value.become(quoted | number | json_object | array | true | false | null)
json_doc = whitespace >> json_value
json_parser = quoted | number | json_object | array | true | false | null

json_doc = whitespace >> json_parser


def test():
assert (
json_doc.parse(
r"""
result = json_doc.parse(
r"""
{
"int": 1,
"string": "hello",
@@ -62,19 +70,18 @@ def test():
"other": [true, false, null]
}
"""
)
== {
"int": 1,
"string": "hello",
"a list": [1, 2, 3],
"escapes": "\n ⓒ",
"nested": {"x": "y"},
"other": [True, False, None],
}
)
print(result)
assert result == {
"int": 1,
"string": "hello",
"a list": [1, 2, 3],
"escapes": "\n ⓒ",
"nested": {"x": "y"},
"other": [True, False, None],
}


if __name__ == "__main__":
from sys import stdin

print(repr(json_doc.parse(stdin.read())))
test()
# print(repr(json_doc.parse(stdin.read())))
24 changes: 24 additions & 0 deletions examples/sequence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from dataclasses import dataclass

from parsy import regex, seq, whitespace


@dataclass
class Person:
    """Record built from a three-parser sequence via seq(...).combine."""

    name: str
    age: int
    note: str


# seq yields a tuple of the three results, in positional order.
person_arg_sequence = seq(
    regex(r"\w+"),
    whitespace >> regex(r"\d+").map(int),
    whitespace.then(regex(r".+")),  # .then(...) is the method form of >>
)
# combine unpacks the tuple into Person's positional arguments.
person_parser = person_arg_sequence.combine(Person)

person = person_parser.parse("Rob 1000 pretty old")

print(person)

assert person == Person(name="Rob", age=1000, note="pretty old")
106 changes: 53 additions & 53 deletions examples/simple_eval.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,70 @@
from parsy import digit, generate, match_item, regex, string, success, test_item
# from parsy import digit, generate, match_char, regex, string, success


def lexer(code):
whitespace = regex(r"\s*")
integer = digit.at_least(1).concat().map(int)
float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float)
parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many()
return parser.parse(code)
# def lexer(code):
# whitespace = regex(r"\s*")
# integer = digit.at_least(1).concat().map(int)
# float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float)
# parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many()
# return parser.parse(code)


def eval_tokens(tokens):
# This function parses and evaluates at the same time.
# def eval_tokens(tokens):
# # This function parses and evaluates at the same time.

lparen = match_item("(")
rparen = match_item(")")
# lparen = match_char("(")
# rparen = match_char(")")

@generate
def additive():
res = yield multiplicative
sign = match_item("+") | match_item("-")
while True:
operation = yield sign | success("")
if not operation:
break
operand = yield multiplicative
if operation == "+":
res += operand
elif operation == "-":
res -= operand
return res
# @generate
# def additive():
# res = yield multiplicative
# sign = match_char("+") | match_char("-")
# while True:
# operation = yield sign | success("")
# if not operation:
# break
# operand = yield multiplicative
# if operation == "+":
# res += operand
# elif operation == "-":
# res -= operand
# return res

@generate
def multiplicative():
res = yield simple
op = match_item("*") | match_item("/")
while True:
operation = yield op | success("")
if not operation:
break
operand = yield simple
if operation == "*":
res *= operand
elif operation == "/":
res /= operand
return res
# @generate
# def multiplicative():
# res = yield simple
# op = match_char("*") | match_char("/")
# while True:
# operation = yield op | success("")
# if not operation:
# break
# operand = yield simple
# if operation == "*":
# res *= operand
# elif operation == "/":
# res /= operand
# return res

@generate
def number():
sign = yield match_item("+") | match_item("-") | success("+")
value = yield test_item(lambda x: isinstance(x, (int, float)), "number")
return value if sign == "+" else -value
# @generate
# def number():
# sign = yield match_char("+") | match_char("-") | success("+")
# value = yield test_item(lambda x: isinstance(x, (int, float)), "number")
# return value if sign == "+" else -value

expr = additive
simple = (lparen >> expr << rparen) | number
# expr = additive
# simple = (lparen >> expr << rparen) | number

return expr.parse(tokens)
# return expr.parse(tokens)


def simple_eval(expr):
return eval_tokens(lexer(expr))
# def simple_eval(expr):
# return eval_tokens(lexer(expr))


import pytest # noqa isort:skip
# import pytest # noqa isort:skip

test_item = pytest.mark.skip(test_item) # This is not a test
# test_item = pytest.mark.skip(test_item) # This is not a test


if __name__ == "__main__":
print(simple_eval(input()))
# if __name__ == "__main__":
# print(simple_eval(input()))
33 changes: 29 additions & 4 deletions examples/simple_logo_lexer.py
Original file line number Diff line number Diff line change
@@ -8,14 +8,16 @@
etc.
"""

from parsy import eof, regex, string, string_from, whitespace, Parser
from dataclasses import dataclass

from parsy import dataclass_parser, eof, parser_field, regex, string, string_from, whitespace

command = string_from("fd", "bk", "rt", "lt")
number = regex(r"[0-9]+").map(int)
optional_whitespace = regex(r"\s*")
eol = string("\n")
line = (optional_whitespace >> command) & (whitespace >> number) & (eof | eol | (whitespace >> eol)).result("\n")
lexer: Parser[list[object]] = line.many().map(lambda lines: sum(([t0, t1, t2] for ((t0, t1), t2) in lines), []))
line = (optional_whitespace >> command).join(whitespace >> number) << (eof | eol | (whitespace >> eol))
lexer = line.many()


def test_lexer() -> None:
@@ -25,5 +27,28 @@ def test_lexer() -> None:
bk 2
"""
)
== ["fd", 1, "\n", "bk", 2, "\n"]
== [("fd", 1), ("bk", 2)]
)


"""
Alternative which creates a more structured output
"""


@dataclass
class Instruction:
    """One LOGO command line, e.g. "fd 1": a command word and its distance."""

    command: str = parser_field(optional_whitespace >> command)  # fd/bk/rt/lt
    # Distance terminated by EOF, a newline, or whitespace then a newline.
    distance: int = parser_field(whitespace >> number << (eof | eol | (whitespace >> eol)))


# Lexing the same input as test_lexer, but into structured Instruction records.
instruction_parser = dataclass_parser(Instruction).many()

assert (
    instruction_parser.parse(
        """fd 1
bk 2
"""
    )
    == [Instruction("fd", 1), Instruction("bk", 2)]
)
49 changes: 0 additions & 49 deletions examples/simple_logo_parser.py

This file was deleted.

784 changes: 784 additions & 0 deletions parsy/__init__.py

Large diffs are not rendered by default.

File renamed without changes.
353 changes: 353 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -10,3 +10,24 @@ default_section = "THIRDPARTY"
skip = [".tox", ".git", "docs", "dist", "build" , "todo"]
known_first_party = "parsy"

[tool.poetry]
name = "parsy"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
readme = "README.rst"
packages = [{include = "parsy"}]

[tool.poetry.dependencies]
python = "^3.7"
typing-extensions = "^4.5.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.3.1"
mypy = "^1.3.0"
black = "^23.3.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -42,4 +42,5 @@
keywords="parser parsers parsing monad combinators",
packages=find_packages("src"),
package_dir={"": "src"},
install_requires=["typing-extensions"],
)
541 changes: 0 additions & 541 deletions src/parsy/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion src/parsy/version.py

This file was deleted.

327 changes: 175 additions & 152 deletions tests/test_parsy.py

Large diffs are not rendered by default.

34 changes: 19 additions & 15 deletions tests/test_sexpr.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import re
import unittest
from typing import List, TypeVar, Union

from parsy import generate, regex, string
from parsy import Parser, ParserReference, generate, regex, string

whitespace = regex(r"\s+", re.MULTILINE)
whitespace = regex(r"\s+")
comment = regex(r";.*")
ignore = (whitespace | comment).many()

lexeme = lambda p: p << ignore
T = TypeVar("T")


def lexeme(parser: Parser[T]) -> Parser[T]:
    """Wrap a parser so it also consumes (and discards) trailing whitespace/comments."""
    return parser << ignore


lparen = lexeme(string("("))
rparen = lexeme(string(")"))
@@ -18,22 +23,21 @@

atom = true | false | number | symbol


@generate
def form():
yield lparen
els = yield expr.many()
yield rparen
return els
PT = Union[str, bool, int, List["PT"]]


@generate
def quote():
yield string("'")
e = yield expr
return ["quote", e]
def _expr() -> ParserReference[PT]:
# expr is referred to before it's defined
return (yield expr)


# expr is indirectly used via _expr, so form and quote can be built before
# expr itself exists at module level.
form = lparen >> _expr.many() << rparen
quote = string("'") >> _expr.map(lambda e: ["quote", e])

# Here, expr is finally defined, combining parsers which already refer to it via
# _expr, which creates a recursive parser
expr = form | quote | atom
program = ignore >> expr.many()