Skip to content

Typed seq combine alternative #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -5,3 +5,7 @@
src/parsy.egg-info
docs/_build
.cache
__pycache__
.python-version
.venv
.vscode
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -4,8 +4,8 @@ repos:
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
- repo: https://github.com/pycqa/flake8.git
rev: 3.9.2
hooks:
- id: flake8
language_version: python3.9
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
@@ -13,10 +13,12 @@ incompatible** version of parsy that has strong type guarantees, with no
This means removing anything that cannot be typed fully, and providing
alternatives. Main changes:

* Removed ``seq``, and replaced it with ``&`` operator support which returns a 2-tuple
* Removed ``seq``, and replaced it with ``join`` which creates a 2-tuple result, and
``append`` which takes an ``n``-tuple result and adds the result of another parser to
the end, producing an ``n+1``-tuple result.
* Removed ``alt`` - you can use only ``|`` operator.
* Removed ``.combine`` and ``.combine_dict`` - you have to use ``.map`` instead,
which is type-safe but much trickier, especially once you have nested tuples.
* Removed ``.combine_dict`` - you have to use ``.map`` or ``.combine`` instead,
which is type-safe but loses the benefit of keyword sequence parsers.

The docs have not been updated, you’ll need to look at the source code
if you are interested.
3 changes: 2 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
from typing import List

collect_ignore: list[str] = []
collect_ignore: List[str] = []

if sys.version_info < (3, 7):
# Python 3.6 and below don't have `dataclasses`
7 changes: 2 additions & 5 deletions docs/ref/methods_and_combinators.rst
Original file line number Diff line number Diff line change
@@ -111,22 +111,19 @@ can be used and manipulated as below.
Returns a parser that expects the initial parser at least ``n`` times, and
produces a list of the results.

.. method:: until(other_parser, [min=0, max=inf, consume_other=False])
.. method:: until(other_parser, [min=0, max=inf])

Returns a parser that expects the initial parser followed by ``other_parser``.
The initial parser is expected at least ``min`` times and at most ``max`` times.
By default, it does not consume ``other_parser`` and it produces a list of the
results excluding ``other_parser``. If ``consume_other`` is ``True`` then
``other_parser`` is consumed and its result is included in the list of results.
results excluding ``other_parser``.

.. code:: python

>>> seq(string('A').until(string('B')), string('BC')).parse('AAABC')
[['A','A','A'], 'BC']
>>> string('A').until(string('B')).then(string('BC')).parse('AAABC')
'BC'
>>> string('A').until(string('BC'), consume_other=True).parse('AAABC')
['A', 'A', 'A', 'BC']

.. versionadded:: 2.0

114 changes: 114 additions & 0 deletions examples/dataclass_parser_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from typing import List

from parsy import dataclass_parser, parser_field, regex, string

# Sample input: a free-text header followed by two schools, each containing
# per-grade "Student number, Name" and "Student number, Score" tables.
text = """Sample text

A selection of students from Riverdale High and Hogwarts took part in a quiz. This is a record of their scores.

School = Riverdale High
Grade = 1
Student number, Name
0, Phoebe
1, Rachel

Student number, Score
0, 3
1, 7

Grade = 2
Student number, Name
0, Angela
1, Tristan
2, Aurora

Student number, Score
0, 6
1, 3
2, 9

School = Hogwarts
Grade = 1
Student number, Name
0, Ginny
1, Luna

Student number, Score
0, 8
1, 7

Grade = 2
Student number, Name
0, Harry
1, Hermione

Student number, Score
0, 5
1, 10

Grade = 3
Student number, Name
0, Fred
1, George

Student number, Score
0, 0
1, 0
"""


# Basic lexemes shared by the dataclass parsers below.
integer = regex(r"\d+").map(int)  # one or more digits, converted to int
any_text = regex(r"[^\n]+")  # everything up to (but excluding) the next newline


@dataclass
class Student:
    """One row of a "Student number, Name" table, e.g. the line "0, Phoebe"."""

    number: int = parser_field(integer << string(", "))  # student number before the comma
    name: str = parser_field(any_text << string("\n"))  # rest of the line, newline consumed


@dataclass
class Score:
    """One row of a "Student number, Score" table, e.g. the line "0, 3"."""

    number: int = parser_field(integer << string(", "))  # student number before the comma
    score: int = parser_field(integer << string("\n"))  # numeric score, newline consumed


@dataclass
class StudentWithScore:
    """Joined record combining a student's name with their score.

    Plain data holder — built in ``Grade.students_with_scores``, not parsed.
    """

    name: str
    number: int
    score: int


@dataclass
class Grade:
    """A "Grade = N" section: the grade number plus its name and score tables."""

    # "Grade = N" header line; the trailing newline is consumed.
    grade: int = parser_field(string("Grade = ") >> integer << string("\n"))
    # Name table: a header line, then one Student per row; trailing blank lines skipped.
    students: List[Student] = parser_field(
        string("Student number, Name\n") >> dataclass_parser(Student).many() << regex(r"\n*")
    )
    # Score table: same shape as the name table, one Score per row.
    scores: List[Score] = parser_field(
        string("Student number, Score\n") >> dataclass_parser(Score).many() << regex(r"\n*")
    )

    @property
    def students_with_scores(self) -> List[StudentWithScore]:
        """Join scores to student names by student number.

        NOTE(review): a score whose number has no matching student raises
        KeyError — assumes the two tables cover the same student numbers.
        """
        names = {st.number: st.name for st in self.students}
        return [StudentWithScore(names[score.number], score.number, score.score) for score in self.scores]


@dataclass
class School:
    """A "School = X" section: the school name plus all of its grade sections."""

    name: str = parser_field(string("School = ") >> any_text << string("\n"))  # header line
    grades: List[Grade] = parser_field(dataclass_parser(Grade).many())  # zero or more Grade sections


@dataclass
class File:
    """The whole document: free-text preamble, then every School section."""

    # Non-greedy match of everything before the first "School =" (lookahead keeps it unconsumed).
    header: str = parser_field(regex(r"[\s\S]*?(?=School =)"))
    schools: List[School] = parser_field(dataclass_parser(School).many())


if __name__ == "__main__":
    # Parse the sample document above and show the structured result.
    file = dataclass_parser(File).parse(text)
    print(file.schools)
82 changes: 82 additions & 0 deletions examples/dataclass_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from dataclasses import dataclass
from typing import Optional

from parsy import dataclass_parser, parser_field, regex, string, whitespace


@dataclass
class Person:
    """Demo record parsed from "<name> <age> <note>", e.g. "Rob 2000 how time flies"."""

    name: str = parser_field(regex(r"\w+") << whitespace)  # word characters, trailing whitespace consumed
    age: int = parser_field(regex(r"\d+").map(int) << whitespace)  # digits converted to int
    note: str = parser_field(regex(".+"))  # remainder of the line


# Build a parser from the dataclass field parsers and check a round trip.
person_parser = dataclass_parser(Person)
person = person_parser.parse("Rob 2000 how time flies")
print(person)
assert person == Person(name="Rob", age=2000, note="how time flies")


# Nesting dataclass parsers


@dataclass
class Id:
    """An identifier token with an optional numeric "from year" after it."""

    id: str = parser_field(regex(r"[^\s]+") << whitespace.optional())  # any non-whitespace run
    # Optional year; .desc("Numeric") names the expectation in error messages.
    from_year: Optional[int] = parser_field(
        regex("[0-9]+").map(int).desc("Numeric").optional() << whitespace.optional()
    )


@dataclass
class Name:
    """A name with an optional "T"/"F" abbreviation flag following it."""

    name: str = parser_field(regex(r"[a-zA-Z]+") << whitespace.optional())  # alphabetic name
    # "T" -> True, "F" -> False; None when the flag is absent.
    abbreviated: Optional[bool] = parser_field(
        (string("T") | string("F")).map(lambda x: x == "T").optional() << whitespace.optional()
    )


@dataclass
class PersonDetail:
    """Nested dataclass parsers: an Id, a forename, and an optional surname."""

    id: Id = parser_field(dataclass_parser(Id))
    forename: Name = parser_field(dataclass_parser(Name))
    surname: Optional[Name] = parser_field(dataclass_parser(Name).optional())  # may be absent


# Parse a sequence of PersonDetail records and verify the structured result.
out_parser = dataclass_parser(PersonDetail).many()

new_person = out_parser.parse("007 2023 Rob T John 123 2004 Bob")
print(new_person)

# Expected value for the parse above.
res = [
    PersonDetail(
        id=Id(id="007", from_year=2023),
        forename=Name(name="Rob", abbreviated=True),
        surname=Name(name="John", abbreviated=None),
    ),
    PersonDetail(id=Id(id="123", from_year=2004), forename=Name(name="Bob", abbreviated=None), surname=None),
]
# Fix: `res` was built but never checked, unlike every other example in this
# file — compare it against the parsed output so the example actually verifies.
assert new_person == res

# Dataclass parsing where not all fields have a parsy parser


@dataclass
class PersonWithRarity:
    """Like Person, but with a non-parsed field derived in __post_init__."""

    name: str = parser_field(regex(r"\w+") << whitespace)
    age: int = parser_field(regex(r"\d+").map(int) << whitespace)
    note: str = parser_field(regex(".+"))
    # Not parsed from input: defaults to False and is derived from `age` below.
    rare: bool = False

    def __post_init__(self):
        # Anyone older than 70 is flagged as rare after parsing completes.
        if self.age > 70:
            self.rare = True


# Under 70: `rare` keeps its default of False.
person_parser = dataclass_parser(PersonWithRarity)
person = person_parser.parse("Rob 20 whippersnapper")
print(person)
assert person == PersonWithRarity(name="Rob", age=20, note="whippersnapper", rare=False)

# Over 70: __post_init__ flips `rare` to True.
person = person_parser.parse("Rob 2000 how time flies")
print(person)
assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True)
39 changes: 39 additions & 0 deletions examples/generator_typed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from dataclasses import dataclass
from typing import Generator

from parsy import Parser, generate, regex, success, whitespace


@dataclass
class Person:
    """Plain record produced by the generator-based parser below."""

    name: str
    age: int
    note: str


def person_parser():
    """Build and return a generator-based Person parser.

    NOTE(review): the inner @generate function deliberately reuses the outer
    name `person_parser`; the outer call returns the decorated parser object.
    """

    @generate
    def person_parser() -> Generator[Parser[str], str, Person]:
        # By yielding parsers of a single type, the type system works.
        # Homogeneous generator types don't exist.
        name = yield regex(r"\w+") << whitespace

        # But every parser starts by matching a string anyway: other types only come
        # from further function logic, which doesn't need to be part of the parser when
        # using a generator:
        age_text = yield regex(r"\d+") << whitespace
        age = int(age_text)
        if age > 20:
            # Parsing depends on previously parsed values
            note = yield regex(".+") >> success("Older than a score")
        else:
            note = yield regex(".+")

        return Person(name, age, note)

    return person_parser


# age 21 > 20, so the note is replaced by "Older than a score".
person = person_parser().parse("Rob 21 once upon a time")

print(person)
53 changes: 30 additions & 23 deletions examples/json.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import TypeVar
from parsy import Parser, forward_declaration, regex, string
from typing import Dict, List, TypeVar, Union

from parsy import Parser, ParserReference, generate, regex, string

# Utilities
whitespace = regex(r"\s*")
@@ -39,20 +40,27 @@ def lexeme(p: Parser[T]) -> Parser[T]:
quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"'))

# Data structures
json_value = forward_declaration()
object_pair = (quoted << colon) & json_value
json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace
array = lbrack >> json_value.sep_by(comma) << rbrack
JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]


@generate
def _json_parser() -> ParserReference[JSON]:
return (yield json_parser)


object_pair = (quoted << colon) & _json_parser
json_object = lbrace >> object_pair.sep_by(comma).map(lambda a: {g[0]: g[1] for g in a}) << rbrace
array = lbrack >> _json_parser.sep_by(comma) << rbrack

# Everything
json_value.become(quoted | number | json_object | array | true | false | null)
json_doc = whitespace >> json_value
json_parser = quoted | number | json_object | array | true | false | null

json_doc = whitespace >> json_parser


def test():
assert (
json_doc.parse(
r"""
result = json_doc.parse(
r"""
{
"int": 1,
"string": "hello",
@@ -62,19 +70,18 @@ def test():
"other": [true, false, null]
}
"""
)
== {
"int": 1,
"string": "hello",
"a list": [1, 2, 3],
"escapes": "\n ⓒ",
"nested": {"x": "y"},
"other": [True, False, None],
}
)
print(result)
assert result == {
"int": 1,
"string": "hello",
"a list": [1, 2, 3],
"escapes": "\n ⓒ",
"nested": {"x": "y"},
"other": [True, False, None],
}


if __name__ == "__main__":
from sys import stdin

print(repr(json_doc.parse(stdin.read())))
test()
# print(repr(json_doc.parse(stdin.read())))
24 changes: 24 additions & 0 deletions examples/sequence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from dataclasses import dataclass

from parsy import regex, seq, whitespace


@dataclass
class Person:
    """Record built from a three-parser sequence via seq(...).combine."""

    name: str
    age: int
    note: str


# seq yields a tuple of the three results, in positional order.
person_arg_sequence = seq(
    regex(r"\w+"),
    whitespace >> regex(r"\d+").map(int),
    whitespace.then(regex(r".+")),  # .then(...) is the method form of >>
)
# combine unpacks the tuple into Person's positional arguments.
person_parser = person_arg_sequence.combine(Person)

person = person_parser.parse("Rob 1000 pretty old")

print(person)

assert person == Person(name="Rob", age=1000, note="pretty old")
106 changes: 53 additions & 53 deletions examples/simple_eval.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,70 @@
from parsy import digit, generate, match_item, regex, string, success, test_item
# from parsy import digit, generate, match_char, regex, string, success


def lexer(code):
whitespace = regex(r"\s*")
integer = digit.at_least(1).concat().map(int)
float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float)
parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many()
return parser.parse(code)
# def lexer(code):
# whitespace = regex(r"\s*")
# integer = digit.at_least(1).concat().map(int)
# float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float)
# parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many()
# return parser.parse(code)


def eval_tokens(tokens):
# This function parses and evaluates at the same time.
# def eval_tokens(tokens):
# # This function parses and evaluates at the same time.

lparen = match_item("(")
rparen = match_item(")")
# lparen = match_char("(")
# rparen = match_char(")")

@generate
def additive():
res = yield multiplicative
sign = match_item("+") | match_item("-")
while True:
operation = yield sign | success("")
if not operation:
break
operand = yield multiplicative
if operation == "+":
res += operand
elif operation == "-":
res -= operand
return res
# @generate
# def additive():
# res = yield multiplicative
# sign = match_char("+") | match_char("-")
# while True:
# operation = yield sign | success("")
# if not operation:
# break
# operand = yield multiplicative
# if operation == "+":
# res += operand
# elif operation == "-":
# res -= operand
# return res

@generate
def multiplicative():
res = yield simple
op = match_item("*") | match_item("/")
while True:
operation = yield op | success("")
if not operation:
break
operand = yield simple
if operation == "*":
res *= operand
elif operation == "/":
res /= operand
return res
# @generate
# def multiplicative():
# res = yield simple
# op = match_char("*") | match_char("/")
# while True:
# operation = yield op | success("")
# if not operation:
# break
# operand = yield simple
# if operation == "*":
# res *= operand
# elif operation == "/":
# res /= operand
# return res

@generate
def number():
sign = yield match_item("+") | match_item("-") | success("+")
value = yield test_item(lambda x: isinstance(x, (int, float)), "number")
return value if sign == "+" else -value
# @generate
# def number():
# sign = yield match_char("+") | match_char("-") | success("+")
# value = yield test_item(lambda x: isinstance(x, (int, float)), "number")
# return value if sign == "+" else -value

expr = additive
simple = (lparen >> expr << rparen) | number
# expr = additive
# simple = (lparen >> expr << rparen) | number

return expr.parse(tokens)
# return expr.parse(tokens)


def simple_eval(expr):
return eval_tokens(lexer(expr))
# def simple_eval(expr):
# return eval_tokens(lexer(expr))


import pytest # noqa isort:skip
# import pytest # noqa isort:skip

test_item = pytest.mark.skip(test_item) # This is not a test
# test_item = pytest.mark.skip(test_item) # This is not a test


if __name__ == "__main__":
print(simple_eval(input()))
# if __name__ == "__main__":
# print(simple_eval(input()))
33 changes: 29 additions & 4 deletions examples/simple_logo_lexer.py
Original file line number Diff line number Diff line change
@@ -8,14 +8,16 @@
etc.
"""

from parsy import eof, regex, string, string_from, whitespace, Parser
from dataclasses import dataclass

from parsy import dataclass_parser, eof, parser_field, regex, string, string_from, whitespace

command = string_from("fd", "bk", "rt", "lt")
number = regex(r"[0-9]+").map(int)
optional_whitespace = regex(r"\s*")
eol = string("\n")
line = (optional_whitespace >> command) & (whitespace >> number) & (eof | eol | (whitespace >> eol)).result("\n")
lexer: Parser[list[object]] = line.many().map(lambda lines: sum(([t0, t1, t2] for ((t0, t1), t2) in lines), []))
line = (optional_whitespace >> command).join(whitespace >> number) << (eof | eol | (whitespace >> eol))
lexer = line.many()


def test_lexer() -> None:
@@ -25,5 +27,28 @@ def test_lexer() -> None:
bk 2
"""
)
== ["fd", 1, "\n", "bk", 2, "\n"]
== [("fd", 1), ("bk", 2)]
)


"""
Alternative which creates a more structured output
"""


@dataclass
class Instruction:
    """One LOGO command line, e.g. "fd 1": a command word and its distance."""

    command: str = parser_field(optional_whitespace >> command)  # fd/bk/rt/lt
    # Distance terminated by EOF, a newline, or whitespace then a newline.
    distance: int = parser_field(whitespace >> number << (eof | eol | (whitespace >> eol)))


# Lexing the same input as test_lexer, but into structured Instruction records.
instruction_parser = dataclass_parser(Instruction).many()

assert (
    instruction_parser.parse(
        """fd 1
bk 2
"""
    )
    == [Instruction("fd", 1), Instruction("bk", 2)]
)
49 changes: 0 additions & 49 deletions examples/simple_logo_parser.py

This file was deleted.

784 changes: 784 additions & 0 deletions parsy/__init__.py

Large diffs are not rendered by default.

File renamed without changes.
353 changes: 353 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -10,3 +10,24 @@ default_section = "THIRDPARTY"
skip = [".tox", ".git", "docs", "dist", "build" , "todo"]
known_first_party = "parsy"

[tool.poetry]
name = "parsy"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
readme = "README.rst"
packages = [{include = "parsy"}]

[tool.poetry.dependencies]
python = "^3.7"
typing-extensions = "^4.5.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.3.1"
mypy = "^1.3.0"
black = "^23.3.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -42,4 +42,5 @@
keywords="parser parsers parsing monad combinators",
packages=find_packages("src"),
package_dir={"": "src"},
install_requires=["typing-extensions"],
)
541 changes: 0 additions & 541 deletions src/parsy/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion src/parsy/version.py

This file was deleted.

327 changes: 175 additions & 152 deletions tests/test_parsy.py

Large diffs are not rendered by default.

34 changes: 19 additions & 15 deletions tests/test_sexpr.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import re
import unittest
from typing import List, TypeVar, Union

from parsy import generate, regex, string
from parsy import Parser, ParserReference, generate, regex, string

whitespace = regex(r"\s+", re.MULTILINE)
whitespace = regex(r"\s+")
comment = regex(r";.*")
ignore = (whitespace | comment).many()

lexeme = lambda p: p << ignore
T = TypeVar("T")


def lexeme(parser: Parser[T]) -> Parser[T]:
    """Wrap a parser so it also consumes (and discards) trailing whitespace/comments."""
    return parser << ignore


lparen = lexeme(string("("))
rparen = lexeme(string(")"))
@@ -18,22 +23,21 @@

atom = true | false | number | symbol


@generate
def form():
yield lparen
els = yield expr.many()
yield rparen
return els
PT = Union[str, bool, int, List["PT"]]


@generate
def quote():
yield string("'")
e = yield expr
return ["quote", e]
def _expr() -> ParserReference[PT]:
# expr is referred to before it's defined
return (yield expr)


# expr is indirectly used via _expr, so form and quote can be built before
# expr itself exists at module level.
form = lparen >> _expr.many() << rparen
quote = string("'") >> _expr.map(lambda e: ["quote", e])

# Here, expr is finally defined, combining parsers which already refer to it via
# _expr, which creates a recursive parser
expr = form | quote | atom
program = ignore >> expr.many()