Skip to content

Typed seq combine alternative #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@
src/parsy.egg-info
docs/_build
.cache
__pycache__
.python-version
.venv
.vscode
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ repos:
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
- repo: https://github.com/pycqa/flake8.git
rev: 3.9.2
hooks:
- id: flake8
language_version: python3.9
Expand Down
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ incompatible** version of parsy that has strong type guarantees, with no
This means removing anything that cannot be typed fully, and providing
alternatives. Main changes:

* Removed ``seq``, and replaced it with ``&`` operator support which returns a 2-tuple
* Removed ``seq``, and replaced it with ``join`` which creates a 2-tuple result, and
``append`` which takes an ``n``-tuple result and adds the result of another parser to
the end, producing an ``n+1``-tuple result.
* Removed ``alt`` - you can use only ``|`` operator.
* Removed ``.combine`` and ``.combine_dict`` - you have to use ``.map`` instead,
which is type-safe but much trickier, especially once you have nested tuples.
* Removed ``.combine_dict`` - you have to use ``.map`` or ``.combine`` instead,
which is type-safe but loses the benefit of keyword sequence parsers.

The docs have not been updated, you’ll need to look at the source code
if you are interested.
Expand Down
3 changes: 2 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
from typing import List

collect_ignore: list[str] = []
collect_ignore: List[str] = []

if sys.version_info < (3, 7):
# Python 3.6 and below don't have `dataclasses`
Expand Down
7 changes: 2 additions & 5 deletions docs/ref/methods_and_combinators.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,19 @@ can be used and manipulated as below.
Returns a parser that expects the initial parser at least ``n`` times, and
produces a list of the results.

.. method:: until(other_parser, [min=0, max=inf, consume_other=False])
.. method:: until(other_parser, [min=0, max=inf])

Returns a parser that expects the initial parser followed by ``other_parser``.
The initial parser is expected at least ``min`` times and at most ``max`` times.
By default, it does not consume ``other_parser`` and it produces a list of the
results excluding ``other_parser``. If ``consume_other`` is ``True`` then
``other_parser`` is consumed and its result is included in the list of results.
results excluding ``other_parser``.

.. code:: python

>>> seq(string('A').until(string('B')), string('BC')).parse('AAABC')
[['A','A','A'], 'BC']
>>> string('A').until(string('B')).then(string('BC')).parse('AAABC')
'BC'
>>> string('A').until(string('BC'), consume_other=True).parse('AAABC')
['A', 'A', 'A', 'BC']

.. versionadded:: 2.0

Expand Down
114 changes: 114 additions & 0 deletions examples/dataclass_parser_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from typing import List

from parsy import dataclass_parser, parser_field, regex, string

text = """Sample text

A selection of students from Riverdale High and Hogwarts took part in a quiz. This is a record of their scores.

School = Riverdale High
Grade = 1
Student number, Name
0, Phoebe
1, Rachel

Student number, Score
0, 3
1, 7

Grade = 2
Student number, Name
0, Angela
1, Tristan
2, Aurora

Student number, Score
0, 6
1, 3
2, 9

School = Hogwarts
Grade = 1
Student number, Name
0, Ginny
1, Luna

Student number, Score
0, 8
1, 7

Grade = 2
Student number, Name
0, Harry
1, Hermione

Student number, Score
0, 5
1, 10

Grade = 3
Student number, Name
0, Fred
1, George

Student number, Score
0, 0
1, 0
"""


# Lexical helpers: an unsigned integer, and a run of text up to (not including) a newline.
integer = regex(r"\d+").map(int)
any_text = regex(r"[^\n]+")


@dataclass
class Student:
    # Parses one roster line of the form "<number>, <name>\n".
    # Fields parse in declaration order.
    number: int = parser_field(integer << string(", "))
    name: str = parser_field(any_text << string("\n"))


@dataclass
class Score:
    # Parses one score line of the form "<number>, <score>\n".
    number: int = parser_field(integer << string(", "))
    score: int = parser_field(integer << string("\n"))


@dataclass
class StudentWithScore:
    # Plain result record (no parser fields): a Student joined with the
    # Score that shares the same student number.
    name: str
    number: int
    score: int


@dataclass
class Grade:
    # One "Grade = N" section: the grade number, then the name table,
    # then the score table (each table consumed with its header line).
    grade: int = parser_field(string("Grade = ") >> integer << string("\n"))
    students: List[Student] = parser_field(
        string("Student number, Name\n") >> dataclass_parser(Student).many() << regex(r"\n*")
    )
    scores: List[Score] = parser_field(
        string("Student number, Score\n") >> dataclass_parser(Score).many() << regex(r"\n*")
    )

    @property
    def students_with_scores(self) -> List[StudentWithScore]:
        """Join the parsed students with their scores by student number."""
        names = {st.number: st.name for st in self.students}
        return [StudentWithScore(names[score.number], score.number, score.score) for score in self.scores]


@dataclass
class School:
    # One "School = <name>" section followed by all of its grade sections.
    name: str = parser_field(string("School = ") >> any_text << string("\n"))
    grades: List[Grade] = parser_field(dataclass_parser(Grade).many())


@dataclass
class File:
    # Whole document: free-text header (everything before the first
    # "School =" marker, via a non-greedy lookahead) then the schools.
    header: str = parser_field(regex(r"[\s\S]*?(?=School =)"))
    schools: List[School] = parser_field(dataclass_parser(School).many())


if __name__ == "__main__":
    # Parse the sample text above into the dataclass tree and show the result.
    file = dataclass_parser(File).parse(text)
    print(file.schools)
82 changes: 82 additions & 0 deletions examples/dataclass_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from dataclasses import dataclass
from typing import Optional

from parsy import dataclass_parser, parser_field, regex, string, whitespace


@dataclass
class Person:
    # Each parser_field's parser runs in declaration order: a word, an
    # integer age, then the rest of the input as a free-text note.
    name: str = parser_field(regex(r"\w+") << whitespace)
    age: int = parser_field(regex(r"\d+").map(int) << whitespace)
    note: str = parser_field(regex(".+"))


# Build a parser from the dataclass and check it against a sample input.
person_parser = dataclass_parser(Person)
person = person_parser.parse("Rob 2000 how time flies")
print(person)
assert person == Person(name="Rob", age=2000, note="how time flies")


# Nesting dataclass parsers


@dataclass
class Id:
    # An identifier token, optionally followed by a numeric year.
    id: str = parser_field(regex(r"[^\s]+") << whitespace.optional())
    from_year: Optional[int] = parser_field(
        regex("[0-9]+").map(int).desc("Numeric").optional() << whitespace.optional()
    )


@dataclass
class Name:
    # An alphabetic name, optionally followed by a "T"/"F" abbreviation flag
    # (mapped to True/False; None when the flag is absent).
    name: str = parser_field(regex(r"[a-zA-Z]+") << whitespace.optional())
    abbreviated: Optional[bool] = parser_field(
        (string("T") | string("F")).map(lambda x: x == "T").optional() << whitespace.optional()
    )


@dataclass
class PersonDetail:
    # Nested dataclass parsers: an Id, a forename, and an optional surname.
    id: Id = parser_field(dataclass_parser(Id))
    forename: Name = parser_field(dataclass_parser(Name))
    surname: Optional[Name] = parser_field(dataclass_parser(Name).optional())


# Parse a sequence of PersonDetail records from a single string.
out_parser = dataclass_parser(PersonDetail).many()

new_person = out_parser.parse("007 2023 Rob T John 123 2004 Bob")
print(new_person)

# Expected result of the parse above.
res = [
    PersonDetail(
        id=Id(id="007", from_year=2023),
        forename=Name(name="Rob", abbreviated=True),
        surname=Name(name="John", abbreviated=None),
    ),
    PersonDetail(id=Id(id="123", from_year=2004), forename=Name(name="Bob", abbreviated=None), surname=None),
]
# Fix: `res` was built but never compared to the parsed value, unlike every
# other demo in this file — actually check it.
assert new_person == res

# Dataclass parsing where not all fields have a parsy parser


@dataclass
class PersonWithRarity:
    # Fields with parser_field defaults are parsed from the input; `rare`
    # has a plain default and is instead derived in __post_init__.
    name: str = parser_field(regex(r"\w+") << whitespace)
    age: int = parser_field(regex(r"\d+").map(int) << whitespace)
    note: str = parser_field(regex(".+"))
    rare: bool = False

    def __post_init__(self):
        # Derive `rare` from the parsed age after field assignment.
        if self.age > 70:
            self.rare = True


# The non-parsed `rare` field stays False for an ordinary age...
person_parser = dataclass_parser(PersonWithRarity)
person = person_parser.parse("Rob 20 whippersnapper")
print(person)
assert person == PersonWithRarity(name="Rob", age=20, note="whippersnapper", rare=False)

# ...and __post_init__ flips it for an age above 70.
person = person_parser.parse("Rob 2000 how time flies")
print(person)
assert person == PersonWithRarity(name="Rob", age=2000, note="how time flies", rare=True)
39 changes: 39 additions & 0 deletions examples/generator_typed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from dataclasses import dataclass
from typing import Generator

from parsy import Parser, generate, regex, success, whitespace


@dataclass
class Person:
    # Plain result record produced by the generator-based parser below.
    name: str
    age: int
    note: str


def person_parser():
    """Build a Parser producing a Person, using the @generate (generator) style."""

    @generate
    def person_parser() -> Generator[Parser[str], str, Person]:
        # By yielding parsers of a single type, the type system works.
        # Homogeneous generator types don't exist.
        name = yield regex(r"\w+") << whitespace

        # But every parser starts by matching a string anyway: other types only come
        # from further function logic, which doesn't need to be part of the parser when
        # using a generator:
        age_text = yield regex(r"\d+") << whitespace
        age = int(age_text)
        if age > 20:
            # Parsing depends on previously parsed values
            note = yield regex(".+") >> success("Older than a score")
        else:
            note = yield regex(".+")

        return Person(name, age, note)

    return person_parser


# Demo: run the generator-based parser on a sample line.
person = person_parser().parse("Rob 21 once upon a time")

print(person)
53 changes: 30 additions & 23 deletions examples/json.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import TypeVar
from parsy import Parser, forward_declaration, regex, string
from typing import Dict, List, TypeVar, Union

from parsy import Parser, ParserReference, generate, regex, string

# Utilities
whitespace = regex(r"\s*")
Expand Down Expand Up @@ -39,20 +40,27 @@ def lexeme(p: Parser[T]) -> Parser[T]:
quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"'))

# Data structures
json_value = forward_declaration()
object_pair = (quoted << colon) & json_value
json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace
array = lbrack >> json_value.sep_by(comma) << rbrack
JSON = Union[Dict[str, "JSON"], List["JSON"], str, int, float, bool, None]


@generate
def _json_parser() -> ParserReference[JSON]:
    # Lazy indirection: defers the reference to `json_parser` (defined later
    # in this module) until parse time, enabling the recursive grammar.
    return (yield json_parser)


object_pair = (quoted << colon) & _json_parser
json_object = lbrace >> object_pair.sep_by(comma).map(lambda a: {g[0]: g[1] for g in a}) << rbrace
array = lbrack >> _json_parser.sep_by(comma) << rbrack

# Everything
json_value.become(quoted | number | json_object | array | true | false | null)
json_doc = whitespace >> json_value
json_parser = quoted | number | json_object | array | true | false | null

json_doc = whitespace >> json_parser


def test():
assert (
json_doc.parse(
r"""
result = json_doc.parse(
r"""
{
"int": 1,
"string": "hello",
Expand All @@ -62,19 +70,18 @@ def test():
"other": [true, false, null]
}
"""
)
== {
"int": 1,
"string": "hello",
"a list": [1, 2, 3],
"escapes": "\n ⓒ",
"nested": {"x": "y"},
"other": [True, False, None],
}
)
print(result)
assert result == {
"int": 1,
"string": "hello",
"a list": [1, 2, 3],
"escapes": "\n ⓒ",
"nested": {"x": "y"},
"other": [True, False, None],
}


if __name__ == "__main__":
from sys import stdin

print(repr(json_doc.parse(stdin.read())))
test()
# print(repr(json_doc.parse(stdin.read())))
Loading