Skip to content

Commit 91d4cf6

Browse files
committed
Python: Update python.tsg
First, we extend the various location overriding hacks to also accept list and dict splats in various places.

Having done this, we then have to tackle how to actually desugar these new comprehension forms (as this is what we currently do for the old forms). As a reminder, a list comprehension like `[x for x in y]` currently gets desugared into a small local function, something like

```python
def listcomp(a):
    for x in a:
        yield x
listcomp(y)
```

For `[*x for x in y]`, the behaviour we want is that we unpack `x` before yielding its elements in turn. This is essentially what we would get if we were to use `yield from x` instead of `yield x` in the above desugaring, so that's what we do. This also works for set comprehensions.

For dict comprehensions, it's slightly more complicated. Here, the generator function instead yields a stream of `(key, value)` tuples. (And apparently the old parser got this wrong and emitted `(value, key)` pairs instead, which we faithfully recreated in the new parser as well. We fix that bug in both parsers while we're at it.) So, a bare `yield from` is not enough; we also need a `.items()` call to get the double-starred expression to emit its items as a stream of tuples (that we then `yield from`).

To make this (hopefully) less verbose in the implementation, we defer the decision of whether to use `yield` or `yield from` by introducing a `yield_kind` scoped variable that determines the type of the actual AST node. And of course for dict comprehensions with unpacking we need to synthesise the extra machinery mentioned above.

On the plus side, this means we don't have to mess with control-flow, as the existing machinery should be able to handle the desugared syntax just fine.
1 parent 97086c3 commit 91d4cf6

File tree

1 file changed

+66
-12
lines changed

1 file changed

+66
-12
lines changed

python/extractor/tsg-python/python.tsg

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@
403403

404404
;;; GeneratorExp
405405

406-
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp
406+
(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp
407407
{
408408
attr (@generatorexp.node) _location_start = (location-start @start)
409409
attr (@generatorexp.node) _location_end = (location-end @end)
@@ -415,13 +415,13 @@
415415
attr (@if.node) _location_end = (location-end @expr)
416416
}
417417

418-
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr
418+
(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @_genexpr
419419
{
420420
attr (@child.node) _location_start = (location-start @start)
421421
attr (@child.node) _location_end = (location-end @end)
422422
}
423423

424-
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr
424+
(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start (for_in_clause) @end . (comment)* . ")" .) @_genexpr
425425
{
426426
attr (@end.node) _location_start = (location-start @start)
427427
attr (@end.node) _location_end = (location-end @end)
@@ -824,6 +824,29 @@
824824
attr (@genexpr.arg_use) ctx = "load"
825825
}
826826

827+
; DictComp with unpacking (PEP 798): `{**d for d in dicts}`
828+
(dictionary_comprehension
829+
body: (dictionary_splat)
830+
) @genexpr
831+
{
832+
let @genexpr.fun = (ast-node @genexpr "Function")
833+
attr (@genexpr.node) function = @genexpr.fun
834+
attr (@genexpr.fun) name = "dictcomp"
835+
836+
let @genexpr.arg = (ast-node @genexpr "Name")
837+
attr (@genexpr.arg) variable = ".0"
838+
attr (@genexpr.arg) ctx = "param"
839+
840+
edge @genexpr.fun -> @genexpr.arg
841+
attr (@genexpr.fun -> @genexpr.arg) args = 0
842+
attr (@genexpr.fun) kwonlyargs = #null
843+
attr (@genexpr.fun) kwarg = #null
844+
845+
let @genexpr.arg_use = (ast-node @genexpr "Name")
846+
attr (@genexpr.arg_use) variable = ".0"
847+
attr (@genexpr.arg_use) ctx = "load"
848+
}
849+
827850
;;;;;; End of DictComp (`{a: b for c in d if e}`)
828851

829852
;;;;;; GeneratorExp (`(a for b in c if d)`)
@@ -862,7 +885,7 @@
862885
; information for the entire generator expression (yes, it is a wide parameter!) and so we must recreate the logic for
863886
; setting this location information correctly.
864887

865-
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
888+
(generator_expression . "(" . (comment)* . [(expression) (list_splat)] @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
866889
{
867890
; Synthesize the `genexpr` function
868891
let @genexpr.fun = (ast-node @genexpr "Function")
@@ -1034,12 +1057,25 @@
10341057
; For everything except dictionary comprehensions, the innermost expression is just the `body` of the
10351058
; comprehension.
10361059
[
1037-
(generator_expression body: (_) @body) @genexpr
1038-
(list_comprehension body: (_) @body) @genexpr
1039-
(set_comprehension body: (_) @body) @genexpr
1060+
(generator_expression body: (expression) @body) @genexpr
1061+
(list_comprehension body: (expression) @body) @genexpr
1062+
(set_comprehension body: (expression) @body) @genexpr
10401063
]
10411064
{
10421065
let @genexpr.result = @body.node
1066+
let @genexpr.yield_kind = "Yield"
1067+
}
1068+
1069+
; For starred comprehensions (PEP 798), the result is the inner expression (not the Starred
1070+
; wrapper), and we use `yield from` instead of `yield` to represent the unpacking semantics.
1071+
[
1072+
(generator_expression body: (list_splat (expression) @inner) @_body) @genexpr
1073+
(list_comprehension body: (list_splat (expression) @inner) @_body) @genexpr
1074+
(set_comprehension body: (list_splat (expression) @inner) @_body) @genexpr
1075+
]
1076+
{
1077+
let @genexpr.result = @inner.node
1078+
let @genexpr.yield_kind = "YieldFrom"
10431079
}
10441080

10451081
; For dict comprehensions, we build an explicit tuple using the key and value pair.
@@ -1052,13 +1088,31 @@
10521088
{
10531089
let tuple = (ast-node @body "Tuple")
10541090
edge tuple -> @key.node
1055-
attr (tuple -> @key.node) elts = 1
1091+
attr (tuple -> @key.node) elts = 0
10561092
edge tuple -> @value.node
1057-
attr (tuple -> @value.node) elts = 0
1058-
; TODO verify that it is correct to use a `(value, key)` tuple, and not a `(key, value)` tuple above.
1059-
; That is what the current parser does...
1093+
attr (tuple -> @value.node) elts = 1
10601094
attr (tuple) ctx = "load"
10611095
let @genexpr.result = tuple
1096+
let @genexpr.yield_kind = "Yield"
1097+
}
1098+
1099+
; For dict comprehensions with unpacking (PEP 798), `{**d for d in dicts}` desugars to
1100+
; `yield from d.items()` to produce (key, value) tuples consistent with the regular dict comp model.
1101+
(dictionary_comprehension
1102+
body: (dictionary_splat (expression) @inner) @_body
1103+
) @genexpr
1104+
{
1105+
; Synthesize `d.items()`: Attribute(value=d, attr='items') then Call(func=attr)
1106+
let attr = (ast-node @inner "Attribute")
1107+
attr (attr) value = @inner.node
1108+
attr (attr) attr = "items"
1109+
attr (attr) ctx = "load"
1110+
1111+
let call = (ast-node @inner "Call")
1112+
attr (call) func = attr
1113+
1114+
let @genexpr.result = call
1115+
let @genexpr.yield_kind = "YieldFrom"
10621116
}
10631117

10641118
; For the final clause, we need to hook it up with the rest of the expression.
@@ -1094,7 +1148,7 @@
10941148
let last = (get-last-element @last_candidates)
10951149

10961150
let expr = (ast-node @body "Expr")
1097-
let yield = (ast-node @body "Yield")
1151+
let yield = (ast-node @body @genexpr.yield_kind)
10981152

10991153
let @genexpr.expr = expr
11001154
let @genexpr.yield = yield

0 commit comments

Comments
 (0)