Skip to content

Commit 8a685e6

Browse files
authored
Expression cleanup and optimization. (#174)
* Expression cleanup and optimization.
* Remove obsolete draft-2 pluggable expression engine.
* Unify string interpolation for JavaScript / non-JavaScript.
* Check for simple parameter references and avoid calling out to full JavaScript.
* Remember successful execution of "docker pull node:slim".
1 parent 692fcf9 commit 8a685e6

File tree

3 files changed

+179
-237
lines changed

3 files changed

+179
-237
lines changed

cwltool/expression.py

Lines changed: 129 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,93 +1,109 @@
1-
from . import docker
21
import subprocess
32
import json
4-
from .utils import aslist, get_feature
53
import logging
64
import os
7-
from .errors import WorkflowException
5+
import re
6+
7+
from typing import Any, AnyStr, Union, Text, Dict, List
88
import schema_salad.validate as validate
99
import schema_salad.ref_resolver
10+
11+
from .utils import aslist, get_feature
12+
from .errors import WorkflowException
1013
from . import sandboxjs
11-
import re
12-
from typing import Any, AnyStr, Union, Text
14+
from . import docker
1315

1416
_logger = logging.getLogger("cwltool")
1517

1618
def jshead(engineConfig, rootvars):
    # type: (List[Text], Dict[Text, Any]) -> Text
    """Build the JavaScript preamble placed in front of an expression.

    The result is the lines of ``engineConfig`` followed by one
    ``var <name> = <json>;`` declaration per entry of ``rootvars``, all
    joined with newlines.  Values are serialised with ``json.dumps``
    using a four-space indent.
    """
    declarations = []
    for name, value in rootvars.items():
        rendered = json.dumps(value, indent=4)
        declarations.append(u"var %s = %s;" % (name, rendered))
    return u"\n".join(engineConfig + declarations)
1921

20-
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
21-
# type: (Dict[Text, Any], Dict[Text, Union[Dict, List, Text]], List[Dict[Text, Any]], Text, Text, Any, bool) -> sandboxjs.JSON
22-
23-
if ex["engine"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
24-
engineConfig = [] # type: List[Text]
25-
for r in reversed(requirements):
26-
if r["class"] == "ExpressionEngineRequirement" and r["id"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
27-
engineConfig = r.get("engineConfig", [])
28-
break
29-
rootvars = {
30-
u"inputs": jobinput,
31-
u"self": context,
32-
u"runtime": {
33-
u"tmpdir": tmpdir,
34-
u"outdir": outdir }
35-
}
36-
return sandboxjs.execjs(ex["script"], jshead(engineConfig, rootvars))
37-
38-
for r in reversed(requirements):
39-
if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
40-
runtime = [] # type: List[str]
41-
42-
class DR(object):
43-
def __init__(self): # type: () -> None
44-
self.requirements = None # type: List[None]
45-
self.hints = None # type: List[None]
46-
dr = DR()
47-
dr.requirements = r.get("requirements", [])
48-
dr.hints = r.get("hints", [])
49-
50-
(docker_req, docker_is_req) = get_feature(dr, "DockerRequirement")
51-
img_id = None
52-
if docker_req:
53-
img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
54-
if img_id:
55-
runtime = ["docker", "run", "-i", "--rm", str(img_id)]
56-
57-
inp = {
58-
"script": ex["script"],
59-
"engineConfig": r.get("engineConfig", []),
60-
"job": jobinput,
61-
"context": context,
62-
"outdir": outdir,
63-
"tmpdir": tmpdir,
64-
}
65-
66-
_logger.debug(u"Invoking expression engine %s with %s",
67-
runtime + aslist(r["engineCommand"]),
68-
json.dumps(inp, indent=4))
69-
70-
sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
71-
shell=False,
72-
close_fds=True,
73-
stdin=subprocess.PIPE,
74-
stdout=subprocess.PIPE)
75-
76-
(stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
77-
if sp.returncode != 0:
78-
raise WorkflowException(u"Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))
79-
80-
return json.loads(stdoutdata)
81-
82-
raise WorkflowException(u"Unknown expression engine '%s'" % ex["engine"])
83-
8422
# Building blocks for parameter references such as inputs.file['a b'][0].
seg_symbol = r"""\w+"""                # bare identifier
seg_single = r"""\['([^']|\\')+'\]"""  # ['single-quoted key']
seg_double = r"""\["([^"]|\\")+"\]"""  # ["double-quoted key"]
seg_index = r"""\[[0-9]+\]"""          # [0] numeric index

# One path segment: ".name", a quoted key, or a numeric index.
segments = "(" + "|".join((r"\." + seg_symbol, seg_single, seg_double, seg_index)) + ")"
segment_re = re.compile(segments, flags=re.UNICODE)

# A whole parenthesised reference WITHOUT the leading '$': a root symbol
# (captured as group 1) followed by any number of segments, ending at the
# closing parenthesis at the end of the text.
param_re = re.compile(r"\((%s)%s*\)$" % (seg_symbol, segments), flags=re.UNICODE)
29+
30+
JSON = Union[Dict[Any,Any], List[Any], Text, int, long, float, bool, None]
31+
32+
class SubstitutionError(Exception):
    """Raised by scanner() when a substitution block is left unfinished."""
34+
35+
def scanner(scan):  # type: (Text) -> Union[List[int], None]
    """Locate the first substitution or backslash escape in ``scan``.

    A small stack-based state machine walks the text looking for either

    * a ``$(...)`` or ``${...}`` block, tracking nested parentheses/braces
      and skipping over single- and double-quoted strings inside it, or
    * a backslash escape at the top level (backslash plus one character).

    Returns ``[start, end]`` (end exclusive) for the first match, where
    ``scan[start]`` is either ``'$'`` or a backslash, or ``None`` when the
    text contains nothing to substitute.

    Raises SubstitutionError when a ``$(`` / ``${`` block is opened but
    never closed.
    """
    DEFAULT = 0
    DOLLAR = 1
    PAREN = 2
    BRACE = 3
    SINGLE_QUOTE = 4
    DOUBLE_QUOTE = 5
    BACKSLASH = 6

    i = 0
    stack = [DEFAULT]
    start = 0
    while i < len(scan):
        state = stack[-1]
        c = scan[i]

        if state == DEFAULT:
            if c == '$':
                stack.append(DOLLAR)
            elif c == '\\':
                stack.append(BACKSLASH)
        elif state == BACKSLASH:
            stack.pop()
            # Only a top-level escape is reported; escapes inside quoted
            # strings are merely skipped so the quote is not closed early.
            if stack[-1] == DEFAULT:
                return [i - 1, i + 1]
        elif state == DOLLAR:
            if c == '(':
                start = i - 1
                stack.append(PAREN)
            elif c == '{':
                start = i - 1
                stack.append(BRACE)
            else:
                # The '$' was literal text.  Re-examine this same character
                # in the enclosing state instead of swallowing it, so that
                # e.g. "$$(x)" still finds the expression at index 1.
                stack.pop()
                continue
        elif state in (PAREN, BRACE):
            if state == PAREN:
                opener, closer = '(', ')'
            else:
                opener, closer = '{', '}'
            if c == opener:
                stack.append(state)
            elif c == closer:
                stack.pop()
                # Back at the '$' that opened the block: it is complete.
                if stack[-1] == DOLLAR:
                    return [start, i + 1]
            elif c == "'":
                stack.append(SINGLE_QUOTE)
            elif c == '"':
                stack.append(DOUBLE_QUOTE)
        elif state in (SINGLE_QUOTE, DOUBLE_QUOTE):
            quote = "'" if state == SINGLE_QUOTE else '"'
            if c == quote:
                stack.pop()
            elif c == '\\':
                stack.append(BACKSLASH)
        i += 1

    # A lone trailing '$' or backslash is literal text, not an unfinished
    # block; anything deeper means a "$(" or "${" was never closed.
    trailing_literal = len(stack) == 2 and stack[1] in (DOLLAR, BACKSLASH)
    if len(stack) > 1 and not trailing_literal:
        raise SubstitutionError("Substitution error, unfinished block starting at position {}: {}".format(start, scan[start:]))
    return None
91107

92108
def next_seg(remain, obj): # type: (Text, Any)->Text
93109
if remain:
@@ -103,24 +119,42 @@ def next_seg(remain, obj): # type: (Text, Any)->Text
103119
else:
104120
return obj
105121

106-
107-
def param_interpolate(ex, obj, strip=True):
108-
# type: (Text, Dict[Any, Any], bool) -> Union[Text, Text]
109-
m = param_re.search(ex)
122+
def evaluator(ex, jslib, obj, fullJS=False, timeout=None):
    # type: (Text, Text, Dict[Text, Any], bool, int) -> JSON
    """Evaluate one parenthesised/braced expression body.

    ``ex`` is the expression text without its leading ``$``.  If it is a
    simple parameter reference (it matches ``param_re``) the value is
    looked up directly in ``obj`` without starting a JavaScript engine.
    Otherwise, when ``fullJS`` is true, ``ex`` is handed to
    ``sandboxjs.execjs`` together with ``jslib`` and ``timeout``.

    Raises sandboxjs.JavascriptException when ``ex`` is not a parameter
    reference and ``fullJS`` is false.
    """
    m = param_re.match(ex)
    if m:
        # Group 1 is the root symbol; the slice is everything after it up
        # to (not including) the closing parenthesis, i.e. the path segments
        # that next_seg resolves against the root value.
        # NOTE(review): any text matching param_re is treated as a parameter
        # reference even when fullJS is set; a root symbol missing from obj
        # raises KeyError here -- confirm that is intended.
        return next_seg(m.group(0)[m.end(1) - m.start(0):-1], obj[m.group(1)])
    elif fullJS:
        return sandboxjs.execjs(ex, jslib, timeout=timeout)
    else:
        # Format the message with '%': passing ``ex`` as a second positional
        # argument would leave the '%s' placeholder unfilled.
        raise sandboxjs.JavascriptException(
            "Syntax error in parameter reference '%s' or used Javascript code without specifying InlineJavascriptRequirement." % ex)
131+
132+
def interpolate(scan, rootvars,
                timeout=None, fullJS=None, jslib=""):
    # type: (Text, Dict[Text, Any], int, bool, Union[str, Text]) -> JSON
    """Substitute every ``$(...)`` / ``${...}`` expression found in ``scan``.

    ``scan`` is stripped of surrounding whitespace and then processed left
    to right using ``scanner``; each expression is evaluated by
    ``evaluator`` with ``rootvars``, ``jslib``, ``fullJS`` and ``timeout``.

    * If the whole (stripped) text is exactly one expression, the evaluated
      value is returned as-is, whatever its type.
    * Otherwise a string is returned: string results are inserted verbatim,
      other results are inserted as JSON, and a top-level backslash escape
      is replaced by the character that follows the backslash.
    """
    scan = scan.strip()
    parts = []
    w = scanner(scan)
    while w:
        parts.append(scan[0:w[0]])

        if scan[w[0]] == '$':
            e = evaluator(scan[w[0]+1:w[1]], jslib, rootvars, fullJS=fullJS,
                          timeout=timeout)
            # ``scan`` shrinks on every pass, so also require that nothing
            # has been collected yet; otherwise "$(a)$(b)" would return just
            # the value of b and silently drop everything before it.
            if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                return e
            leaf = json.dumps(e, sort_keys=True)
            if leaf[0] == '"':
                # A string result: decode it again rather than slicing off
                # the quotes, so JSON escapes (\n, \", \uXXXX) do not leak
                # into the interpolated text.
                leaf = json.loads(leaf)
            parts.append(leaf)
        elif scan[w[0]] == '\\':
            # Escape sequence: keep only the escaped character.
            parts.append(scan[w[1]-1])

        scan = scan[w[1]:]
        w = scanner(scan)
    parts.append(scan)
    return ''.join(parts)
124158

125159
def do_eval(ex, jobinput, requirements, outdir, tmpdir, resources,
126160
context=None, pull_image=True, timeout=None):
@@ -135,13 +169,19 @@ def do_eval(ex, jobinput, requirements, outdir, tmpdir, resources,
135169
u"self": context,
136170
u"runtime": runtime }
137171

138-
if isinstance(ex, dict) and "engine" in ex and "script" in ex:
139-
return exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image)
140172
if isinstance(ex, (str, Text)):
173+
fullJS = False
174+
jslib = u""
141175
for r in reversed(requirements):
142176
if r["class"] == "InlineJavascriptRequirement":
143-
return sandboxjs.interpolate(Text(ex), jshead(
144-
r.get("expressionLib", []), rootvars), timeout=timeout)
145-
return param_interpolate(Text(ex), rootvars)
177+
fullJS = True
178+
jslib = jshead(r.get("expressionLib", []), rootvars)
179+
break
180+
181+
return interpolate(ex,
182+
rootvars,
183+
timeout=timeout,
184+
fullJS=fullJS,
185+
jslib=jslib)
146186
else:
147187
return ex

cwltool/sandboxjs.py

Lines changed: 6 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ class JavascriptException(Exception):
1313

1414
JSON = Union[Dict[Any,Any], List[Any], Text, int, long, float, bool, None]
1515

16-
def execjs(js, jslib, timeout=None): # type: (Union[Mapping,Text], Any, int) -> JSON
16+
have_node_slim = False
17+
18+
def execjs(js, jslib, timeout=None): # type: (Union[Mapping, Text], Any, int) -> JSON
1719
nodejs = None
1820
trynodes = ("nodejs", "node")
1921
for n in trynodes:
@@ -29,10 +31,11 @@ def execjs(js, jslib, timeout=None): # type: (Union[Mapping,Text], Any, int) ->
2931
if nodejs is None:
3032
try:
3133
nodeimg = "node:slim"
32-
dlist = subprocess.check_output(["docker", "images", nodeimg])
33-
if "node" not in dlist:
34+
global have_node_slim
35+
if not have_node_slim:
3436
nodejsimg = subprocess.check_output(["docker", "pull", nodeimg])
3537
_logger.info("Pulled Docker image %s %s", nodeimg, nodejsimg)
38+
have_node_slim = True
3639
nodejs = subprocess.Popen(["docker", "run",
3740
"--attach=STDIN", "--attach=STDOUT", "--attach=STDERR",
3841
"--sig-proxy=true", "--interactive",
@@ -86,104 +89,3 @@ def fn_linenum(): # type: () -> Text
8689
return json.loads(stdoutdata)
8790
except ValueError as e:
8891
raise JavascriptException(u"%s\nscript was:\n%s\nstdout was: '%s'\nstderr was: '%s'\n" % (e, fn_linenum(), stdoutdata, stderrdata))
89-
90-
class SubstitutionError(Exception):
91-
pass
92-
93-
94-
def scanner(scan): # type: (Text) -> List[int]
95-
DEFAULT = 0
96-
DOLLAR = 1
97-
PAREN = 2
98-
BRACE = 3
99-
SINGLE_QUOTE = 4
100-
DOUBLE_QUOTE = 5
101-
BACKSLASH = 6
102-
103-
i = 0
104-
stack = [DEFAULT]
105-
start = 0
106-
while i < len(scan):
107-
state = stack[-1]
108-
c = scan[i]
109-
110-
if state == DEFAULT:
111-
if c == '$':
112-
stack.append(DOLLAR)
113-
elif c == '\\':
114-
stack.append(BACKSLASH)
115-
elif state == BACKSLASH:
116-
stack.pop()
117-
if stack[-1] == DEFAULT:
118-
return [i-1, i+1]
119-
elif state == DOLLAR:
120-
if c == '(':
121-
start = i-1
122-
stack.append(PAREN)
123-
elif c == '{':
124-
start = i-1
125-
stack.append(BRACE)
126-
elif state == PAREN:
127-
if c == '(':
128-
stack.append(PAREN)
129-
elif c == ')':
130-
stack.pop()
131-
if stack[-1] == DOLLAR:
132-
return [start, i+1]
133-
elif c == "'":
134-
stack.append(SINGLE_QUOTE)
135-
elif c == '"':
136-
stack.append(DOUBLE_QUOTE)
137-
elif state == BRACE:
138-
if c == '{':
139-
stack.append(BRACE)
140-
elif c == '}':
141-
stack.pop()
142-
if stack[-1] == DOLLAR:
143-
return [start, i+1]
144-
elif c == "'":
145-
stack.append(SINGLE_QUOTE)
146-
elif c == '"':
147-
stack.append(DOUBLE_QUOTE)
148-
elif state == SINGLE_QUOTE:
149-
if c == "'":
150-
stack.pop()
151-
elif c == '\\':
152-
stack.append(BACKSLASH)
153-
elif state == DOUBLE_QUOTE:
154-
if c == '"':
155-
stack.pop()
156-
elif c == '\\':
157-
stack.append(BACKSLASH)
158-
i += 1
159-
160-
if len(stack) > 1:
161-
raise SubstitutionError("Substitution error, unfinished block starting at position {}: {}".format(start, scan[start:]))
162-
else:
163-
return None
164-
165-
166-
def interpolate(scan, jslib, timeout=None):
167-
# type: (Text, Union[Text, Text], int) -> JSON
168-
scan = scan.strip()
169-
parts = []
170-
w = scanner(scan)
171-
while w:
172-
parts.append(scan[0:w[0]])
173-
174-
if scan[w[0]] == '$':
175-
e = execjs(scan[w[0]+1:w[1]], jslib, timeout=timeout)
176-
if w[0] == 0 and w[1] == len(scan):
177-
return e
178-
leaf = json.dumps(e, sort_keys=True)
179-
if leaf[0] == '"':
180-
leaf = leaf[1:-1]
181-
parts.append(leaf)
182-
elif scan[w[0]] == '\\':
183-
e = scan[w[1]-1]
184-
parts.append(e)
185-
186-
scan = scan[w[1]:]
187-
w = scanner(scan)
188-
parts.append(scan)
189-
return ''.join(parts)

0 commit comments

Comments
 (0)