Skip to content

Commit 0433da3

Browse files
authored
Packing refactor (#177)
* Refactor the 'pack into single graph' functionality into a separate module. * Don't modify the input step object in WorkflowStep; copy it instead. * Use the Text type. * Add a test for the packing feature. * Sort the pack output for deterministic results.
1 parent 8a685e6 commit 0433da3

11 files changed

+404
-67
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ eggs/
88
.eggs/
99
*.egg-info/
1010
*.egg
11+
.tox/
1112

1213
# Editor Temps
1314
.*.sw?

cwltool/main.py

Lines changed: 3 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from . import draft2tool
3131
from .builder import adjustFileObjs, adjustDirObjs
3232
from .stdfsaccess import StdFsAccess
33+
from .pack import pack
3334

3435
_logger = logging.getLogger("cwltool")
3536

@@ -495,74 +496,9 @@ def makeRelative(ob):
495496

496497
stdout.write(json.dumps(deps, indent=4))
497498

498-
def flatten_deps(d, files):  # type: (Any, Set[Text]) -> None
    """Add the "location" of every dependency record found in d to files.

    d may be a list (each element is visited recursively) or a dict, whose
    "location" is recorded and whose "secondaryFiles" entry, when present,
    is visited recursively. Any other value is ignored. files is mutated
    in place; nothing is returned.
    """
    if isinstance(d, list):
        for entry in d:
            flatten_deps(entry, files)
        return

    if not isinstance(d, dict):
        return

    files.add(d["location"])
    if "secondaryFiles" in d:
        flatten_deps(d["secondaryFiles"], files)
506-
507-
def find_run(d, runs):  # type: (Any, Set[Text]) -> None
    """Collect every string-valued "run" field nested anywhere inside d.

    Lists are searched element by element. For a dict, a "run" entry that is
    a string is added to runs, and then every value of the dict is searched
    recursively (so a "run" holding an embedded dict is descended into rather
    than recorded). Other values are ignored. runs is mutated in place.
    """
    if isinstance(d, list):
        for s in d:
            find_run(s, runs)
    elif isinstance(d, dict):
        # Accept both str and Text, matching the check replace_refs applies
        # before it looks the same "run" value up in its rewrite table.
        if "run" in d and isinstance(d["run"], (str, Text)):
            runs.add(d["run"])
        for s in d.values():
            find_run(s, runs)
516-
517-
def replace_refs(d, rewrite, stem, newstem):
    # type: (Any, Dict[Text, Text], Text, Text) -> None
    """Rewrite identifiers inside d in place.

    Every string list element or dict value that starts with stem has that
    prefix replaced by newstem. In a dict, a string-valued "run" entry is
    first replaced by rewrite[<old run value>] (a missing key raises
    KeyError). Nested lists and dicts are processed recursively; values of
    any other type are left untouched.
    """
    def has_stem(value):
        return isinstance(value, (str, Text)) and value.startswith(stem)

    def restem(value):
        return newstem + value[len(stem):]

    if isinstance(d, list):
        for index, item in enumerate(d):
            if has_stem(item):
                d[index] = restem(item)
            else:
                replace_refs(item, rewrite, stem, newstem)
        return

    if not isinstance(d, dict):
        return

    if "run" in d and isinstance(d["run"], (str, Text)):
        d["run"] = rewrite[d["run"]]
    for key, value in d.items():
        if has_stem(value):
            # Only existing keys are reassigned, so the dict keeps its size
            # while it is being iterated.
            d[key] = restem(value)
        replace_refs(value, rewrite, stem, newstem)
532-
533499
def print_pack(document_loader, processobj, uri, metadata):
534-
# type: (Loader, Any, Text, Dict[Text, Text]) -> Text
535-
def loadref(b, u):
536-
# type: (Text, Text) -> Union[Dict, List, Text]
537-
return document_loader.resolve_ref(u, base_url=b)[0]
538-
deps = scandeps(uri, processobj, set(("run",)), set(), loadref)
539-
540-
fdeps = set((uri,))
541-
flatten_deps(deps, fdeps)
542-
543-
runs = set() # type: Set[Text]
544-
for f in fdeps:
545-
find_run(document_loader.idx[f], runs)
546-
547-
rewrite = {}
548-
if isinstance(processobj, list):
549-
for p in processobj:
550-
rewrite[p["id"]] = "#" + shortname(p["id"])
551-
else:
552-
rewrite[uri] = "#main"
553-
554-
for r in runs:
555-
rewrite[r] = "#" + shortname(r)
556-
557-
packed = {"$graph": [], "cwlVersion": metadata["cwlVersion"]
558-
} # type: Dict[Text, Any]
559-
for r,v in rewrite.items():
560-
dc = cast(Dict[Text, Any], copy.deepcopy(document_loader.idx[r]))
561-
dc["id"] = v
562-
dc["name"] = v
563-
replace_refs(dc, rewrite, r+"/" if "#" in r else r+"#", v+"/")
564-
packed["$graph"].append(dc)
565-
500+
# type: (Loader, Union[Dict[unicode, Any], List[Dict[unicode, Any]]], unicode, Dict[unicode, Any]) -> str
501+
packed = pack(document_loader, processobj, uri, metadata)
566502
if len(packed["$graph"]) > 1:
567503
return json.dumps(packed, indent=4)
568504
else:

cwltool/pack.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import copy
2+
import json
3+
4+
from schema_salad.ref_resolver import Loader
5+
6+
from .process import scandeps, shortname
7+
8+
from typing import Union, Any, cast, Callable, Dict, Tuple, Type, IO, Text
9+
10+
def flatten_deps(d, files):  # type: (Any, Set[Text]) -> None
    """Gather the "location" of each dependency record in d into files.

    A list is walked element by element. A dict contributes its "location"
    and, when it has a "secondaryFiles" entry, that entry is walked as well.
    Anything else is skipped. The files set is updated in place.
    """
    if isinstance(d, dict):
        files.add(d["location"])
        if "secondaryFiles" in d:
            flatten_deps(d["secondaryFiles"], files)
        return

    if isinstance(d, list):
        for dep in d:
            flatten_deps(dep, files)
18+
19+
def find_run(d, runs):  # type: (Any, Set[Text]) -> None
    """Collect every string-valued "run" field nested anywhere inside d.

    Lists are searched element by element. For a dict, a "run" entry that is
    a string is added to runs, and then every value of the dict is searched
    recursively (so a "run" holding an embedded dict is descended into rather
    than recorded). Other values are ignored. runs is mutated in place.
    """
    if isinstance(d, list):
        for s in d:
            find_run(s, runs)
    elif isinstance(d, dict):
        # Text (from typing) stands in for the bare name ``unicode``, which
        # does not exist on Python 3.
        if "run" in d and isinstance(d["run"], (str, Text)):
            runs.add(d["run"])
        for s in d.values():
            find_run(s, runs)
28+
29+
def replace_refs(d, rewrite, stem, newstem):
    # type: (Any, Dict[Text, Text], Text, Text) -> None
    """Rewrite identifiers inside d in place.

    Every string list element or dict value that starts with stem has that
    prefix replaced by newstem. In a dict, a string-valued "run" entry is
    first replaced by rewrite[<old run value>]; a run value missing from
    rewrite raises KeyError. Nested lists and dicts are processed
    recursively; values of any other type are left untouched.

    Mappings that contain a "package" key are processed like any other
    mapping.
    """
    if isinstance(d, list):
        for s, v in enumerate(d):
            if isinstance(v, (str, Text)) and v.startswith(stem):
                d[s] = newstem + v[len(stem):]
            else:
                replace_refs(v, rewrite, stem, newstem)
    elif isinstance(d, dict):
        # Text (from typing) stands in for the bare name ``unicode``, which
        # does not exist on Python 3.
        if "run" in d and isinstance(d["run"], (str, Text)):
            d["run"] = rewrite[d["run"]]
        for s, v in d.items():
            if isinstance(v, (str, Text)) and v.startswith(stem):
                # Only existing keys are reassigned, so the dict keeps its
                # size while it is being iterated.
                d[s] = newstem + v[len(stem):]
            replace_refs(v, rewrite, stem, newstem)
46+
47+
def pack(document_loader, processobj, uri, metadata):
    # type: (Loader, Union[Dict[Text, Any], List[Dict[Text, Any]]], Text, Dict[Text, Text]) -> Dict[Text, Any]
    """Build a single "$graph" document from a process and the documents it runs.

    Dependencies reachable through "run" fields are found with scandeps and
    flattened into a set of document URIs (always including uri). Every
    string "run" reference in those documents, plus the top-level process
    or processes, is assigned a new fragment id: "#main" for a single
    process, otherwise "#" + shortname(original id). Each referenced
    document is deep-copied from document_loader.idx, given its new id,
    stripped of its "name", "package" and "cwlVersion" fields, and has its
    internal references rewritten with replace_refs.

    Returns a dict holding the "$graph" list (in sorted order of the
    original ids) and the "cwlVersion" taken from metadata.
    """
    def loadref(b, u):
        # type: (Text, Text) -> Union[Dict, List, Text]
        return document_loader.resolve_ref(u, base_url=b)[0]

    deps = scandeps(uri, processobj, set(("run",)), set(), loadref)

    fdeps = set((uri,))
    flatten_deps(deps, fdeps)

    runs = set()  # type: Set[Text]
    for doc_uri in fdeps:
        find_run(document_loader.idx[doc_uri], runs)

    # Map each original id to the fragment id it gets in the packed graph.
    if isinstance(processobj, list):
        rewrite = dict((p["id"], "#" + shortname(p["id"])) for p in processobj)
    else:
        rewrite = {uri: "#main"}
    rewrite.update((r, "#" + shortname(r)) for r in runs)

    packed = {
        "$graph": [],
        "cwlVersion": metadata["cwlVersion"],
    }  # type: Dict[Text, Any]

    # Iterate in sorted order so the resulting graph is deterministic.
    for original_id in sorted(rewrite):
        packed_id = rewrite[original_id]
        entry = cast(Dict[Text, Any],
                     copy.deepcopy(document_loader.idx[original_id]))
        entry["id"] = packed_id
        for field in ("name", "package", "cwlVersion"):
            entry.pop(field, None)
        if "#" in original_id:
            old_stem = original_id + "/"
        else:
            old_stem = original_id + "#"
        replace_refs(entry, rewrite, old_stem, packed_id + "/")
        packed["$graph"].append(entry)

    return packed

cwltool/process.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,17 @@ class Process(object):
312312

313313
def __init__(self, toolpath_object, **kwargs):
314314
# type: (Dict[Text, Any], **Any) -> None
315+
"""
316+
kwargs:
317+
318+
metadata: tool document metadata
319+
requirements: inherited requirements
320+
hints: inherited hints
321+
loader: schema_salad.ref_resolver.Loader used to load tool document
322+
avsc_names: CWL Avro schema object used to validate document
323+
strict: flag to determine strict validation (fail on unrecognized fields)
324+
"""
325+
315326
self.metadata = kwargs.get("metadata", {}) # type: Dict[Text,Any]
316327
self.names = None # type: avro.schema.Names
317328

@@ -338,6 +349,9 @@ def __init__(self, toolpath_object, **kwargs):
338349
if "loader" in kwargs:
339350
self.formatgraph = kwargs["loader"].graph
340351

352+
self.doc_loader = kwargs["loader"]
353+
self.doc_schema = kwargs["avsc_names"]
354+
341355
checkRequirements(self.tool, supportedProcessRequirements)
342356
self.validate_hints(kwargs["avsc_names"], self.tool.get("hints", []),
343357
strict=kwargs.get("strict"))
@@ -395,6 +409,22 @@ def __init__(self, toolpath_object, **kwargs):
395409

396410
def _init_job(self, joborder, **kwargs):
397411
# type: (Dict[Text, Text], **Any) -> Builder
412+
"""
413+
kwargs:
414+
415+
eval_timeout: javascript evaluation timeout
416+
use_container: do/don't use Docker when DockerRequirement hint provided
417+
make_fs_access: make an FsAccess() object with given basedir
418+
basedir: basedir for FsAccess
419+
docker_outdir: output directory inside docker for this job
420+
docker_tmpdir: tmpdir inside docker for this job
421+
docker_stagedir: stagedir inside docker for this job
422+
outdir: outdir on host for this job
423+
tmpdir: tmpdir on host for this job
424+
stagedir: stagedir on host for this job
425+
select_resources: callback to select compute resources
426+
"""
427+
398428
builder = Builder()
399429
builder.job = cast(Dict[Text, Union[Dict[Text, Any], List,
400430
Text]], copy.deepcopy(joborder))

cwltool/workflow.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ def __init__(self, toolpath_object, pos, **kwargs):
443443
u"Tool definition %s failed validation:\n%s" %
444444
(toolpath_object["run"], validate.indent(str(v))))
445445

446+
self.tool = toolpath_object = copy.deepcopy(toolpath_object)
446447
for stepfield, toolfield in (("in", "inputs"), ("out", "outputs")):
447448
toolpath_object[toolfield] = []
448449
for step_entry in toolpath_object[stepfield]:

tests/test_pack.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import unittest
2+
import json
3+
from cwltool.load_tool import fetch_document, validate_document
4+
import cwltool.pack
5+
import cwltool.workflow
6+
7+
class TestPack(unittest.TestCase):
    """Checks cwltool.pack.pack against a stored expected document."""

    def test_pack(self):
        """Pack tests/wf/revsort.cwl and compare with tests/wf/expect_packed.cwl."""
        # Show the complete diff when the comparison fails.
        self.maxDiff = None

        loader, raw_workflow, doc_uri = fetch_document("tests/wf/revsort.cwl")
        validated = validate_document(loader, raw_workflow, doc_uri)
        loader, avsc_names, processobj, metadata, doc_uri = validated

        actual = cwltool.pack.pack(loader, processobj, doc_uri, metadata)

        with open("tests/wf/expect_packed.cwl") as expected_file:
            expected = json.load(expected_file)

        self.assertEqual(expected, actual)

tests/wf/expect_packed.cwl

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
{
2+
"cwlVersion": "v1.0",
3+
"$graph": [
4+
{
5+
"inputs": [
6+
{
7+
"doc": "The input file to be processed.",
8+
"type": "File",
9+
"id": "#main/input"
10+
},
11+
{
12+
"default": true,
13+
"doc": "If true, reverse (decending) sort",
14+
"type": "boolean",
15+
"id": "#main/reverse_sort"
16+
}
17+
],
18+
"doc": "Reverse the lines in a document, then sort those lines.",
19+
"class": "Workflow",
20+
"steps": [
21+
{
22+
"out": [
23+
"#main/rev/output"
24+
],
25+
"run": "#revtool.cwl",
26+
"id": "#main/rev",
27+
"in": [
28+
{
29+
"source": "#main/input",
30+
"id": "#main/rev/input"
31+
}
32+
]
33+
},
34+
{
35+
"out": [
36+
"#main/sorted/output"
37+
],
38+
"run": "#sorttool.cwl",
39+
"id": "#main/sorted",
40+
"in": [
41+
{
42+
"source": "#main/rev/output",
43+
"id": "#main/sorted/input"
44+
},
45+
{
46+
"source": "#main/reverse_sort",
47+
"id": "#main/sorted/reverse"
48+
}
49+
]
50+
}
51+
],
52+
"outputs": [
53+
{
54+
"outputSource": "#main/sorted/output",
55+
"type": "File",
56+
"id": "#main/output",
57+
"doc": "The output with the lines reversed and sorted."
58+
}
59+
],
60+
"id": "#main",
61+
"hints": [
62+
{
63+
"dockerPull": "debian:8",
64+
"class": "DockerRequirement"
65+
}
66+
]
67+
},
68+
{
69+
"inputs": [
70+
{
71+
"inputBinding": {},
72+
"type": "File",
73+
"id": "#revtool.cwl/input"
74+
}
75+
],
76+
"stdout": "output.txt",
77+
"doc": "Reverse each line using the `rev` command",
78+
"baseCommand": "rev",
79+
"class": "CommandLineTool",
80+
"outputs": [
81+
{
82+
"outputBinding": {
83+
"glob": "output.txt"
84+
},
85+
"type": "File",
86+
"id": "#revtool.cwl/output"
87+
}
88+
],
89+
"id": "#revtool.cwl"
90+
},
91+
{
92+
"inputs": [
93+
{
94+
"inputBinding": {
95+
"position": 1,
96+
"prefix": "--reverse"
97+
},
98+
"type": "boolean",
99+
"id": "#sorttool.cwl/reverse"
100+
},
101+
{
102+
"inputBinding": {
103+
"position": 2
104+
},
105+
"type": "File",
106+
"id": "#sorttool.cwl/input"
107+
}
108+
],
109+
"stdout": "output.txt",
110+
"doc": "Sort lines using the `sort` command",
111+
"baseCommand": "sort",
112+
"class": "CommandLineTool",
113+
"outputs": [
114+
{
115+
"outputBinding": {
116+
"glob": "output.txt"
117+
},
118+
"type": "File",
119+
"id": "#sorttool.cwl/output"
120+
}
121+
],
122+
"id": "#sorttool.cwl"
123+
}
124+
]
125+
}

tests/wf/revsort-job.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"input": {
3+
"class": "File",
4+
"location": "whale.txt"
5+
}
6+
}

0 commit comments

Comments
 (0)