Skip to content

Commit cbfc14b

Browse files
committed
excel mapping hints
1 parent 00b6c4f commit cbfc14b

File tree

7 files changed

+153
-10
lines changed

7 files changed

+153
-10
lines changed

uml2es/examples/hr/README.md

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ Confirm:
116116

117117
Next, move our UML model into ML as an ES model. Run the following:
118118

119-
gradle -PenvironmentName=local -i ingestModel deployESModelToFinal loadExtendedModel
119+
gradle -PenvironmentName=local -i ingestModel deployESModelToFinal loadExtendedModel loadMappingSpec
120120

121121
Confirm:
122122
- Final DB (xmi2es-examples-hr-FINAL) includes the following documents
@@ -137,12 +137,25 @@ Among the results, you should see the following:
137137
- <http://com.marklogic.es.uml.hr/HR-0.0.1/Employee/memberOf> <http://marklogic.com/xmi2es/xes#relationship> "association" from the extended ES model
138138

139139
### Run Cookie-Cutter to Create DHF Entities and Flows for HR Model
140-
Now we create our DHF entity plugins.
140+
Now we create our DHF entity plugins. We leverage the toolkit's ability to cut/generate code.
141141

142+
#### 1. Create DHF Entities
142143
First, ask the toolkit to create the basic plugins (without any flows). It will infer which classes in the model should be plugins.
143144

144145
gradle -PenvironmentName=local -i umlCreateEntities -PmodelName=DHFEmployeeSample -PentitySelect=infer
145146

147+
Confirm:
148+
TODO
149+
150+
#### 2. The Excel stuff..
151+
TODO ... First, ask the toolkit to create the basic plugins (without any flows). It will infer which classes in the model should be plugins.
152+
153+
gradle -PenvironmentName=local -i umlCreateEntities -PmodelName=DHFEmployeeSample -PentitySelect=infer
154+
155+
Confirm:
156+
TODO
157+
158+
146159
Next, ask the toolkit to create harmonization flows that construct content using ES-style code.
147160

148161
gradle -PenvironmentName=local -i umlCreateHarmonizeFlow -PmodelName=DHFEmployeeSample -PflowName=harmonizeES -PentityName=Department -PpluginFormat=xqy -PdataFormat=xml -PcontentMode=es

uml2es/examples/hr/build.gradle

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,29 @@ task deployESModelToFinal(type: com.marklogic.gradle.task.MlcpTask) {
290290
The rest is cookie cutter
291291
*/
292292

293+
// Imports every *.xlsx under <project>/data/mapping through mlcp as binary documents,
// running each one through the /xmi2es/excelMapper.xqy transform on the way in.
task loadMappingSpec(type: com.marklogic.gradle.task.MlcpTask) {
    // Use forward slashes throughout so a Windows project path matches the URIs mlcp builds.
    def mappingDir = "${projectDir}".replace('\\', '/') + "/data/mapping"
    // Quoted (pattern,'replacement') pairs for mlcp: the local directory and "/" are both replaced by ''.
    def uriReplace = '"' + mappingDir + ",'',/,''" + '"'

    classpath = configurations.mlcp

    // Target server
    host = mlHost
    port = mlFinalPort.toInteger()

    // What to read
    command = "IMPORT"
    input_file_type = "documents"
    input_file_path = mappingDir + "/*.xlsx"
    document_type = "binary"

    // Where and how to write it
    output_uri_replace = uriReplace
    output_uri_prefix = "/xmi2es/excel-mapper/"
    output_collections = "xmi2es"
    output_permissions = "rest-reader,read,rest-writer,update"

    // Server-side transform applied to each workbook
    transform_module = "/xmi2es/excelMapper.xqy"
    transform_namespace = "http://marklogic.com/xmi2es/xlsx/mapper"
    transform_param = "dummy"
}
315+
293316
task deleteCutDump(type: Delete) {
294317
delete "data/cookieCutter-dump"
295318
}
256 Bytes
Binary file not shown.
428 Bytes
Binary file not shown.
221 Bytes
Binary file not shown.

uml2es/uml2esTransform/src/main/ml-modules/root/xmi2es/excel2uml.xqy

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ declare variable $VAL-YN := "y";
1818
declare variable $VAL-INT := "i";
1919
declare variable $VAL-CARDINALITY := "c";
2020

21-
declare variable $FIRST_PROP_ROW := 21;
21+
declare variable $FIRST-PROP-ROW := 21;
22+
23+
declare variable $DELIM-LINE := "[\n\r]";
24+
declare variable $DELIM-COMMA-LINE := "[\n\r,]";
2225

2326
(:
2427
Convert attributes in the excel to XMI
@@ -29,14 +32,14 @@ declare function xlsx:convertAttributes($entitySheet as node(), $classSheet as x
2932
let $attribNames := json:array()
3033

3134
(: the attributes :)
32-
let $lastPropertyRow := xlsx:excelLastRow($entitySheet, $classSheet, $stringTable, $pt)
35+
let $lastPropertyRow := xlsx:excelLastRow($entitySheet, $classSheet, $stringTable, $FIRST-PROP-ROW, $pt)
3336

3437
let $_ := xdmp:log(concat($classSheet, " last row ", $lastPropertyRow), "info")
3538

3639
let $_ := if (not(exists($lastPropertyRow))) then
3740
fn:error(xs:QName("ERROR"), concat("programming error, unable to find last row in ", $classSheet)) else ()
3841

39-
for $row in $FIRST_PROP_ROW to $lastPropertyRow return
42+
for $row in $FIRST-PROP-ROW to $lastPropertyRow return
4043
let $attribName := xlsx:excelCell($entitySheet, $classSheet, $stringTable, "A"||$row, $pt, $VAL-MANDATORY)
4144
let $attribLoc := concat($classSheet, ".", $attribName, " at ", $row)
4245
return
@@ -359,6 +362,12 @@ If there are multiple values (delimited by newline), return the sequence of them
359362
declare function xlsx:excelCell($sheet as node(), $sheetName as xs:string,
360363
$stringTable as node(), $cellCoord as xs:string, $pt, $validation as xs:string*) as xs:string* {
361364

365+
xlsx:excelCell($sheet, $sheetName, $stringTable, $cellCoord, $pt, $validation, $DELIM-LINE)
366+
};
367+
368+
declare function xlsx:excelCell($sheet as node(), $sheetName as xs:string,
369+
$stringTable as node(), $cellCoord as xs:string, $pt, $validation as xs:string*, $fieldDelim as xs:string) as xs:string* {
370+
362371
let $errorSource := concat($sheetName, ".", $cellCoord)
363372

364373
let $cell := $sheet//*:row/*:c[@*:r eq $cellCoord]
@@ -377,7 +386,7 @@ declare function xlsx:excelCell($sheet as node(), $sheetName as xs:string,
377386
let $_ := pt:addProblem($pt, (), $errorSource, "Ignoring unknown cell type", "*" || $cellType || "*")
378387
return ""
379388

380-
let $cellVals := for $tok in fn:tokenize($cellValWS, "[\n\r]") return
389+
let $cellVals := for $tok in fn:tokenize($cellValWS, $fieldDelim) return
381390
let $n := fn:normalize-space($tok)
382391
return
383392
if (string-length($n) gt 0) then $n
@@ -412,24 +421,24 @@ declare function xlsx:excelCell($sheet as node(), $sheetName as xs:string,
412421
Return the last row in an entity sheet that has a property
413422
:)
414423
declare function xlsx:excelLastRow($sheet as node(), $sheetName as xs:string,
415-
$stringTable as node(), $pt) as xs:integer? {
424+
$stringTable as node(), $firstRow as xs:integer, $pt) as xs:integer? {
416425

417426
(: find last A cell at or beyond the first prop row :)
418427
let $lastARowAttrib := $sheet//*:row[
419-
xs:integer(@*:r) ge $FIRST_PROP_ROW and
428+
xs:integer(@*:r) ge $firstRow and
420429
exists(*:c[fn:starts-with(@*:r, "A")])][last()]/@*:r
421430
return
422431
if (not(exists($lastARowAttrib))) then 0
423432
else
424433
let $lastARow := xs:integer($lastARowAttrib)
425434
let $firstEmptyRow := 0
426435
let $lastPopRow := 0
427-
let $_ := for $row in $FIRST_PROP_ROW to $lastARow return
436+
let $_ := for $row in $firstRow to $lastARow return
428437
let $cellVal := xlsx:excelCell($sheet, $sheetName, $stringTable, "A"||$row, $pt, ())
429438

430439
return
431440
if ($firstEmptyRow eq 0 and (count($cellVal) eq 0 or $cellVal eq "")) then (
432-
if ($row eq $FIRST_PROP_ROW) then () else xdmp:set($lastPopRow, $row - 1),
441+
if ($row eq $firstRow) then () else xdmp:set($lastPopRow, $row - 1),
433442
xdmp:set($firstEmptyRow, $row)
434443
)
435444
else ()
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
(:
2+
This module converts an Excel mapping workbook (based on our template) to JSON form.
3+
We make the Excel just like UML.
4+
:)
5+
6+
xquery version "1.0-ml";
7+
8+
module namespace xlsxm = "http://marklogic.com/xmi2es/xlsx/mapper";
9+
import module namespace pt = "http://marklogic.com/xmi2es/problemTracker" at "/xmi2es/problemTracker.xqy";
10+
import module namespace xlsx = "http://marklogic.com/xmi2es/xlsx" at "/xmi2es/excel2uml.xqy";
11+
12+
declare variable $FIRST-PROP-ROW := 13;
13+
14+
(:
Convert the excel to JSON. Put errors in $pt

$excel : the xlsx content; it is read as a zip archive via xdmp:zip-get
$pt    : problem tracker that receives conversion findings

Returns a json:object shaped as:
  "mapping"  : { "source", "notes" }                      (read from sheet2, cells B1/B2)
  "entities" : { <entity name> : {
                   "source", "notes", "discoveryCollections",
                   "discoveryURIPatterns", "discoverySampleData",  (cells B2..B6)
                   "attributes" : { <attribute name> : {
                       "mapping", "notes", "discoverySampleData", "discoveryAKA" } } } }
                                                           (columns B..E from $FIRST-PROP-ROW on)
:)
declare function xlsxm:convert($excel, $pt) as json:object {
  let $json := json:object()

  (: Get the stuff we need from the xlsx file :)
  let $stringTable := xdmp:zip-get($excel, "xl/sharedStrings.xml")/node()
  let $contents := xdmp:zip-get($excel, "[Content_Types].xml")/node()

  (: sheet2 is the mapping sheet. If it is absent we record a problem and leave
     $mappingSheet empty; excelCell requires a node() for its sheet argument, so the
     mapping cells are only read when the sheet was actually found. :)
  let $hasMappingSheet := exists($contents/*:Override[@PartName eq "/xl/worksheets/sheet2.xml"])
  let $mappingSheet :=
    if ($hasMappingSheet) then xdmp:zip-get($excel, "xl/worksheets/sheet2.xml")/node()
    else ()
  let $_ :=
    if ($hasMappingSheet) then ()
    else pt:addProblem($pt, (), "excel", "No mapping sheet found", ())

  (: every worksheet other than sheet1 and sheet2 is treated as an entity sheet :)
  let $entitySheets :=
    for $sheet in $contents/*:Override[
      @ContentType eq "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"
      and @PartName ne "/xl/worksheets/sheet1.xml"
      and @PartName ne "/xl/worksheets/sheet2.xml"]/@PartName
    (: PartName starts with "/"; zip entry names do not :)
    return xdmp:zip-get($excel, fn:substring($sheet, 2))/node()

  let $mapping := json:object()
  let $entities := json:object()
  let $_ := (
    map:put($json, "mapping", $mapping),
    if (exists($mappingSheet)) then (
      map:put($mapping, "source", xlsx:excelCell($mappingSheet, "mapping", $stringTable, "B1", $pt, ())),
      map:put($mapping, "notes", xlsx:excelCell($mappingSheet, "mapping", $stringTable, "B2", $pt, ()))
    )
    else (),
    map:put($json, "entities", $entities)
  )

  let $_ := for $entitySheet at $pos in $entitySheets return
    (: excelCell takes the problem tracker before the validation flags :)
    let $entityName := xlsx:excelCell($entitySheet, concat("Sheet at ", ($pos + 2)), $stringTable, "B1", $pt, $xlsx:VAL-MANDATORY)
    return
      if (string-length($entityName) eq 0) then ()
      else if (map:contains($entities, $entityName)) then pt:addProblem($pt, (), $entityName, "Ignoring duplicate entity", ($pos + 2))
      else
        let $thisEntity := json:object()
        let $thisAttributes := json:object()
        let $_ := map:put($entities, $entityName, $thisEntity)
        let $_ := map:put($thisEntity, "source", xlsx:excelCell($entitySheet, $entityName, $stringTable, "B2", $pt, ()))
        let $_ := map:put($thisEntity, "notes", xlsx:excelCell($entitySheet, $entityName, $stringTable, "B3", $pt, ()))
        (: collections may be separated by commas as well as line breaks :)
        let $_ := map:put($thisEntity, "discoveryCollections", xlsx:excelCell($entitySheet, $entityName, $stringTable, "B4", $pt, (), $xlsx:DELIM-COMMA-LINE))
        let $_ := map:put($thisEntity, "discoveryURIPatterns", xlsx:excelCell($entitySheet, $entityName, $stringTable, "B5", $pt, ()))
        let $_ := map:put($thisEntity, "discoverySampleData", xlsx:excelCell($entitySheet, $entityName, $stringTable, "B6", $pt, ()))
        let $_ := map:put($thisEntity, "attributes", $thisAttributes)
        (: one attribute per row, starting at this module's $FIRST-PROP-ROW :)
        for $row in $FIRST-PROP-ROW to xlsx:excelLastRow($entitySheet, $entityName, $stringTable, $FIRST-PROP-ROW, $pt) return
          let $attribName := xlsx:excelCell($entitySheet, $entityName, $stringTable, "A"||$row, $pt, $xlsx:VAL-MANDATORY)
          return
            if (string-length($attribName) eq 0) then ()
            else if (map:contains($thisAttributes, $attribName)) then pt:addProblem($pt, (), $attribName, "Ignoring duplicate attribute", $row)
            else
              let $thisAttrib := json:object()
              return (
                map:put($thisAttributes, $attribName, $thisAttrib),
                map:put($thisAttrib, "mapping", xlsx:excelCell($entitySheet, $entityName, $stringTable, "B" || $row, $pt, ())),
                map:put($thisAttrib, "notes", xlsx:excelCell($entitySheet, $entityName, $stringTable, "C" || $row, $pt, ())),
                map:put($thisAttrib, "discoverySampleData", xlsx:excelCell($entitySheet, $entityName, $stringTable, "D" || $row, $pt, ())),
                map:put($thisAttrib, "discoveryAKA", xlsx:excelCell($entitySheet, $entityName, $stringTable, "E" || $row, $pt, (), $xlsx:DELIM-COMMA-LINE))
              )

  return $json
};
72+
73+
(:
We ingest the spreadsheet and convert it to a JSON.

$content : map holding the incoming document under "uri" and "value"
$context : transform context map; not read here

Returns three content maps: the original workbook unchanged, the findings
collected during conversion, and the JSON form of the workbook.
NOTE(review): presumably invoked as the mlcp transform configured by the
loadMappingSpec task (same namespace) - confirm against the module path.
:)
declare function xlsxm:transform(
  $content as map:map,
  $context as map:map
) as map:map* {
  let $sourceURI := map:get($content, "uri")
  let $workbook := map:get($content, "value")
  (: bare workbook name: the part of the URI between the mapper prefix and ".xlsx" :)
  let $stem := substring-before(substring-after($sourceURI, "/xmi2es/excel-mapper/"), ".xlsx")

  (: convert Excel to JSON, collecting problems along the way :)
  let $tracker := pt:init()
  let $converted := xlsxm:convert($workbook, $tracker)

  let $findingsEntry := map:new((
    map:entry("uri", "/xmi2es/excel-mapper/findings/" || $stem || ".xml"),
    map:entry("value", pt:dumpProblems($tracker))
  ))
  let $jsonEntry := map:new((
    map:entry("uri", "/xmi2es/excel-mapper/" || $stem || ".json"),
    map:entry("value", xdmp:to-json($converted))
  ))

  (: original content, problems during conversion, transformed content :)
  return ($content, $findingsEntry, $jsonEntry)
};

0 commit comments

Comments
 (0)