Skip to content

Commit 3a96df4

Browse files
committed
#8986 different behaviors based on type of meta
1 parent 6ac7e0b commit 3a96df4

File tree

3 files changed

+204
-31
lines changed

3 files changed

+204
-31
lines changed

src/Text/Pandoc/Readers/Docx.hs

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -169,36 +169,54 @@ spansToKeep = []
169169
divsToKeep :: [ParaStyleName]
170170
divsToKeep = ["Definition", "Definition Term"]
171171

172+
-- | Remove the first element satisfying the predicate from a list.
--
-- Returns the removed element (or 'Nothing' when no element matches)
-- together with the remaining elements in their original order. When
-- nothing matches, the list is returned unchanged.
--
-- The predicate is named @p@ rather than @pred@ so that it does not
-- shadow 'Prelude.pred'.
extractFirst :: (a -> Bool) -> [a] -> (Maybe a, [a])
extractFirst p xs =
  case break p xs of
    -- 'break' stops at the first match, so only that element is dropped.
    (before, match : after) -> (Just match, before ++ after)
    (_, [])                 -> (Nothing, xs)
178+
179+
-- | Meta Styles where every instance is kept (contrast with
-- 'singleMetaStyles'), each mapped to its metadata key.
multiMetaStyles :: M.Map ParaStyleName T.Text
multiMetaStyles = M.singleton "Author" "author"
181+
182+
-- | Meta Styles of which only the first instance in the document is
-- kept, each mapped to its metadata key.
singleMetaStyles :: M.Map ParaStyleName T.Text
singleMetaStyles =
  M.fromList
    [ ("Abstract", "abstract")
    , ("Date", "date")
    , ("Subtitle", "subtitle")
    , ("Title", "title")
    ]
188+
172189
metaStyles :: M.Map ParaStyleName T.Text
173-
metaStyles = M.fromList [ ("Title", "title")
174-
, ("Subtitle", "subtitle")
175-
, ("Author", "author")
176-
, ("Date", "date")
177-
, ("Abstract", "abstract")]
190+
metaStyles = M.union singleMetaStyles multiMetaStyles
178191

179192
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
180-
sepBodyParts bps = (metaWithoutEmpty, nonMetaFirst ++ emptyPars ++ nonMetaLast)
193+
sepBodyParts bps = (multiMetas ++ singleMetas, rest)
181194
where
182-
(nonMetaFirst, rest) = break isMetaOrEmpty bps
183-
(meta, nonMetaLast) = span isMetaOrEmpty rest
184-
isMetaOrEmpty bp = isMetaPar bp || isEmptyPar bp
195+
-- extract every paragraph in bps whose style is one of multiMetaStyles
196+
(multiMetas, restWithoutMulti) = partition isMultiMetaPar bps
185197

186-
(metaWithoutEmpty, emptyPars) = partition (not . isEmptyPar) meta
198+
-- for each style in singleMetaStyles, move its first matching paragraph into singleMetas; all remaining elements go to rest
199+
(singleMetas, rest) = foldr extractSingle ([], restWithoutMulti) (M.keys singleMetaStyles)
187200

188-
isMetaPar :: BodyPart -> Bool
189-
isMetaPar (Paragraph pPr _) =
190-
not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles)
191-
isMetaPar _ = False
201+
extractSingle :: ParaStyleName -> ([BodyPart], [BodyPart]) -> ([BodyPart], [BodyPart])
202+
extractSingle styleName (accSingleMetas, remainingBPs) =
203+
let (found, rest) = extractFirst (isSingleMetaPar styleName) remainingBPs
204+
in (maybeToList found ++ accSingleMetas, rest)
192205

193-
isEmptyPar :: BodyPart -> Bool
194-
isEmptyPar (Paragraph _ parParts) =
195-
all isEmptyParPart parParts
196-
where
197-
isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
198-
isEmptyParPart _ = False
199-
isEmptyElem (TextRun s) = trim s == ""
200-
isEmptyElem _ = True
201-
isEmptyPar _ = False
206+
maybeToList :: Maybe a -> [a]
207+
maybeToList Nothing = []
208+
maybeToList (Just x) = [x]
209+
210+
isSingleMetaPar :: ParaStyleName -> BodyPart -> Bool
211+
isSingleMetaPar styleName (Paragraph pPr _) =
212+
styleName `elem` getStyleNames (pStyle pPr)
213+
isSingleMetaPar _ _ = False
214+
215+
216+
isMultiMetaPar :: BodyPart -> Bool
217+
isMultiMetaPar (Paragraph pPr _) =
218+
not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys multiMetaStyles)
219+
isMultiMetaPar _ = False
202220

203221
bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
204222
bodyPartsToMeta' [] = return M.empty

test/Tests/Readers/Docx.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ tests = [ testGroup "document"
507507
"docx/metadata.docx"
508508
"docx/metadata.native"
509509
, testCompareWithOpts def{readerStandalone=True}
510-
"stop recording metadata with normal text"
510+
"recording metadata after normal text only if author"
511511
"docx/metadata_after_normal.docx"
512512
"docx/metadata_after_normal.native"
513513
]
Lines changed: 162 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,162 @@
1-
Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]})
2-
[Para [Str "And",Space,Str "now",Space,Str "this",Space,Str "is",Space,Str "normal",Space,Str "text."]
3-
,Para [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"]
4-
,Para [Str "Mary",Space,Str "Ann",Space,Str "Evans"]
5-
,Para [Str "Aurore",Space,Str "Dupin"]
6-
,Para [Str "July",Space,Str "28,",Space,Str "2014"]
7-
,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]]
1+
Pandoc
2+
Meta
3+
{ unMeta =
4+
fromList
5+
[ ( "abstract"
6+
, MetaInlines
7+
[ Str "This"
8+
, Space
9+
, Str "is"
10+
, Space
11+
, Str "a"
12+
, Space
13+
, Str "test"
14+
, Space
15+
, Str "of"
16+
, Space
17+
, Str "how"
18+
, Space
19+
, Str "this"
20+
, Space
21+
, Str "all"
22+
, Space
23+
, Str "works."
24+
, Space
25+
, Str "I\8217ve"
26+
, Space
27+
, Str "skipped"
28+
, Space
29+
, Str "lines"
30+
, Space
31+
, Str "here,"
32+
, Space
33+
, Str "which"
34+
, Space
35+
, Str "pandoc"
36+
, Space
37+
, Str "doesn\8217t"
38+
, Space
39+
, Str "do,"
40+
, Space
41+
, Str "but"
42+
, Space
43+
, Str "which"
44+
, Space
45+
, Str "shouldn\8217t"
46+
, Space
47+
, Str "make"
48+
, Space
49+
, Str "a"
50+
, Space
51+
, Str "difference."
52+
]
53+
)
54+
, ( "author"
55+
, MetaList
56+
[ MetaInlines
57+
[ Str "Mary"
58+
, Space
59+
, Str "Ann"
60+
, Space
61+
, Str "Evans"
62+
]
63+
, MetaInlines [ Str "Aurore" , Space , Str "Dupin" ]
64+
, MetaInlines
65+
[ Str "Mary"
66+
, Space
67+
, Str "Ann"
68+
, Space
69+
, Str "Evans"
70+
]
71+
, MetaInlines [ Str "Aurore" , Space , Str "Dupin" ]
72+
]
73+
)
74+
, ( "date"
75+
, MetaInlines
76+
[ Str "July" , Space , Str "28," , Space , Str "2014" ]
77+
)
78+
, ( "title"
79+
, MetaInlines
80+
[ Str "This"
81+
, Space
82+
, Str "Is"
83+
, Space
84+
, Str "the"
85+
, Space
86+
, Str "Title"
87+
]
88+
)
89+
]
90+
}
91+
[ Para
92+
[ Str "And"
93+
, Space
94+
, Str "now"
95+
, Space
96+
, Str "this"
97+
, Space
98+
, Str "is"
99+
, Space
100+
, Str "normal"
101+
, Space
102+
, Str "text."
103+
]
104+
, Para
105+
[ Str "This"
106+
, Space
107+
, Str "Is"
108+
, Space
109+
, Str "the"
110+
, Space
111+
, Str "Title"
112+
]
113+
, Para
114+
[ Str "July" , Space , Str "28," , Space , Str "2014" ]
115+
, Para
116+
[ Str "This"
117+
, Space
118+
, Str "is"
119+
, Space
120+
, Str "a"
121+
, Space
122+
, Str "test"
123+
, Space
124+
, Str "of"
125+
, Space
126+
, Str "how"
127+
, Space
128+
, Str "this"
129+
, Space
130+
, Str "all"
131+
, Space
132+
, Str "works."
133+
, Space
134+
, Str "I\8217ve"
135+
, Space
136+
, Str "skipped"
137+
, Space
138+
, Str "lines"
139+
, Space
140+
, Str "here,"
141+
, Space
142+
, Str "which"
143+
, Space
144+
, Str "pandoc"
145+
, Space
146+
, Str "doesn\8217t"
147+
, Space
148+
, Str "do,"
149+
, Space
150+
, Str "but"
151+
, Space
152+
, Str "which"
153+
, Space
154+
, Str "shouldn\8217t"
155+
, Space
156+
, Str "make"
157+
, Space
158+
, Str "a"
159+
, Space
160+
, Str "difference."
161+
]
162+
]

0 commit comments

Comments
 (0)