diff --git a/docling_core/transforms/serializer/html.py b/docling_core/transforms/serializer/html.py
index 4c18ec3c..c9955217 100644
--- a/docling_core/transforms/serializer/html.py
+++ b/docling_core/transforms/serializer/html.py
@@ -787,7 +787,7 @@ def serialize(
)
# Join all parts without separators
- inline_html = " ".join([p.text for p in parts if p.text])
+ inline_html = "".join([p.text for p in parts if p.text])
# Wrap in span if needed
if inline_html:
diff --git a/docling_core/transforms/serializer/markdown.py b/docling_core/transforms/serializer/markdown.py
index 0702241b..6dc68fe8 100644
--- a/docling_core/transforms/serializer/markdown.py
+++ b/docling_core/transforms/serializer/markdown.py
@@ -670,7 +670,7 @@ def serialize(
visited=my_visited,
**kwargs,
)
- text_res = " ".join([p.text for p in parts if p.text])
+ text_res = "".join([p.text for p in parts if p.text])
return create_ser_result(text=text_res, span_source=parts)
diff --git a/test/data/doc/2408.09869v3_enriched_p2_p3_p5.gt.html b/test/data/doc/2408.09869v3_enriched_p2_p3_p5.gt.html
index 00bf0385..2b12619a 100644
--- a/test/data/doc/2408.09869v3_enriched_p2_p3_p5.gt.html
+++ b/test/data/doc/2408.09869v3_enriched_p2_p3_p5.gt.html
@@ -105,7 +105,7 @@
Can leverage different accelerators (GPU, MPS, etc).
2 Getting Started
-To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling . All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.
+To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.
Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository.
from docling.document_converter import DocumentConverter Large
source = "https://arxiv.org/pdf/2206.01062" # PDF path or URL converter = DocumentConverter() result = converter.convert_single(source) print(result.render_as_markdown()) # output: "## DocLayNet: A Human -Annotated Dataset for Document -Layout Analysis [...]"
diff --git a/test/data/doc/2408.09869v3_enriched_split.gt.html b/test/data/doc/2408.09869v3_enriched_split.gt.html
index 1adaa3d9..cbcb7741 100644
--- a/test/data/doc/2408.09869v3_enriched_split.gt.html
+++ b/test/data/doc/2408.09869v3_enriched_split.gt.html
@@ -127,7 +127,7 @@ 1 Introduction
Can leverage different accelerators (GPU, MPS, etc).
2 Getting Started
-To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling . All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.
+To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.
Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository.
from docling.document_converter import DocumentConverter Large
source = "https://arxiv.org/pdf/2206.01062" # PDF path or URL converter = DocumentConverter() result = converter.convert_single(source) print(result.render_as_markdown()) # output: "## DocLayNet: A Human -Annotated Dataset for Document -Layout Analysis [...]"
diff --git a/test/data/doc/2408.09869v3_enriched_split_p2.gt.html b/test/data/doc/2408.09869v3_enriched_split_p2.gt.html
index 5c43aecb..e4b5f944 100644
--- a/test/data/doc/2408.09869v3_enriched_split_p2.gt.html
+++ b/test/data/doc/2408.09869v3_enriched_split_p2.gt.html
@@ -106,7 +106,7 @@
Can leverage different accelerators (GPU, MPS, etc).
2 Getting Started
-To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling . All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.
+To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.
Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository.
from docling.document_converter import DocumentConverter Large
source = "https://arxiv.org/pdf/2206.01062" # PDF path or URL converter = DocumentConverter() result = converter.convert_single(source) print(result.render_as_markdown()) # output: "## DocLayNet: A Human -Annotated Dataset for Document -Layout Analysis [...]"
diff --git a/test/data/doc/concatenated.html b/test/data/doc/concatenated.html
index 413f009b..d62e9363 100644
--- a/test/data/doc/concatenated.html
+++ b/test/data/doc/concatenated.html
@@ -363,10 +363,10 @@ 1. Introduction
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -387,7 +387,7 @@ 1. Introduction
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/constructed_doc.embedded.html.gt b/test/data/doc/constructed_doc.embedded.html.gt
index 646da35f..d39ea34b 100644
--- a/test/data/doc/constructed_doc.embedded.html.gt
+++ b/test/data/doc/constructed_doc.embedded.html.gt
@@ -167,10 +167,10 @@ item 2 of neighboring list
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -191,7 +191,7 @@ item 2 of neighboring list
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/constructed_doc.embedded.md.gt b/test/data/doc/constructed_doc.embedded.md.gt
index 5a4f9440..6c8a34e2 100644
--- a/test/data/doc/constructed_doc.embedded.md.gt
+++ b/test/data/doc/constructed_doc.embedded.md.gt
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_doc.placeholder.html.gt b/test/data/doc/constructed_doc.placeholder.html.gt
index 1548ad1f..f7bf7b4c 100644
--- a/test/data/doc/constructed_doc.placeholder.html.gt
+++ b/test/data/doc/constructed_doc.placeholder.html.gt
@@ -167,10 +167,10 @@ item 2 of neighboring list
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -191,7 +191,7 @@ item 2 of neighboring list
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/constructed_doc.placeholder.md.gt b/test/data/doc/constructed_doc.placeholder.md.gt
index 9b899327..bc4973a8 100644
--- a/test/data/doc/constructed_doc.placeholder.md.gt
+++ b/test/data/doc/constructed_doc.placeholder.md.gt
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_doc.referenced.html.gt b/test/data/doc/constructed_doc.referenced.html.gt
index cdc58aa1..c642a973 100644
--- a/test/data/doc/constructed_doc.referenced.html.gt
+++ b/test/data/doc/constructed_doc.referenced.html.gt
@@ -167,10 +167,10 @@ item 2 of neighboring list
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -191,7 +191,7 @@ item 2 of neighboring list
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/constructed_doc.referenced.md.gt b/test/data/doc/constructed_doc.referenced.md.gt
index 6a30a582..d9806228 100644
--- a/test/data/doc/constructed_doc.referenced.md.gt
+++ b/test/data/doc/constructed_doc.referenced.md.gt
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_document.yaml.html b/test/data/doc/constructed_document.yaml.html
index f2741aa2..31c0a973 100644
--- a/test/data/doc/constructed_document.yaml.html
+++ b/test/data/doc/constructed_document.yaml.html
@@ -167,10 +167,10 @@ 1. Introduction
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -191,7 +191,7 @@ 1. Introduction
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/constructed_document.yaml.md b/test/data/doc/constructed_document.yaml.md
index ea562f91..5d86bd2b 100644
--- a/test/data/doc/constructed_document.yaml.md
+++ b/test/data/doc/constructed_document.yaml.md
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_legacy_annot_mark_false.gt.md b/test/data/doc/constructed_legacy_annot_mark_false.gt.md
index f0f3c469..b8850b8a 100644
--- a/test/data/doc/constructed_legacy_annot_mark_false.gt.md
+++ b/test/data/doc/constructed_legacy_annot_mark_false.gt.md
@@ -46,8 +46,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -63,7 +63,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_legacy_annot_mark_true.gt.md b/test/data/doc/constructed_legacy_annot_mark_true.gt.md
index 0382ae17..f0a91e37 100644
--- a/test/data/doc/constructed_legacy_annot_mark_true.gt.md
+++ b/test/data/doc/constructed_legacy_annot_mark_true.gt.md
@@ -46,8 +46,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -63,7 +63,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_mode_always_valid_false.gt.md b/test/data/doc/constructed_mode_always_valid_false.gt.md
index 3121af34..cba2527d 100644
--- a/test/data/doc/constructed_mode_always_valid_false.gt.md
+++ b/test/data/doc/constructed_mode_always_valid_false.gt.md
@@ -44,8 +44,8 @@ item 2 of list after empty list
■ item 1 of neighboring list
■ item 2 of neighboring list
□ item 1 of sub list
- □ Here a code snippet: `print("Hello world")` (to be displayed inline)
- □ Here a formula: $E=mc^2$ (to be displayed inline)
+ □ Here a code snippet:`print("Hello world")`(to be displayed inline)
+ □ Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
(i) Item 1 in A
(ii) Item 2 in A
diff --git a/test/data/doc/constructed_mode_always_valid_true.gt.md b/test/data/doc/constructed_mode_always_valid_true.gt.md
index 630d5458..b0fab96d 100644
--- a/test/data/doc/constructed_mode_always_valid_true.gt.md
+++ b/test/data/doc/constructed_mode_always_valid_true.gt.md
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- ■ item 1 of neighboring list
- ■ item 2 of neighboring list
- □ item 1 of sub list
- - □ Here a code snippet: `print("Hello world")` (to be displayed inline)
- - □ Here a formula: $E=mc^2$ (to be displayed inline)
+ - □ Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - □ Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
1. (i) Item 1 in A
2. (ii) Item 2 in A
diff --git a/test/data/doc/constructed_mode_auto_valid_false.gt.md b/test/data/doc/constructed_mode_auto_valid_false.gt.md
index 585e10b8..2e7d5278 100644
--- a/test/data/doc/constructed_mode_auto_valid_false.gt.md
+++ b/test/data/doc/constructed_mode_auto_valid_false.gt.md
@@ -44,8 +44,8 @@ item 2 of list after empty list
item 1 of neighboring list
item 2 of neighboring list
item 1 of sub list
- Here a code snippet: `print("Hello world")` (to be displayed inline)
- Here a formula: $E=mc^2$ (to be displayed inline)
+ Here a code snippet:`print("Hello world")`(to be displayed inline)
+ Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
(i) Item 1 in A
(ii) Item 2 in A
diff --git a/test/data/doc/constructed_mode_auto_valid_true.gt.md b/test/data/doc/constructed_mode_auto_valid_true.gt.md
index ea562f91..5d86bd2b 100644
--- a/test/data/doc/constructed_mode_auto_valid_true.gt.md
+++ b/test/data/doc/constructed_mode_auto_valid_true.gt.md
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
- (i) Item 1 in A
- (ii) Item 2 in A
diff --git a/test/data/doc/constructed_mode_never_valid_false.gt.md b/test/data/doc/constructed_mode_never_valid_false.gt.md
index 1a065857..1e69125b 100644
--- a/test/data/doc/constructed_mode_never_valid_false.gt.md
+++ b/test/data/doc/constructed_mode_never_valid_false.gt.md
@@ -44,8 +44,8 @@ item 2 of list after empty list
item 1 of neighboring list
item 2 of neighboring list
item 1 of sub list
- Here a code snippet: `print("Hello world")` (to be displayed inline)
- Here a formula: $E=mc^2$ (to be displayed inline)
+ Here a code snippet:`print("Hello world")`(to be displayed inline)
+ Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
Item 1 in A
Item 2 in A
diff --git a/test/data/doc/constructed_mode_never_valid_true.gt.md b/test/data/doc/constructed_mode_never_valid_true.gt.md
index c42f4e83..c6d5ad49 100644
--- a/test/data/doc/constructed_mode_never_valid_true.gt.md
+++ b/test/data/doc/constructed_mode_never_valid_true.gt.md
@@ -44,8 +44,8 @@ This is the caption of figure 2.
- item 1 of neighboring list
- item 2 of neighboring list
- item 1 of sub list
- - Here a code snippet: `print("Hello world")` (to be displayed inline)
- - Here a formula: $E=mc^2$ (to be displayed inline)
+ - Here a code snippet:`print("Hello world")`(to be displayed inline)
+ - Here a formula:$E=mc^2$(to be displayed inline)
Here a code block:
@@ -61,7 +61,7 @@ $$E=mc^2$$
-Some formatting chops: **bold** *italic* underline ~~strikethrough~~ subscript superscript [hyperlink](.) & [~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
+Some formatting chops:**bold***italic*underline~~strikethrough~~subscriptsuperscript[hyperlink](.)&[~~***everything at the same time.***~~](https://github.com/DS4SD/docling)
1. Item 1 in A
2. Item 2 in A
diff --git a/test/data/doc/constructed_orig_false.gt.html b/test/data/doc/constructed_orig_false.gt.html
index e3f6f7a5..1955c13f 100644
--- a/test/data/doc/constructed_orig_false.gt.html
+++ b/test/data/doc/constructed_orig_false.gt.html
@@ -167,10 +167,10 @@ 1. Introduction
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -191,7 +191,7 @@ 1. Introduction
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/constructed_orig_true.gt.html b/test/data/doc/constructed_orig_true.gt.html
index f2741aa2..31c0a973 100644
--- a/test/data/doc/constructed_orig_true.gt.html
+++ b/test/data/doc/constructed_orig_true.gt.html
@@ -167,10 +167,10 @@ 1. Introduction
- item 1 of sub list
-
-Here a code snippet:
print("Hello world") (to be displayed inline)
+Here a code snippet:print("Hello world")(to be displayed inline)
-
-Here a formula: (to be displayed inline)
+Here a formula:(to be displayed inline)
@@ -191,7 +191,7 @@ 1. Introduction
-Some formatting chops: bold italic underline strikethrough subscript superscript hyperlink & everything at the same time.
+Some formatting chops:bolditalicunderlinestrikethroughsubscriptsuperscripthyperlink&everything at the same time.
- Item 1 in A
- Item 2 in A
diff --git a/test/data/doc/inline_and_formatting.gt.html b/test/data/doc/inline_and_formatting.gt.html
index 2165b1be..c801af75 100644
--- a/test/data/doc/inline_and_formatting.gt.html
+++ b/test/data/doc/inline_and_formatting.gt.html
@@ -126,31 +126,31 @@
Contribution guideline example
This is simple.
-
Foo emphasis strong emphasis both .
-
Create your feature branch: git checkout -b feature/AmazingFeature .
+
Fooemphasisstrong emphasisboth.
+
Create your feature branch:git checkout -b feature/AmazingFeature.
-
-Pull the repository .
+Pull therepository.
-
-Create your feature branch (
git checkout -b feature/AmazingFeature )
+Create your feature branch (git checkout -b feature/AmazingFeature)
-
-Commit your changes (
git commit -m 'Add some AmazingFeature' )
+Commit your changes (git commit -m 'Add some AmazingFeature')
-
-Push to the branch (
git push origin feature/AmazingFeature )
+Push to the branch (git push origin feature/AmazingFeature)
- Open a Pull Request
- Whole list item has same formatting
-
-List item has mixed or partial formatting
+List item hasmixed or partialformatting
Whole heading is italic
-
Some formatted_code
-
Partially formatted heading to_escape not_to_escape & ampersand
-
A hyperlink on code in a line
+
Someformatted_code
+
Partially formattedheading to_escapenot_to_escape& ampersand
+
A hyperlink oncode in a line
A hyperlink on code as paragraph
The end.
diff --git a/test/data/doc/inline_and_formatting.gt.md b/test/data/doc/inline_and_formatting.gt.md
index 0455b064..cac10221 100644
--- a/test/data/doc/inline_and_formatting.gt.md
+++ b/test/data/doc/inline_and_formatting.gt.md
@@ -2,25 +2,25 @@
This is simple.
-Foo *emphasis* **strong emphasis** ***both*** .
+Foo*emphasis***strong emphasis*****both***.
-Create your feature branch: `git checkout -b feature/AmazingFeature` .
+Create your feature branch:`git checkout -b feature/AmazingFeature`.
-1. Pull the [**repository**](https://github.com/docling-project/docling) .
-2. Create your feature branch ( `git checkout -b feature/AmazingFeature` )
-3. Commit your changes ( `git commit -m 'Add some AmazingFeature'` )
-4. Push to the branch ( `git push origin feature/AmazingFeature` )
+1. Pull the[**repository**](https://github.com/docling-project/docling).
+2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
+3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+4. Push to the branch (`git push origin feature/AmazingFeature`)
5. Open a Pull Request
6. **Whole list item has same formatting**
-7. List item has *mixed or partial* formatting
+7. List item has*mixed or partial*formatting
# *Whole heading is italic*
-Some *`formatted_code`*
+Some*`formatted_code`*
-## *Partially formatted* heading to\_escape `not_to_escape` [$E=mc^2$](https://en.wikipedia.org/wiki/Albert_Einstein) & ampersand
+## *Partially formatted*heading to\_escape`not_to_escape`[$E=mc^2$](https://en.wikipedia.org/wiki/Albert_Einstein)& ampersand
-A hyperlink on [`code in a line`](#link)
+A hyperlink on[`code in a line`](#link)
[`A hyperlink on code as paragraph`](#test)
diff --git a/test/data/doc/polymers.gt.html b/test/data/doc/polymers.gt.html
index d70699e9..cf274a98 100644
--- a/test/data/doc/polymers.gt.html
+++ b/test/data/doc/polymers.gt.html
@@ -188,10 +188,10 @@ Safety and Regulatory Considerations
Extraction in food simulants
-
-What it is : Samples of the packaging material are immersed in a liquid that mimics the chemical properties of a specific food type (e.g., aqueous, acidic, fatty).
+What it is: Samples of the packaging material are immersed in a liquid that mimics the chemical properties of a specific food type (e.g., aqueous, acidic, fatty).
-
-Typical simulants :
+Typical simulants:
- 3% acetic acid (for acidic foods)
- 50% ethanol (for alcohol‑based foods)
@@ -200,7 +200,7 @@ Safety and Regulatory Considerations
-
-Procedure :
+Procedure:
- Prepare a defined volume of simulant in a sealed vessel.
- Immerse the material for a set time at a controlled temperature (often 50 °C–70 °C).
@@ -208,13 +208,13 @@ Safety and Regulatory Considerations
-
-Analysis : GC‑MS, LC‑MS, or HPLC depending on the analyte class.
+Analysis: GC‑MS, LC‑MS, or HPLC depending on the analyte class.
-
-Advantages : Direct assessment of potential migration into a realistic medium; scalable for routine testing.
+Advantages: Direct assessment of potential migration into a realistic medium; scalable for routine testing.
-
-Limitations : Does not account for headspace gas migration; may underestimate migration of highly volatile substances.
+Limitations: Does not account for headspace gas migration; may underestimate migration of highly volatile substances.
@@ -222,10 +222,10 @@ Safety and Regulatory Considerations
Headspace analysis
-
-What it is : Measurement of volatile substances that migrate from the material into the surrounding gas phase.
+What it is: Measurement of volatile substances that migrate from the material into the surrounding gas phase.
-
-Procedure :
+Procedure:
- Seal the material in a headspace vial or chamber.
- Equilibrate at a defined temperature (commonly 25 °C–60 °C).
@@ -234,13 +234,13 @@ Safety and Regulatory Considerations
-
-Applications : Assessment of aromas, flavor compounds, or volatile contaminants.
+Applications: Assessment of aromas, flavor compounds, or volatile contaminants.
-
-Advantages : Sensitive to low‑concentration volatiles; minimal sample preparation.
+Advantages: Sensitive to low‑concentration volatiles; minimal sample preparation.
-
-Limitations : Does not capture non‑volatile migration; results depend on equilibrium time and temperature.
+Limitations: Does not capture non‑volatile migration; results depend on equilibrium time and temperature.
@@ -248,10 +248,10 @@ Safety and Regulatory Considerations
Direct contact tests
-
-What it is : The packaging material is placed in direct contact with the food or food simulant, often using a defined food‑packaging configuration.
+What it is: The packaging material is placed in direct contact with the food or food simulant, often using a defined food‑packaging configuration.
-
-Procedure :
+Procedure:
- Assemble the material and food (or simulant) in a mold or container that simulates real usage (e.g., sealed pouch, jar).
- Incubate for the intended storage time at the relevant temperature.
@@ -260,10 +260,10 @@ Safety and Regulatory Considerations
-
-Advantages : Mimics real consumer exposure; captures both liquid and vapor migration pathways.
+Advantages: Mimics real consumer exposure; captures both liquid and vapor migration pathways.
-
-Limitations : More labor‑intensive; requires careful control of contact area, thickness, and sealing integrity.
+Limitations: More labor‑intensive; requires careful control of contact area, thickness, and sealing integrity.
diff --git a/test/data/doc/polymers.gt.md b/test/data/doc/polymers.gt.md
index e44bcb8d..3bc75356 100644
--- a/test/data/doc/polymers.gt.md
+++ b/test/data/doc/polymers.gt.md
@@ -50,38 +50,38 @@
**Common migration testing methods**
- **Extraction in food simulants**
- - *What it is* : Samples of the packaging material are immersed in a liquid that mimics the chemical properties of a specific food type (e.g., aqueous, acidic, fatty).
- - *Typical simulants* :
+ - *What it is*: Samples of the packaging material are immersed in a liquid that mimics the chemical properties of a specific food type (e.g., aqueous, acidic, fatty).
+ - *Typical simulants*:
- 3% acetic acid (for acidic foods)
- 50% ethanol (for alcohol‑based foods)
- 95% ethanol (for high‑fat foods)
- Distilled water (for aqueous foods)
- - *Procedure* :
+ - *Procedure*:
- Prepare a defined volume of simulant in a sealed vessel.
- Immerse the material for a set time at a controlled temperature (often 50 °C–70 °C).
- Remove, filter, and concentrate the extract for analysis.
- - *Analysis* : GC‑MS, LC‑MS, or HPLC depending on the analyte class.
- - *Advantages* : Direct assessment of potential migration into a realistic medium; scalable for routine testing.
- - *Limitations* : Does not account for headspace gas migration; may underestimate migration of highly volatile substances.
+ - *Analysis*: GC‑MS, LC‑MS, or HPLC depending on the analyte class.
+ - *Advantages*: Direct assessment of potential migration into a realistic medium; scalable for routine testing.
+ - *Limitations*: Does not account for headspace gas migration; may underestimate migration of highly volatile substances.
- **Headspace analysis**
- - *What it is* : Measurement of volatile substances that migrate from the material into the surrounding gas phase.
- - *Procedure* :
+ - *What it is*: Measurement of volatile substances that migrate from the material into the surrounding gas phase.
+ - *Procedure*:
- Seal the material in a headspace vial or chamber.
- Equilibrate at a defined temperature (commonly 25 °C–60 °C).
- Sample the gas phase with a gas sampling needle or syringe.
- Analyze via GC‑FID, GC‑MS, or PTR‑MS.
- - *Applications* : Assessment of aromas, flavor compounds, or volatile contaminants.
- - *Advantages* : Sensitive to low‑concentration volatiles; minimal sample preparation.
- - *Limitations* : Does not capture non‑volatile migration; results depend on equilibrium time and temperature.
+ - *Applications*: Assessment of aromas, flavor compounds, or volatile contaminants.
+ - *Advantages*: Sensitive to low‑concentration volatiles; minimal sample preparation.
+ - *Limitations*: Does not capture non‑volatile migration; results depend on equilibrium time and temperature.
- **Direct contact tests**
- - *What it is* : The packaging material is placed in direct contact with the food or food simulant, often using a defined food‑packaging configuration.
- - *Procedure* :
+ - *What it is*: The packaging material is placed in direct contact with the food or food simulant, often using a defined food‑packaging configuration.
+ - *Procedure*:
- Assemble the material and food (or simulant) in a mold or container that simulates real usage (e.g., sealed pouch, jar).
- Incubate for the intended storage time at the relevant temperature.
- Extract or sample the food directly (e.g., through the material or by taking a portion of the food).
- Analyze for migrated substances.
- - *Advantages* : Mimics real consumer exposure; captures both liquid and vapor migration pathways.
- - *Limitations* : More labor‑intensive; requires careful control of contact area, thickness, and sealing integrity.
+ - *Advantages*: Mimics real consumer exposure; captures both liquid and vapor migration pathways.
+ - *Limitations*: More labor‑intensive; requires careful control of contact area, thickness, and sealing integrity.
These three approaches—extraction in food simulants, headspace analysis, and direct contact tests—complement each other to provide a comprehensive assessment of potential migration from packaging into food.
diff --git a/test/test_serialization.py b/test/test_serialization.py
index a783c410..5eae245d 100644
--- a/test/test_serialization.py
+++ b/test/test_serialization.py
@@ -25,6 +25,7 @@
from docling_core.types.doc.document import (
DescriptionAnnotation,
DoclingDocument,
+ Formatting,
TableCell,
TableData,
)
@@ -650,3 +651,150 @@ def test_idoctags_meta():
ser = IDocTagsDocSerializer(doc=doc)
actual = ser.serialize().text
verify(exp_file=src.with_suffix(".gt.idt.xml"), actual=actual)
+
+
+# ===============================
+# Tests for inline group join behavior without spaces
+# ===============================
+
+
+def test_md_inline_group_no_spaces():
+    """Markdown: fragments of one inline group are concatenated with no separator."""
+    doc = DoclingDocument(name="test")
+
+    # The word "Docling" is split across two text items of a single inline group:
+    # a bold "D" followed by an unformatted "ocling".
+    inline = doc.add_inline_group()
+    doc.add_text(
+        label="text",
+        parent=inline,
+        text="D",
+        formatting=Formatting(bold=True, italic=False, underline=False, strikethrough=False, script="baseline"),
+    )
+    doc.add_text(
+        label="text",
+        parent=inline,
+        text="ocling",
+        formatting=Formatting(bold=False, italic=False, underline=False, strikethrough=False, script="baseline"),
+    )
+
+    # A space separator between the fragments would yield "**D** ocling" instead.
+    serializer = MarkdownDocSerializer(doc=doc)
+    markdown = serializer.serialize().text.strip()
+
+    # Bold markers wrap only the first letter; the remainder follows directly.
+    assert markdown == "**D**ocling"
+
+
+def test_html_inline_group_no_spaces():
+    """HTML: fragments of one inline group are concatenated with no separator."""
+    doc = DoclingDocument(name="test")
+
+    # The word "Projecting" is split into a bold "Project" and an unformatted "ing".
+    group = doc.add_inline_group()
+    doc.add_text(
+        label="text",
+        parent=group,
+        text="Project",
+        formatting=Formatting(bold=True, italic=False, underline=False, strikethrough=False, script="baseline"),
+    )
+    doc.add_text(
+        label="text",
+        parent=group,
+        text="ing",
+        formatting=Formatting(bold=False, italic=False, underline=False, strikethrough=False, script="baseline"),
+    )
+
+    # Serialize with an empty html_head and prettifying disabled.
+    ser = HTMLDocSerializer(doc=doc, params=HTMLParams(html_head="", prettify=False))
+    actual = ser.serialize().text
+
+    # Extract the body content between <body> and </body>
+    start = actual.find("<body>") + len("<body>")
+    end = actual.find("</body>")
+    body_content = actual[start:end].strip()
+
+    # The bold element closes right after "Project" and "ing" follows with no space.
+    assert "<strong>Project</strong>ing" in body_content
+
+
+
+
+def test_md_inline_group_mixed_formatting_mid_word():
+    """Markdown: a formatting change in the middle of a word adds no whitespace."""
+    doc = DoclingDocument(name="test")
+
+    # "Parsing" arrives as an unformatted "Pars" followed by an italic "ing".
+    inline = doc.add_inline_group()
+    doc.add_text(label="text", parent=inline, text="Pars")
+    doc.add_text(
+        label="text",
+        parent=inline,
+        text="ing",
+        formatting=Formatting(bold=False, italic=True, underline=False, strikethrough=False, script="baseline"),
+    )
+
+    serializer = MarkdownDocSerializer(doc=doc)
+    markdown = serializer.serialize().text.strip()
+
+    # Italic markers wrap only the suffix and sit flush against the stem.
+    assert markdown == "Pars*ing*"
+
+
+def test_html_inline_group_mixed_formatting_mid_word():
+    """HTML: a formatting change in the middle of a word adds no whitespace."""
+    doc = DoclingDocument(name="test")
+
+    # Simulate "Parsing" with "Pars" normal and "ing" italic
+    group = doc.add_inline_group()
+    doc.add_text(label="text", parent=group, text="Pars")
+    doc.add_text(
+        label="text",
+        parent=group,
+        text="ing",
+        formatting=Formatting(bold=False, italic=True, underline=False, strikethrough=False, script="baseline"),
+    )
+
+    ser = HTMLDocSerializer(doc=doc, params=HTMLParams(html_head="", prettify=False))
+    actual = ser.serialize().text
+
+    # Extract the body content between <body> and </body>
+    start = actual.find("<body>") + len("<body>")
+    end = actual.find("</body>")
+    body_content = actual[start:end].strip()
+
+    # The italic suffix must follow the plain stem directly, with no space between
+    assert "Pars<em>ing</em>" in body_content.replace("\n", " ")
+
+
+def test_md_inline_group_single_part():
+    """Markdown: an inline group holding a single fragment renders just that fragment."""
+    doc = DoclingDocument(name="test")
+
+    inline = doc.add_inline_group()
+    doc.add_text(label="text", parent=inline, text="Single")
+
+    serializer = MarkdownDocSerializer(doc=doc)
+    markdown = serializer.serialize().text.strip()
+
+    # Only one part is added, so the output is expected to be that text alone.
+    assert markdown == "Single"
+
+
+def test_html_inline_group_single_part():
+    """HTML: an inline group holding a single fragment still renders that fragment."""
+    doc = DoclingDocument(name="test")
+
+    group = doc.add_inline_group()
+    doc.add_text(label="text", parent=group, text="Single")
+
+    ser = HTMLDocSerializer(doc=doc, params=HTMLParams(html_head="", prettify=False))
+    actual = ser.serialize().text
+
+    # Extract the body content between <body> and </body>
+    start = actual.find("<body>") + len("<body>")
+    end = actual.find("</body>")
+    body_content = actual[start:end].strip()
+
+    # Check that the single part content is present
+    assert "Single" in body_content