diff --git a/=3.4.0 b/=3.4.0 new file mode 100644 index 000000000..47a848de9 --- /dev/null +++ b/=3.4.0 @@ -0,0 +1,5 @@ +Collecting markdown + Downloading markdown-3.8-py3-none-any.whl.metadata (5.1 kB) +Downloading markdown-3.8-py3-none-any.whl (106 kB) +Installing collected packages: markdown +Successfully installed markdown-3.8 diff --git a/pyproject.toml b/pyproject.toml index a285d2351..a4896c4ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ dependencies = [ "numpy>=2.2.2", "mcp[cli]", "neo4j", + "markdown>=3.4.0", "modal>=0.73.45", "slack-sdk", "lox>=0.12.0", diff --git a/src/codegen/shared/markdown_adf/README.md b/src/codegen/shared/markdown_adf/README.md new file mode 100644 index 000000000..950df9416 --- /dev/null +++ b/src/codegen/shared/markdown_adf/README.md @@ -0,0 +1,259 @@ +# Markdown to ADF Adapter + +This module provides utilities to convert Markdown text to Atlassian Document Format (ADF), which is used by Atlassian products like Jira and Confluence. + +## Overview + +The Atlassian Document Format (ADF) is a JSON-based format that represents rich text content in Atlassian products. This adapter converts standard Markdown syntax to the corresponding ADF structure. + +## Usage + +### Basic Usage + +````python +from codegen.shared.markdown_adf import MarkdownToADFAdapter + +# Create an adapter instance +adapter = MarkdownToADFAdapter() + +# Convert markdown to ADF +markdown_text = """ +# Hello World + +This is a paragraph with **bold** and *italic* text. + +## Code Example + +Here's some Python code: + +```python +def greet(name): + print(f"Hello, {name}!") +```` + +## Lists + +- Item 1 +- Item 2 with `inline code` +- Item 3 + +> This is a blockquote with important information. 
+> """ + +adf_document = adapter.convert(markdown_text) +print(json.dumps(adf_document, indent=2)) + +```` + +### Output Structure + +The adapter returns an `ADFDocument` which is a dictionary with the following structure: + +```python +{ + "version": 1, + "type": "doc", + "content": [ + # Array of ADF nodes + ] +} +```` + +## Supported Markdown Elements + +### Text Formatting + +| Markdown | ADF Mark Type | Description | +| ------------------- | ------------- | ------------------ | +| `**bold**` | `strong` | Bold text | +| `*italic*` | `em` | Italic text | +| `` `code` `` | `code` | Inline code | +| `[link](url)` | `link` | Hyperlinks | +| `~~strikethrough~~` | `strike` | Strikethrough text | + +### Block Elements + +| Markdown | ADF Node Type | Description | +| ----------- | ------------- | ---------------------------------- | +| `# Heading` | `heading` | Headings (H1-H6) | +| Paragraphs | `paragraph` | Regular paragraphs | +| `code` | `codeBlock` | Code blocks with optional language | +| `- item` | `bulletList` | Bullet lists | +| `1. item` | `orderedList` | Numbered lists | +| `> quote` | `blockquote` | Block quotes | +| `---` | `rule` | Horizontal rules | + +### Advanced Features + +- **Code blocks with syntax highlighting**: Language detection from fenced code blocks +- **Nested lists**: Support for multi-level lists +- **Mixed formatting**: Combination of multiple inline formats +- **Link handling**: Automatic conversion of markdown links to ADF link marks + +## Examples + +### Simple Text with Formatting + +```python +markdown = "This is **bold** and *italic* text with `inline code`." 
+adf = adapter.convert(markdown) +``` + +Results in: + +```json +{ + "version": 1, + "type": "doc", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "This is " + }, + { + "type": "text", + "text": "bold", + "marks": [ + { + "type": "strong" + } + ] + }, + { + "type": "text", + "text": " and " + }, + { + "type": "text", + "text": "italic", + "marks": [ + { + "type": "em" + } + ] + }, + { + "type": "text", + "text": " text with " + }, + { + "type": "text", + "text": "inline code", + "marks": [ + { + "type": "code" + } + ] + }, + { + "type": "text", + "text": "." + } + ] + } + ] +} +``` + +### Code Block with Language + +````python +markdown = """```python +def hello(): + print("Hello, world!") +```""" +adf = adapter.convert(markdown) +```` + +Results in: + +```json +{ + "version": 1, + "type": "doc", + "content": [ + { + "type": "codeBlock", + "attrs": { + "language": "python" + }, + "content": [ + { + "type": "text", + "text": "def hello():\n print(\"Hello, world!\")" + } + ] + } + ] +} +``` + +### Lists + +```python +markdown = """ +- First item +- Second item with **bold** text +- Third item +""" +adf = adapter.convert(markdown) +``` + +Results in a bullet list with properly formatted list items. + +## Error Handling + +The adapter is designed to be robust and handle malformed markdown gracefully: + +- **Invalid HTML**: Falls back to creating a simple paragraph with the original text +- **Empty input**: Creates an empty paragraph +- **Unsupported elements**: Extracts text content and wraps in paragraphs +- **Malformed markdown**: Processes what it can and creates valid ADF structure + +## Type Safety + +The module includes comprehensive type definitions built on Python's `typing.TypedDict`: + +- `ADFDocument`: The root document structure +- `ADFNode`: Base node type with all possible properties +- `ADFMark`: Inline formatting marks +- Specific node types: `ADFTextNode`, `ADFParagraphNode`, `ADFHeadingNode`, etc. 
+ +## Dependencies + +- `markdown`: Python markdown parser +- `typing`: Type hints support + +## Testing + +The module includes comprehensive tests covering: + +- Basic text conversion +- All supported markdown elements +- Complex nested structures +- Error handling scenarios +- Edge cases and malformed input + +Run tests with: + +```bash +pytest tests/shared/test_markdown_adf_adapter.py +``` + +## Limitations + +- **Tables**: Not yet implemented (markdown tables are complex to convert to ADF) +- **Images**: Not implemented (requires media handling) +- **Custom HTML**: Raw HTML in markdown is not processed +- **Advanced ADF features**: Some ADF-specific features like panels, mentions, etc. are not supported + +## Future Enhancements + +- Table support +- Image and media handling +- Custom ADF node types (panels, mentions, etc.) +- Configuration options for conversion behavior +- Performance optimizations for large documents diff --git a/src/codegen/shared/markdown_adf/__init__.py b/src/codegen/shared/markdown_adf/__init__.py new file mode 100644 index 000000000..380518ae2 --- /dev/null +++ b/src/codegen/shared/markdown_adf/__init__.py @@ -0,0 +1,10 @@ +"""Markdown to ADF (Atlassian Document Format) Adapter + +This module provides utilities to convert Markdown text to Atlassian Document Format (ADF), +which is used by Atlassian products like Jira and Confluence. +""" + +from .adapter import MarkdownToADFAdapter +from .adf_types import ADFDocument, ADFMark, ADFNode + +__all__ = ["ADFDocument", "ADFMark", "ADFNode", "MarkdownToADFAdapter"] diff --git a/src/codegen/shared/markdown_adf/adapter.py b/src/codegen/shared/markdown_adf/adapter.py new file mode 100644 index 000000000..168d72105 --- /dev/null +++ b/src/codegen/shared/markdown_adf/adapter.py @@ -0,0 +1,337 @@ +"""Markdown to ADF (Atlassian Document Format) Adapter + +This module provides the main adapter class for converting Markdown text to ADF format. 
+""" + +import re +from typing import Optional +from xml.etree.ElementTree import Element + +from markdown import Markdown + +try: + from .adf_types import ( + ADFCodeBlockNode, + ADFDocument, + ADFHeadingNode, + ADFListItemNode, + ADFListNode, + ADFMark, + ADFMarkType, + ADFNode, + ADFNodeType, + ADFParagraphNode, + ADFTextNode, + ) +except ImportError: + # Fallback for direct execution + from adf_types import ( + ADFCodeBlockNode, + ADFDocument, + ADFHeadingNode, + ADFListItemNode, + ADFListNode, + ADFMark, + ADFMarkType, + ADFNode, + ADFNodeType, + ADFParagraphNode, + ADFTextNode, + ) + + +class MarkdownToADFAdapter: + r"""Converts Markdown text to Atlassian Document Format (ADF). + + This adapter parses Markdown using Python's markdown library and converts + the resulting HTML/XML tree to ADF JSON structure. + + Example: + adapter = MarkdownToADFAdapter() + adf_doc = adapter.convert("# Hello World\\n\\nThis is **bold** text.") + """ + + def __init__(self): + """Initialize the adapter with markdown parser.""" + self.md = Markdown( + extensions=[ + "fenced_code", + "codehilite", + "tables", + "nl2br", + ], + extension_configs={ + "codehilite": { + "use_pygments": False, + "noclasses": True, + } + }, + ) + + def convert(self, markdown_text: str) -> ADFDocument: + """Convert markdown text to ADF document. 
+ + Args: + markdown_text: The markdown text to convert + + Returns: + ADFDocument: The converted ADF document structure + """ + # Parse markdown to HTML/XML tree + html = self.md.convert(markdown_text) + + # Parse the HTML back to XML tree for processing + from xml.etree.ElementTree import fromstring + + # Wrap in a root element to handle multiple top-level elements + wrapped_html = f"{html}" + try: + root = fromstring(wrapped_html) + except Exception as e: + # Fallback for malformed HTML - create a simple paragraph + return self._create_document([self._create_paragraph([self._create_text(markdown_text)])]) + + # Convert XML tree to ADF nodes + content_nodes = [] + for child in root: + node = self._convert_element_to_adf(child) + if node: + content_nodes.append(node) + + # If no content was generated, create a simple paragraph + if not content_nodes: + content_nodes = [self._create_paragraph([self._create_text(markdown_text or "")])] + + return self._create_document(content_nodes) + + def _create_document(self, content: list[ADFNode]) -> ADFDocument: + """Create an ADF document with the given content.""" + return {"version": 1, "type": ADFNodeType.DOC, "content": content} + + def _convert_element_to_adf(self, element: Element) -> Optional[ADFNode]: + """Convert an XML element to an ADF node.""" + tag = element.tag.lower() + + if tag == "p": + return self._convert_paragraph(element) + elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: + return self._convert_heading(element, int(tag[1])) + elif tag == "pre": + return self._convert_code_block(element) + elif tag == "code" and element.getparent() is not None and element.getparent().tag != "pre": + # Inline code - this should be handled as a mark, not a separate node + return None + elif tag == "ul": + return self._convert_bullet_list(element) + elif tag == "ol": + return self._convert_ordered_list(element) + elif tag == "li": + return self._convert_list_item(element) + elif tag == "blockquote": + return 
self._convert_blockquote(element) + elif tag == "hr": + return self._create_rule() + elif tag == "br": + return self._create_hard_break() + else: + # For unknown elements, try to extract text content + text_content = self._extract_text_with_marks(element) + if text_content: + return self._create_paragraph(text_content) + return None + + def _convert_paragraph(self, element: Element) -> ADFParagraphNode: + """Convert a paragraph element to ADF paragraph node.""" + content = self._extract_text_with_marks(element) + return self._create_paragraph(content) + + def _convert_heading(self, element: Element, level: int) -> ADFHeadingNode: + """Convert a heading element to ADF heading node.""" + content = self._extract_text_with_marks(element) + return {"type": ADFNodeType.HEADING, "attrs": {"level": level}, "content": content} + + def _convert_code_block(self, element: Element) -> ADFCodeBlockNode: + """Convert a code block element to ADF code block node.""" + # Extract language from class attribute if present + language = None + code_element = element.find(".//code") + if code_element is not None: + class_attr = code_element.get("class", "") + if class_attr: + # Extract language from class like "language-python" or "python" + lang_match = re.search(r"(?:language-)?([a-zA-Z0-9_+-]+)", class_attr) + if lang_match: + language = lang_match.group(1) + + # Get the text content + text_content = element.text or "" + if code_element is not None: + text_content = code_element.text or "" + + # Clean up the text content + text_content = text_content.strip() + + node: ADFCodeBlockNode = {"type": ADFNodeType.CODE_BLOCK, "content": [self._create_text(text_content)]} + + if language: + node["attrs"] = {"language": language} + + return node + + def _convert_bullet_list(self, element: Element) -> ADFListNode: + """Convert a bullet list element to ADF bullet list node.""" + content = [] + for li in element.findall("li"): + list_item = self._convert_list_item(li) + if list_item: + 
content.append(list_item) + + return {"type": ADFNodeType.BULLET_LIST, "content": content} + + def _convert_ordered_list(self, element: Element) -> ADFListNode: + """Convert an ordered list element to ADF ordered list node.""" + content = [] + for li in element.findall("li"): + list_item = self._convert_list_item(li) + if list_item: + content.append(list_item) + + return {"type": ADFNodeType.ORDERED_LIST, "content": content} + + def _convert_list_item(self, element: Element) -> ADFListItemNode: + """Convert a list item element to ADF list item node.""" + content = [] + + # Process child elements + for child in element: + child_node = self._convert_element_to_adf(child) + if child_node: + content.append(child_node) + + # If no child elements, create a paragraph with the text content + if not content: + text_content = self._extract_text_with_marks(element) + if text_content: + content = [self._create_paragraph(text_content)] + + return {"type": ADFNodeType.LIST_ITEM, "content": content} + + def _convert_blockquote(self, element: Element) -> ADFNode: + """Convert a blockquote element to ADF blockquote node.""" + content = [] + for child in element: + child_node = self._convert_element_to_adf(child) + if child_node: + content.append(child_node) + + # If no child elements, create a paragraph with the text content + if not content: + text_content = self._extract_text_with_marks(element) + if text_content: + content = [self._create_paragraph(text_content)] + + return {"type": ADFNodeType.BLOCKQUOTE, "content": content} + + def _extract_text_with_marks(self, element: Element) -> list[ADFNode]: + """Extract text content with inline formatting marks.""" + result = [] + + # Handle text before first child + if element.text: + result.append(self._create_text(element.text)) + + # Process child elements + for child in element: + if child.tag.lower() in ["strong", "b"]: + # Bold text + child_text = self._get_element_text(child) + if child_text: + 
result.append(self._create_text(child_text, [self._create_strong_mark()])) + elif child.tag.lower() in ["em", "i"]: + # Italic text + child_text = self._get_element_text(child) + if child_text: + result.append(self._create_text(child_text, [self._create_em_mark()])) + elif child.tag.lower() == "code": + # Inline code + child_text = self._get_element_text(child) + if child_text: + result.append(self._create_text(child_text, [self._create_code_mark()])) + elif child.tag.lower() == "a": + # Link + href = child.get("href", "") + child_text = self._get_element_text(child) + if child_text and href: + result.append(self._create_text(child_text, [self._create_link_mark(href)])) + elif child_text: + result.append(self._create_text(child_text)) + elif child.tag.lower() in ["del", "s", "strike"]: + # Strikethrough + child_text = self._get_element_text(child) + if child_text: + result.append(self._create_text(child_text, [self._create_strike_mark()])) + else: + # For other elements, just extract text + child_text = self._get_element_text(child) + if child_text: + result.append(self._create_text(child_text)) + + # Handle tail text after child element + if child.tail: + result.append(self._create_text(child.tail)) + + # If no content was extracted, create empty text node + if not result: + result = [self._create_text("")] + + return result + + def _get_element_text(self, element: Element) -> str: + """Get all text content from an element and its children.""" + text_parts = [] + if element.text: + text_parts.append(element.text) + for child in element: + text_parts.append(self._get_element_text(child)) + if child.tail: + text_parts.append(child.tail) + return "".join(text_parts) + + def _create_paragraph(self, content: list[ADFNode]) -> ADFParagraphNode: + """Create an ADF paragraph node.""" + return {"type": ADFNodeType.PARAGRAPH, "content": content} + + def _create_text(self, text: str, marks: Optional[list[ADFMark]] = None) -> ADFTextNode: + """Create an ADF text node.""" + 
node: ADFTextNode = {"type": ADFNodeType.TEXT, "text": text} + if marks: + node["marks"] = marks + return node + + def _create_rule(self) -> ADFNode: + """Create an ADF rule (horizontal line) node.""" + return {"type": ADFNodeType.RULE} + + def _create_hard_break(self) -> ADFNode: + """Create an ADF hard break node.""" + return {"type": ADFNodeType.HARD_BREAK} + + def _create_strong_mark(self) -> ADFMark: + """Create a strong (bold) mark.""" + return {"type": ADFMarkType.STRONG} + + def _create_em_mark(self) -> ADFMark: + """Create an emphasis (italic) mark.""" + return {"type": ADFMarkType.EM} + + def _create_code_mark(self) -> ADFMark: + """Create a code mark.""" + return {"type": ADFMarkType.CODE} + + def _create_link_mark(self, href: str) -> ADFMark: + """Create a link mark.""" + return {"type": ADFMarkType.LINK, "attrs": {"href": href}} + + def _create_strike_mark(self) -> ADFMark: + """Create a strikethrough mark.""" + return {"type": ADFMarkType.STRIKE} diff --git a/src/codegen/shared/markdown_adf/adf_types.py b/src/codegen/shared/markdown_adf/adf_types.py new file mode 100644 index 000000000..7a787a9bc --- /dev/null +++ b/src/codegen/shared/markdown_adf/adf_types.py @@ -0,0 +1,119 @@ +"""Type definitions for Atlassian Document Format (ADF) structures.""" + +from enum import Enum +from typing import Any, Literal, Optional, TypedDict, Union + + +class ADFNodeType(str, Enum): + """ADF node types.""" + + DOC = "doc" + PARAGRAPH = "paragraph" + HEADING = "heading" + TEXT = "text" + HARD_BREAK = "hardBreak" + BULLET_LIST = "bulletList" + ORDERED_LIST = "orderedList" + LIST_ITEM = "listItem" + CODE_BLOCK = "codeBlock" + BLOCKQUOTE = "blockquote" + RULE = "rule" + TABLE = "table" + TABLE_ROW = "tableRow" + TABLE_HEADER = "tableHeader" + TABLE_CELL = "tableCell" + + +class ADFMarkType(str, Enum): + """ADF mark types for inline formatting.""" + + STRONG = "strong" + EM = "em" + CODE = "code" + LINK = "link" + STRIKE = "strike" + UNDERLINE = "underline" + TEXT_COLOR = 
"textColor" + SUBSUP = "subsup" + + +class ADFMark(TypedDict, total=False): + """ADF mark structure for inline formatting.""" + + type: ADFMarkType + attrs: Optional[dict[str, Any]] + + +class ADFNode(TypedDict, total=False): + """Base ADF node structure.""" + + type: ADFNodeType + content: Optional[list["ADFNode"]] + attrs: Optional[dict[str, Any]] + marks: Optional[list[ADFMark]] + text: Optional[str] + + +class ADFTextNode(ADFNode): + """ADF text node with required text field.""" + + type: Literal[ADFNodeType.TEXT] + text: str + marks: Optional[list[ADFMark]] + + +class ADFParagraphNode(ADFNode): + """ADF paragraph node.""" + + type: Literal[ADFNodeType.PARAGRAPH] + content: list[ADFNode] + + +class ADFHeadingNode(ADFNode): + """ADF heading node.""" + + type: Literal[ADFNodeType.HEADING] + content: list[ADFNode] + attrs: dict[str, int] # Contains level: 1-6 + + +class ADFCodeBlockNode(ADFNode): + """ADF code block node.""" + + type: Literal[ADFNodeType.CODE_BLOCK] + content: list[ADFTextNode] + attrs: Optional[dict[str, str]] # Contains language if specified + + +class ADFListNode(ADFNode): + """ADF list node (bullet or ordered).""" + + type: Union[Literal[ADFNodeType.BULLET_LIST], Literal[ADFNodeType.ORDERED_LIST]] + content: list["ADFListItemNode"] + + +class ADFListItemNode(ADFNode): + """ADF list item node.""" + + type: Literal[ADFNodeType.LIST_ITEM] + content: list[ADFNode] + + +class ADFDocument(TypedDict): + """Complete ADF document structure.""" + + version: Literal[1] + type: Literal[ADFNodeType.DOC] + content: list[ADFNode] + + +# Type aliases for convenience +AnyADFNode = Union[ + ADFNode, + ADFTextNode, + ADFParagraphNode, + ADFHeadingNode, + ADFCodeBlockNode, + ADFListNode, + ADFListItemNode, +] diff --git a/tests/shared/test_markdown_adf_adapter.py b/tests/shared/test_markdown_adf_adapter.py new file mode 100644 index 000000000..df6548fa3 --- /dev/null +++ b/tests/shared/test_markdown_adf_adapter.py @@ -0,0 +1,335 @@ +"""Tests for the Markdown to 
ADF adapter.""" + +from src.codegen.shared.markdown_adf import MarkdownToADFAdapter +from src.codegen.shared.markdown_adf.adf_types import ADFMarkType, ADFNodeType + + +class TestMarkdownToADFAdapter: + """Test cases for the MarkdownToADFAdapter class.""" + + def setup_method(self): + """Set up test fixtures.""" + self.adapter = MarkdownToADFAdapter() + + def test_simple_text(self): + """Test conversion of simple text.""" + markdown = "Hello world" + result = self.adapter.convert(markdown) + + assert result["version"] == 1 + assert result["type"] == ADFNodeType.DOC + assert len(result["content"]) == 1 + + paragraph = result["content"][0] + assert paragraph["type"] == ADFNodeType.PARAGRAPH + assert len(paragraph["content"]) == 1 + + text_node = paragraph["content"][0] + assert text_node["type"] == ADFNodeType.TEXT + assert text_node["text"] == "Hello world" + + def test_paragraph(self): + """Test conversion of paragraphs.""" + markdown = "First paragraph.\n\nSecond paragraph." + result = self.adapter.convert(markdown) + + assert len(result["content"]) == 2 + + # First paragraph + first_para = result["content"][0] + assert first_para["type"] == ADFNodeType.PARAGRAPH + assert first_para["content"][0]["text"] == "First paragraph." + + # Second paragraph + second_para = result["content"][1] + assert second_para["type"] == ADFNodeType.PARAGRAPH + assert second_para["content"][0]["text"] == "Second paragraph." 
+ + def test_headings(self): + """Test conversion of headings.""" + markdown = "# Heading 1\n## Heading 2\n### Heading 3" + result = self.adapter.convert(markdown) + + assert len(result["content"]) == 3 + + # H1 + h1 = result["content"][0] + assert h1["type"] == ADFNodeType.HEADING + assert h1["attrs"]["level"] == 1 + assert h1["content"][0]["text"] == "Heading 1" + + # H2 + h2 = result["content"][1] + assert h2["type"] == ADFNodeType.HEADING + assert h2["attrs"]["level"] == 2 + assert h2["content"][0]["text"] == "Heading 2" + + # H3 + h3 = result["content"][2] + assert h3["type"] == ADFNodeType.HEADING + assert h3["attrs"]["level"] == 3 + assert h3["content"][0]["text"] == "Heading 3" + + def test_bold_text(self): + """Test conversion of bold text.""" + markdown = "This is **bold** text." + result = self.adapter.convert(markdown) + + paragraph = result["content"][0] + content = paragraph["content"] + + # Should have 3 text nodes: "This is ", "bold", " text." + assert len(content) == 3 + + # First text node + assert content[0]["text"] == "This is " + assert "marks" not in content[0] or not content[0]["marks"] + + # Bold text node + assert content[1]["text"] == "bold" + assert len(content[1]["marks"]) == 1 + assert content[1]["marks"][0]["type"] == ADFMarkType.STRONG + + # Last text node + assert content[2]["text"] == " text." + assert "marks" not in content[2] or not content[2]["marks"] + + def test_italic_text(self): + """Test conversion of italic text.""" + markdown = "This is *italic* text." + result = self.adapter.convert(markdown) + + paragraph = result["content"][0] + content = paragraph["content"] + + # Should have 3 text nodes + assert len(content) == 3 + + # Italic text node + assert content[1]["text"] == "italic" + assert len(content[1]["marks"]) == 1 + assert content[1]["marks"][0]["type"] == ADFMarkType.EM + + def test_inline_code(self): + """Test conversion of inline code.""" + markdown = "This is `inline code` text." 
+ result = self.adapter.convert(markdown) + + paragraph = result["content"][0] + content = paragraph["content"] + + # Should have 3 text nodes + assert len(content) == 3 + + # Code text node + assert content[1]["text"] == "inline code" + assert len(content[1]["marks"]) == 1 + assert content[1]["marks"][0]["type"] == ADFMarkType.CODE + + def test_links(self): + """Test conversion of links.""" + markdown = "Visit [Google](https://google.com) for search." + result = self.adapter.convert(markdown) + + paragraph = result["content"][0] + content = paragraph["content"] + + # Should have 3 text nodes + assert len(content) == 3 + + # Link text node + assert content[1]["text"] == "Google" + assert len(content[1]["marks"]) == 1 + assert content[1]["marks"][0]["type"] == ADFMarkType.LINK + assert content[1]["marks"][0]["attrs"]["href"] == "https://google.com" + + def test_code_block(self): + """Test conversion of code blocks.""" + markdown = "```python\nprint('Hello, world!')\n```" + result = self.adapter.convert(markdown) + + assert len(result["content"]) == 1 + + code_block = result["content"][0] + assert code_block["type"] == ADFNodeType.CODE_BLOCK + assert code_block["attrs"]["language"] == "python" + assert len(code_block["content"]) == 1 + assert code_block["content"][0]["text"] == "print('Hello, world!')" + + def test_code_block_without_language(self): + """Test conversion of code blocks without language specification.""" + markdown = "```\nsome code\n```" + result = self.adapter.convert(markdown) + + code_block = result["content"][0] + assert code_block["type"] == ADFNodeType.CODE_BLOCK + assert "attrs" not in code_block or "language" not in code_block.get("attrs", {}) + assert code_block["content"][0]["text"] == "some code" + + def test_bullet_list(self): + """Test conversion of bullet lists.""" + markdown = "- Item 1\n- Item 2\n- Item 3" + result = self.adapter.convert(markdown) + + assert len(result["content"]) == 1 + + bullet_list = result["content"][0] + assert 
bullet_list["type"] == ADFNodeType.BULLET_LIST + assert len(bullet_list["content"]) == 3 + + # Check first list item + first_item = bullet_list["content"][0] + assert first_item["type"] == ADFNodeType.LIST_ITEM + assert len(first_item["content"]) == 1 + assert first_item["content"][0]["type"] == ADFNodeType.PARAGRAPH + assert first_item["content"][0]["content"][0]["text"] == "Item 1" + + def test_ordered_list(self): + """Test conversion of ordered lists.""" + markdown = "1. First item\n2. Second item\n3. Third item" + result = self.adapter.convert(markdown) + + assert len(result["content"]) == 1 + + ordered_list = result["content"][0] + assert ordered_list["type"] == ADFNodeType.ORDERED_LIST + assert len(ordered_list["content"]) == 3 + + # Check first list item + first_item = ordered_list["content"][0] + assert first_item["type"] == ADFNodeType.LIST_ITEM + assert first_item["content"][0]["content"][0]["text"] == "First item" + + def test_blockquote(self): + """Test conversion of blockquotes.""" + markdown = "> This is a blockquote.\n> It spans multiple lines." + result = self.adapter.convert(markdown) + + assert len(result["content"]) == 1 + + blockquote = result["content"][0] + assert blockquote["type"] == ADFNodeType.BLOCKQUOTE + assert len(blockquote["content"]) >= 1 + + def test_horizontal_rule(self): + """Test conversion of horizontal rules.""" + markdown = "Before rule\n\n---\n\nAfter rule" + result = self.adapter.convert(markdown) + + # Should have 3 elements: paragraph, rule, paragraph + assert len(result["content"]) == 3 + + # Check rule + rule = result["content"][1] + assert rule["type"] == ADFNodeType.RULE + + def test_mixed_formatting(self): + """Test conversion of mixed inline formatting.""" + markdown = "This has **bold** and *italic* and `code` formatting." 
+ result = self.adapter.convert(markdown) + + paragraph = result["content"][0] + content = paragraph["content"] + + # Should have multiple text nodes with different marks + assert len(content) > 3 + + # Find the bold text + bold_node = next((node for node in content if node.get("text") == "bold"), None) + assert bold_node is not None + assert any(mark["type"] == ADFMarkType.STRONG for mark in bold_node.get("marks", [])) + + # Find the italic text + italic_node = next((node for node in content if node.get("text") == "italic"), None) + assert italic_node is not None + assert any(mark["type"] == ADFMarkType.EM for mark in italic_node.get("marks", [])) + + # Find the code text + code_node = next((node for node in content if node.get("text") == "code"), None) + assert code_node is not None + assert any(mark["type"] == ADFMarkType.CODE for mark in code_node.get("marks", [])) + + def test_empty_input(self): + """Test conversion of empty input.""" + result = self.adapter.convert("") + + assert result["version"] == 1 + assert result["type"] == ADFNodeType.DOC + assert len(result["content"]) == 1 + + # Should create an empty paragraph + paragraph = result["content"][0] + assert paragraph["type"] == ADFNodeType.PARAGRAPH + assert paragraph["content"][0]["text"] == "" + + def test_complex_document(self): + """Test conversion of a complex document with multiple elements.""" + markdown = """# Main Title + +This is a paragraph with **bold** and *italic* text. + +## Subsection + +Here's a list: +- Item 1 +- Item 2 with `inline code` +- Item 3 + +And a code block: + +```python +def hello(): + print("Hello, world!") +``` + +> This is a blockquote with some important information. 
+ +--- + +Final paragraph after the rule.""" + + result = self.adapter.convert(markdown) + + # Should have multiple content elements + assert len(result["content"]) > 5 + + # Check that we have different types of nodes + node_types = [node["type"] for node in result["content"]] + assert ADFNodeType.HEADING in node_types + assert ADFNodeType.PARAGRAPH in node_types + assert ADFNodeType.BULLET_LIST in node_types + assert ADFNodeType.CODE_BLOCK in node_types + assert ADFNodeType.BLOCKQUOTE in node_types + assert ADFNodeType.RULE in node_types + + def test_malformed_markdown(self): + """Test handling of malformed markdown.""" + markdown = "**unclosed bold and *unclosed italic" + result = self.adapter.convert(markdown) + + # Should still produce a valid ADF document + assert result["version"] == 1 + assert result["type"] == ADFNodeType.DOC + assert len(result["content"]) >= 1 + + # Should have at least one paragraph + assert any(node["type"] == ADFNodeType.PARAGRAPH for node in result["content"]) + + def test_nested_lists(self): + """Test conversion of nested lists.""" + markdown = """- Item 1 + - Nested item 1 + - Nested item 2 +- Item 2""" + + result = self.adapter.convert(markdown) + + # Should have a bullet list + assert len(result["content"]) == 1 + bullet_list = result["content"][0] + assert bullet_list["type"] == ADFNodeType.BULLET_LIST + + # The nested structure might be flattened depending on markdown parser + # Just ensure we have list items + assert len(bullet_list["content"]) >= 2 + assert all(item["type"] == ADFNodeType.LIST_ITEM for item in bullet_list["content"])