1
+ """
2
+ markdownify_graph module
3
+ """
4
+
5
+ from typing import Dict , List , Optional , Tuple
6
+
7
+ from ..nodes import (
8
+ FetchNode ,
9
+ MarkdownifyNode ,
10
+ )
11
+ from .base_graph import BaseGraph
12
+
13
+
14
class MarkdownifyGraph(BaseGraph):
    """
    Graph that turns a web page or raw HTML into Markdown.

    The pipeline is a straight line of two nodes:

    1. A ``FetchNode`` declared with the input expression ``"url | html"``
       that writes its result under ``"html_content"``.
    2. A ``MarkdownifyNode`` that reads ``"html_content"`` and writes its
       result under ``"markdown"``.

    Args:
        llm_model: Language model handle. Accepted by the constructor but
            not referenced when the nodes are built.
        embedder_model: Embedding model handle (optional). Likewise accepted
            but not referenced when the nodes are built.
        node_config: Optional configuration dictionary; the same object is
            forwarded unchanged to both nodes.

    Example:
        >>> graph = MarkdownifyGraph(
        ...     llm_model=your_llm_model,
        ...     embedder_model=your_embedder_model
        ... )
        >>> result, _ = graph.execute({"url": "https://example.com"})
        >>> print(result["markdown"])
    """

    def __init__(
        self,
        llm_model,
        embedder_model=None,
        node_config: Optional[Dict] = None,
    ):
        # Stage 1: obtain the HTML, from either a URL or HTML already in state.
        fetcher = FetchNode(
            input="url | html",
            output=["html_content"],
            node_config=node_config,
        )

        # Stage 2: convert the fetched HTML into Markdown.
        converter = MarkdownifyNode(
            input="html_content",
            output=["markdown"],
            node_config=node_config,
        )

        # Linear topology: fetcher -> converter, starting at the fetcher.
        super().__init__(
            nodes=[fetcher, converter],
            edges=[(fetcher, converter)],
            entry_point=fetcher,
            graph_name="Markdownify",
        )

    def execute(
        self, initial_state: Dict
    ) -> Tuple[Dict, List[Dict]]:
        """
        Run the graph on the given state by delegating to ``BaseGraph.execute``.

        Args:
            initial_state: Dictionary expected to carry one of:
                - "url": address of the page to fetch and convert
                - "html": HTML markup to convert directly

        Returns:
            The tuple produced by the base class, unchanged:
                - the final state dictionary, with the converted text
                  under the "markdown" key
                - the list of execution logs
        """
        outcome = super().execute(initial_state)
        return outcome
0 commit comments