1+ """
2+ Defines MarkdownifyGraph, a two-node graph that fetches a URL (or takes raw HTML) and converts the content to Markdown.
3+ """
4+
5+ from typing import Dict , List , Optional , Tuple
6+
7+ from ..nodes import (
8+ FetchNode ,
9+ MarkdownifyNode ,
10+ )
11+ from .base_graph import BaseGraph
12+
13+
class MarkdownifyGraph(BaseGraph):
    """
    Two-stage graph that turns a web page or raw HTML into Markdown.

    The pipeline is wired as ``FetchNode -> MarkdownifyNode``:
        1. ``FetchNode`` reads the ``"url"`` or ``"html"`` state key and
           writes ``"html_content"``.
        2. ``MarkdownifyNode`` reads ``"html_content"`` and writes
           ``"markdown"``.

    Args:
        llm_model: Language model handle. Accepted for interface
            compatibility; this constructor does not forward it to
            either node.
        embedder_model: Embedding model handle (optional). Likewise not
            forwarded by this constructor.
        node_config: Optional configuration dictionary. The same object
            is handed to both nodes.

    Example:
        >>> graph = MarkdownifyGraph(
        ...     llm_model=your_llm_model,
        ...     embedder_model=your_embedder_model
        ... )
        >>> result, _ = graph.execute({"url": "https://example.com"})
        >>> print(result["markdown"])
    """

    def __init__(
        self,
        llm_model,
        embedder_model=None,
        node_config: Optional[Dict] = None,
    ):
        # Stage 1: obtain the HTML, either by fetching the URL or by
        # taking the HTML supplied directly in the state.
        fetcher = FetchNode(
            input="url | html",
            output=["html_content"],
            node_config=node_config,
        )

        # Stage 2: convert the HTML produced by stage 1 into Markdown.
        converter = MarkdownifyNode(
            input="html_content",
            output=["markdown"],
            node_config=node_config,
        )

        # The stages run strictly in sequence, so each edge simply links
        # a stage to the one that follows it.
        pipeline = [fetcher, converter]
        links = list(zip(pipeline, pipeline[1:]))

        super().__init__(
            nodes=pipeline,
            edges=links,
            entry_point=fetcher,
            graph_name="Markdownify",
        )

    def execute(
        self, initial_state: Dict
    ) -> Tuple[Dict, List[Dict]]:
        """
        Run the fetch-then-convert pipeline on the given state.

        Args:
            initial_state: Starting state dictionary holding one of:
                - "url": address of the page to fetch and convert
                - "html": HTML markup to convert directly

        Returns:
            Whatever ``BaseGraph.execute`` returns, annotated here as a
            pair of:
                - the final state dictionary (the converter node writes
                  its result under the "markdown" key)
                - a list of execution log entries
        """
        # Pure delegation: the base class performs the actual traversal.
        outcome = super().execute(initial_state)
        return outcome
0 commit comments