Fix streamlit app error #9

Status: Open. Wants to merge 14 commits into base `main`.
28 changes: 26 additions & 2 deletions .env.sample
@@ -1,2 +1,26 @@
GEMINI_PROJECT_ID=<GEMINI_PROJECT_ID>
GITHUB_TOKEN=<GITHUB_TOKEN>
# Google Gemini API Configuration
GEMINI_PROJECT_ID=<your-project-id>
GEMINI_LOCATION=us-central1
GEMINI_MODEL=gemini-2.5-pro-exp-03-25
# Uncomment if using API key instead of project ID
# GEMINI_API_KEY=<your-api-key>

# Alternative LLM APIs (uncomment to use)
# ANTHROPIC_API_KEY=<your-anthropic-api-key>
# OPENAI_API_KEY=<your-openai-api-key>

# GitHub API Configuration
GITHUB_TOKEN=<your-github-token>

# Logging Configuration
LOG_DIR=logs

# Cache Configuration
CACHE_ENABLED=true
CACHE_FILE=llm_cache.json

# Streamlit Configuration
STREAMLIT_SERVER_PORT=8501
STREAMLIT_SERVER_HEADLESS=true
STREAMLIT_SERVER_ADDRESS=0.0.0.0
STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
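The new cache settings are consumed at runtime via `os.getenv` in `utils/call_llm.py`. A minimal sketch of that parsing, using the variable names from the sample above (the helper function itself is illustrative, not part of the PR):

```python
import os

def read_cache_config():
    """Read cache settings from the environment, mirroring the .env.sample keys."""
    cache_file = os.getenv("CACHE_FILE", "llm_cache.json")
    # Only the literal string "true" (any case) enables the cache.
    cache_enabled = os.getenv("CACHE_ENABLED", "true").lower() == "true"
    return cache_file, cache_enabled

os.environ["CACHE_FILE"] = "custom_cache.json"
os.environ["CACHE_ENABLED"] = "FALSE"
print(read_cache_config())  # → ('custom_cache.json', False)
```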
45 changes: 45 additions & 0 deletions Dockerfile
@@ -0,0 +1,45 @@
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies including Git, bash, and PDF conversion tools
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
bash \
pandoc \
wkhtmltopdf \
texlive-xetex \
texlive-fonts-recommended \
texlive-plain-generic \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Create necessary directories with proper permissions
RUN mkdir -p logs output && chmod -R 777 logs output

# Expose the Streamlit port
EXPOSE 8501

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
ENV STREAMLIT_SERVER_HEADLESS=true
ENV LOG_DIR=/app/logs
ENV CACHE_FILE=/app/llm_cache.json
ENV CACHE_ENABLED=true
ENV GIT_PYTHON_REFRESH=quiet
ENV OUTPUT_DIR=/app/output

# Default command (can be overridden by docker-compose)
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
55 changes: 39 additions & 16 deletions README.md
@@ -63,6 +63,29 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket

## 🚀 Getting Started

### Option 1: Using Docker (Recommended)

1. Clone this repository

2. Configure your environment variables in the `.env` file:
```bash
# Copy the sample .env file
cp .env.sample .env

# Edit the .env file with your credentials
# GEMINI_PROJECT_ID=your-project-id
# GITHUB_TOKEN=your-github-token
```

3. Run the application using Docker Compose:
```bash
docker-compose up -d
```

4. Access the Streamlit web interface at http://localhost:8501

### Option 2: Manual Installation

1. Clone this repository

2. Install dependencies:
@@ -82,22 +105,22 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
```bash
python utils/call_llm.py
```

7. Generate a complete codebase tutorial by running the main script:
```bash
# Analyze a GitHub repository
python main.py --repo https://github.com/username/repo --include "*.py" "*.js" --exclude "tests/*" --max-size 50000

# Or, analyze a local directory
python main.py --dir /path/to/your/codebase --include "*.py" --exclude "*test*"
```
- `--repo` or `--dir` - Specify either a GitHub repo URL or a local directory path (required, mutually exclusive)
- `-n, --name` - Project name (optional, derived from URL/directory if omitted)
- `-t, --token` - GitHub token (or set GITHUB_TOKEN environment variable)
- `-o, --output` - Output directory (default: ./output)
- `-i, --include` - Files to include (e.g., "*.py" "*.js")
- `-e, --exclude` - Files to exclude (e.g., "tests/*" "docs/*")
- `-s, --max-size` - Maximum file size in bytes (default: 100KB)
4. Run the Streamlit web interface:
```bash
streamlit run app.py
```

Or generate a complete codebase tutorial directly using the command line:
```bash
python main.py https://github.com/username/repo --include "*.py" "*.js" --exclude "tests/*" --max-size 50000
```
- `repo_url` - URL of the GitHub repository (required)
- `-n, --name` - Project name (optional, derived from URL if omitted)
- `-t, --token` - GitHub token (or set GITHUB_TOKEN environment variable)
- `-o, --output` - Output directory (default: ./output)
- `-i, --include` - Files to include (e.g., "*.py" "*.js")
- `-e, --exclude` - Files to exclude (e.g., "tests/*" "docs/*")
- `-s, --max-size` - Maximum file size in bytes (default: 100KB)

The application will crawl the repository, analyze the codebase structure, generate tutorial content, and save the output in the specified directory (default: ./output).
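The option list above maps naturally onto `argparse`. A minimal sketch mirroring the documented flags (hypothetical, not the PR's actual `main.py`):

```python
import argparse

def build_parser():
    """Hypothetical parser mirroring the CLI options documented in the README."""
    p = argparse.ArgumentParser(description="Generate a codebase tutorial")
    p.add_argument("repo_url", help="URL of the GitHub repository")
    p.add_argument("-n", "--name", help="Project name (derived from URL if omitted)")
    p.add_argument("-t", "--token", help="GitHub token (or set GITHUB_TOKEN)")
    p.add_argument("-o", "--output", default="./output", help="Output directory")
    p.add_argument("-i", "--include", nargs="+", help='Files to include, e.g. "*.py"')
    p.add_argument("-e", "--exclude", nargs="+", help='Files to exclude, e.g. "tests/*"')
    p.add_argument("-s", "--max-size", type=int, default=100_000,
                   help="Maximum file size in bytes")
    return p

args = build_parser().parse_args(
    ["https://github.com/username/repo", "--include", "*.py", "--max-size", "50000"]
)
print(args.max_size)  # → 50000
```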

447 changes: 447 additions & 0 deletions app.py

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions docker-compose.yml
@@ -0,0 +1,39 @@
version: '3'

services:
tutorial-generator:
build:
context: .
dockerfile: Dockerfile
ports:
- "8501:8501"
volumes:
- ./output:/app/output
- ./logs:/app/logs
- ./llm_cache.json:/app/llm_cache.json
env_file:
- .env
environment:
- PYTHONUNBUFFERED=1
- PYTHONDONTWRITEBYTECODE=1
- STREAMLIT_SERVER_PORT=8501
- STREAMLIT_SERVER_ADDRESS=0.0.0.0
- STREAMLIT_SERVER_HEADLESS=true
- LOG_DIR=/app/logs
- CACHE_FILE=/app/llm_cache.json
- CACHE_ENABLED=true
- OUTPUT_DIR=/app/output
restart: unless-stopped
# Ensure the container has write permissions to the output directory
user: "${UID:-1000}:${GID:-1000}"
# Create output directory with proper permissions
command: >
bash -c "
mkdir -p /app/output &&
chmod -R 777 /app/output &&
mkdir -p /app/logs &&
chmod -R 777 /app/logs &&
touch /app/llm_cache.json &&
chmod 666 /app/llm_cache.json &&
streamlit run app.py --server.port=8501 --server.address=0.0.0.0
"
5 changes: 5 additions & 0 deletions requirements.txt
@@ -5,3 +5,8 @@ gitpython>=3.1.0
google-cloud-aiplatform>=1.25.0
google-genai>=1.9.0
python-dotenv>=1.0.0
streamlit>=1.32.0
markdown>=3.4.0
pdfkit>=1.0.0
weasyprint>=59.0
pymdown-extensions>=10.0.0
55 changes: 55 additions & 0 deletions test_markdown_converter.py
@@ -0,0 +1,55 @@
import os
from utils.markdown_converter import markdown_to_html, markdown_to_pdf, create_combined_markdown, get_file_contents

# Test directory
output_dir = "output/GIM-BACK"

# If the directory doesn't exist, try to find it
if not os.path.exists(output_dir):
output_base_dir = "output"
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
project_dirs = [d for d in os.listdir(output_base_dir)
if os.path.isdir(os.path.join(output_base_dir, d))]
print(f"Available project directories: {project_dirs}")
if project_dirs:
output_dir = os.path.join(output_base_dir, project_dirs[0])
print(f"Using first available directory: {output_dir}")

# Check if output directory exists
if os.path.exists(output_dir) and os.path.isdir(output_dir):
print(f"Output directory exists: {output_dir}")

# Get file contents
file_contents = get_file_contents(output_dir, '.md')
print(f"Found {len(file_contents)} markdown files")

# Create combined markdown
combined_content, combined_file_path = create_combined_markdown(
file_contents,
os.path.join(output_dir, "test_combined.md")
)

if combined_content and combined_file_path:
print(f"Created combined markdown file: {combined_file_path}")

# Convert to HTML
html_content = markdown_to_html(combined_content)
if html_content:
html_file_path = os.path.join(output_dir, "test_combined.html")
with open(html_file_path, "w", encoding="utf-8") as f:
f.write(html_content)
print(f"Created HTML file: {html_file_path}")
else:
print("Failed to convert to HTML")

# Convert to PDF
pdf_file_path = os.path.join(output_dir, "test_combined.pdf")
pdf_path = markdown_to_pdf(combined_content, pdf_file_path)
if pdf_path and os.path.exists(pdf_path):
print(f"Created PDF file: {pdf_path}")
else:
print("Failed to convert to PDF")
else:
print("Failed to create combined markdown")
else:
print(f"Output directory does not exist: {output_dir}")
37 changes: 37 additions & 0 deletions test_markdown_render.py
@@ -0,0 +1,37 @@
import os

# Test directory
output_dir = "output/GIM-BACK"

# If the directory doesn't exist, try to find it
if not os.path.exists(output_dir):
output_base_dir = "output"
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
project_dirs = [d for d in os.listdir(output_base_dir)
if os.path.isdir(os.path.join(output_base_dir, d))]
print(f"Available project directories: {project_dirs}")
if project_dirs:
output_dir = os.path.join(output_base_dir, project_dirs[0])
print(f"Using first available directory: {output_dir}")

# Check if output directory exists
if os.path.exists(output_dir) and os.path.isdir(output_dir):
print(f"Output directory exists: {output_dir}")

# List files in the directory
files = sorted(os.listdir(output_dir))
print(f"Files in directory: {files}")

# Read and print the content of each file
for file in files:
file_path = os.path.join(output_dir, file)
if os.path.isfile(file_path):
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
print(f"\n--- {file} ---")
print(f"First 100 characters: {content[:100]}...")
except Exception as e:
print(f"Error reading file {file}: {str(e)}")
else:
print(f"Output directory does not exist: {output_dir}")
47 changes: 47 additions & 0 deletions test_output_dir.py
@@ -0,0 +1,47 @@
import os

# Test directory detection logic
output_base_dir = "output"
project_name = "GIM-BACK"

# Test with non-existent directory
non_existent_dir = "output/NON-EXISTENT"
print(f"\nTesting with non-existent directory: {non_existent_dir}")
if os.path.exists(non_existent_dir) and os.path.isdir(non_existent_dir):
print(f"Directory exists: {non_existent_dir}")
else:
print(f"Directory does not exist: {non_existent_dir}")

# Try to find it in the output base directory
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
project_dirs = [d for d in os.listdir(output_base_dir)
if os.path.isdir(os.path.join(output_base_dir, d))]

print(f"Available project directories: {project_dirs}")
else:
print(f"Output base directory does not exist: {output_base_dir}")

print("\nTesting with existing directory:")

print(f"Checking if output directory exists: {output_base_dir}")
if os.path.exists(output_base_dir) and os.path.isdir(output_base_dir):
print(f"Output base directory exists: {output_base_dir}")

# List all directories in the output base directory
project_dirs = [d for d in os.listdir(output_base_dir)
if os.path.isdir(os.path.join(output_base_dir, d))]

print(f"Found project directories: {project_dirs}")

if project_name and project_name in project_dirs:
# Found the project directory
actual_output_dir = os.path.join(output_base_dir, project_name)
print(f"Found project directory: {actual_output_dir}")

# List files in the project directory
files = os.listdir(actual_output_dir)
print(f"Files in project directory: {files}")
else:
print(f"Project directory '{project_name}' not found in {output_base_dir}")
else:
print(f"Output base directory does not exist: {output_base_dir}")
101 changes: 57 additions & 44 deletions utils/call_llm.py
@@ -17,11 +17,16 @@
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)

# Simple cache configuration
cache_file = "llm_cache.json"
# Cache configuration from environment variables
cache_file = os.getenv("CACHE_FILE", "llm_cache.json")
cache_enabled = os.getenv("CACHE_ENABLED", "true").lower() == "true"

# By default, we Google Gemini 2.5 pro, as it shows great performance for code understanding
def call_llm(prompt: str, use_cache: bool = True) -> str:
# By default, we use Google Gemini 2.5 pro, as it shows great performance for code understanding
def call_llm(prompt: str, use_cache: bool = None) -> str:
# Determine if cache should be used (parameter overrides environment variable)
if use_cache is None:
use_cache = cache_enabled

# Log the prompt
logger.info(f"PROMPT: {prompt}")

@@ -33,55 +38,63 @@ def call_llm(prompt: str, use_cache: bool = True) -> str:
try:
with open(cache_file, 'r') as f:
cache = json.load(f)
except:
logger.warning(f"Failed to load cache, starting with empty cache")
except Exception as e:
logger.warning(f"Failed to load cache, starting with empty cache: {e}")

# Return from cache if exists
if prompt in cache:
logger.info(f"RESPONSE: {cache[prompt]}")
logger.info(f"RESPONSE (cached): {cache[prompt]}")
return cache[prompt]

# Call the LLM if not in cache or cache disabled
client = genai.Client(
vertexai=True,
# TODO: change to your own project id and location
project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
location=os.getenv("GEMINI_LOCATION", "us-central1")
)
# You can comment the previous line and use the AI Studio key instead:
# client = genai.Client(
# api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
# )
model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
response = client.models.generate_content(
model=model,
contents=[prompt]
)
response_text = response.text

# Log the response
logger.info(f"RESPONSE: {response_text}")

# Update cache if enabled
if use_cache:
# Load cache again to avoid overwrites
cache = {}
if os.path.exists(cache_file):
try:
# Check if using API key or Vertex AI
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
# Use API key authentication
client = genai.Client(api_key=api_key)
else:
# Use Vertex AI authentication
client = genai.Client(
vertexai=True,
project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
location=os.getenv("GEMINI_LOCATION", "us-central1")
)

model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
response = client.models.generate_content(
model=model,
contents=[prompt]
)
response_text = response.text

# Log the response
logger.info(f"RESPONSE: {response_text}")

# Update cache if enabled
if use_cache:
# Load cache again to avoid overwrites
cache = {}
if os.path.exists(cache_file):
try:
with open(cache_file, 'r') as f:
cache = json.load(f)
except Exception as e:
logger.warning(f"Failed to reload cache: {e}")

# Add to cache and save
cache[prompt] = response_text
try:
with open(cache_file, 'r') as f:
cache = json.load(f)
except:
pass
with open(cache_file, 'w') as f:
json.dump(cache, f)
except Exception as e:
logger.error(f"Failed to save cache: {e}")

# Add to cache and save
cache[prompt] = response_text
try:
with open(cache_file, 'w') as f:
json.dump(cache, f)
except Exception as e:
logger.error(f"Failed to save cache: {e}")
return response_text

return response_text
except Exception as e:
logger.error(f"Error calling Gemini API: {e}")
raise Exception(f"Failed to generate content with Gemini: {e}")

# # Use Anthropic Claude 3.7 Sonnet Extended Thinking
# def call_llm(prompt, use_cache: bool = True):
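The refactored `call_llm` resolves caching in two steps: an explicit `use_cache` argument wins, otherwise `CACHE_ENABLED` decides; and cache loading never raises. Both rules can be sketched in isolation (helper names are illustrative, not part of the PR):

```python
import json
import os

def resolve_use_cache(param, env_value="true"):
    """The parameter overrides the CACHE_ENABLED setting when it is not None."""
    env_enabled = env_value.lower() == "true"
    return env_enabled if param is None else param

def load_cache(cache_file):
    """Load the JSON cache, falling back to an empty dict on any error."""
    if not os.path.exists(cache_file):
        return {}
    try:
        with open(cache_file) as f:
            return json.load(f)
    except Exception:
        return {}
```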
266 changes: 266 additions & 0 deletions utils/markdown_converter.py
@@ -0,0 +1,266 @@
import os
import tempfile
import subprocess
import logging
import base64
from pathlib import Path

logger = logging.getLogger(__name__)

def markdown_to_html(markdown_content):
"""
Convert markdown content to HTML with proper rendering of code blocks and Mermaid diagrams.
Args:
markdown_content (str): The markdown content to convert
Returns:
str: The HTML content
"""
try:
# Create a temporary file for the markdown content
with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_md:
temp_md.write(markdown_content)
temp_md_path = temp_md.name

# Create a temporary file for the HTML output
with tempfile.NamedTemporaryFile(suffix='.html', delete=False) as temp_html:
temp_html_path = temp_html.name

# Convert markdown to HTML using pandoc
cmd = [
'pandoc',
temp_md_path,
'-o', temp_html_path,
'--standalone',
'--highlight-style=tango',
'--toc',
'--toc-depth=3',
'--number-sections',
'-f', 'markdown+yaml_metadata_block+raw_html+fenced_divs+mermaid',
'--embed-resources',
'--mathjax',
'--template=default',
'--css', 'https://cdn.jsdelivr.net/npm/github-markdown-css/github-markdown.min.css',
'--include-in-header', '-'
]

# Add Mermaid script to the header
mermaid_script = """
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
mermaid.initialize({
startOnLoad: true,
theme: 'default',
securityLevel: 'loose',
flowchart: { useMaxWidth: false, htmlLabels: true }
});
});
</script>
<style>
.markdown-body {
box-sizing: border-box;
min-width: 200px;
max-width: 980px;
margin: 0 auto;
padding: 45px;
}
@media (max-width: 767px) {
.markdown-body {
padding: 15px;
}
}
pre {
background-color: #f6f8fa;
border-radius: 3px;
padding: 16px;
overflow: auto;
}
code {
font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, Courier, monospace;
background-color: rgba(27, 31, 35, 0.05);
border-radius: 3px;
padding: 0.2em 0.4em;
margin: 0;
}
pre code {
background-color: transparent;
padding: 0;
}
.mermaid {
text-align: center;
margin: 20px 0;
}
</style>
"""

# Run the command with the Mermaid script as input to --include-in-header
process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
_, stderr = process.communicate(input=mermaid_script)

if process.returncode != 0:
logger.error(f"Error converting markdown to HTML: {stderr}")
return None

# Read the HTML content
with open(temp_html_path, 'r', encoding='utf-8') as f:
html_content = f.read()

# Clean up temporary files
os.unlink(temp_md_path)
os.unlink(temp_html_path)

return html_content

except Exception as e:
logger.error(f"Error in markdown_to_html: {str(e)}")
return None

def create_combined_markdown(files_dict, output_path=None):
"""
Combine multiple markdown files into a single markdown file.
Args:
files_dict (dict): Dictionary mapping filenames to their content
output_path (str, optional): Path to save the combined markdown file
Returns:
tuple: (combined_content, output_path)
"""
try:
# Start with index.md if it exists
combined_content = ""
if 'index.md' in files_dict:
combined_content += files_dict['index.md'] + "\n\n---\n\n"

# Add all numbered files in order
numbered_files = sorted([f for f in files_dict.keys()
if f.startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'))
and f.endswith('.md')])

for file in numbered_files:
combined_content += files_dict[file] + "\n\n---\n\n"

# Add any remaining files
for file in files_dict:
if file != 'index.md' and file not in numbered_files and file.endswith('.md'):
combined_content += files_dict[file] + "\n\n---\n\n"

# Save to file if output_path is provided
if output_path:
os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as f:
f.write(combined_content)

return combined_content, output_path

except Exception as e:
logger.error(f"Error in create_combined_markdown: {str(e)}")
return None, None

def html_to_pdf(html_content, output_path=None):
"""
Convert HTML content to PDF using wkhtmltopdf.
Args:
html_content (str): The HTML content to convert
output_path (str, optional): Path to save the PDF
Returns:
str: The path to the generated PDF
"""
try:
# Create a temporary file for the HTML content
with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as temp_html:
temp_html.write(html_content)
temp_html_path = temp_html.name

# Create a temporary file for the PDF output if not provided
if output_path is None:
temp_pdf = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
temp_pdf.close()
output_path = temp_pdf.name

# Convert HTML to PDF using wkhtmltopdf
cmd = [
'wkhtmltopdf',
'--enable-local-file-access',
'--javascript-delay', '1000', # Wait for JavaScript to execute (for Mermaid)
'--no-stop-slow-scripts',
'--margin-top', '20',
'--margin-right', '20',
'--margin-bottom', '20',
'--margin-left', '20',
'--page-size', 'A4',
'--encoding', 'UTF-8',
'--footer-center', '[page]/[topage]',
temp_html_path,
output_path
]

# Run the command
process = subprocess.run(cmd, capture_output=True, text=True)

# Clean up temporary files
os.unlink(temp_html_path)

if process.returncode != 0:
logger.error(f"Error converting HTML to PDF: {process.stderr}")
return None

return output_path

except Exception as e:
logger.error(f"Error in html_to_pdf: {str(e)}")
return None

def markdown_to_pdf(markdown_content, output_path=None):
"""
Convert markdown content to PDF.
Args:
markdown_content (str): The markdown content to convert
output_path (str, optional): Path to save the PDF
Returns:
str: The path to the generated PDF
"""
# Convert markdown to HTML
html_content = markdown_to_html(markdown_content)
if not html_content:
return None

# Convert HTML to PDF
return html_to_pdf(html_content, output_path)

def get_file_contents(directory, file_pattern=None):
"""
Get the contents of all files in a directory.
Args:
directory (str): The directory to search
file_pattern (str, optional): A pattern to match filenames
Returns:
dict: Dictionary mapping filenames to their content
"""
try:
files_dict = {}
for file in os.listdir(directory):
if file_pattern and not file.endswith(file_pattern):
continue

file_path = os.path.join(directory, file)
if os.path.isfile(file_path):
try:
with open(file_path, 'r', encoding='utf-8') as f:
files_dict[file] = f.read()
except Exception as e:
logger.error(f"Error reading file {file}: {str(e)}")

return files_dict

except Exception as e:
logger.error(f"Error in get_file_contents: {str(e)}")
return {}
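The ordering rule in `create_combined_markdown` above (`index.md` first, then numbered chapters sorted, then any remaining `.md` files) can be seen with a toy dict. The standalone helper and file names here are invented for illustration:

```python
def combine_order(files_dict):
    """Return markdown file names in the order create_combined_markdown concatenates them."""
    order = []
    if "index.md" in files_dict:
        order.append("index.md")
    # Numbered chapter files (e.g. "01_nodes.md") come next, in sorted order.
    numbered = sorted(f for f in files_dict
                      if f[:1].isdigit() and f.endswith(".md"))
    order.extend(numbered)
    # Any remaining markdown files keep their dict order at the end.
    order.extend(f for f in files_dict
                 if f != "index.md" and f not in numbered and f.endswith(".md"))
    return order

files = {"02_flow.md": "", "index.md": "", "01_nodes.md": "", "appendix.md": ""}
print(combine_order(files))  # → ['index.md', '01_nodes.md', '02_flow.md', 'appendix.md']
```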
404 changes: 404 additions & 0 deletions utils/markdown_to_pdf.py

Large diffs are not rendered by default.