Skip to content

Commit

Permalink
Switch to more recent Claude XML format
Browse files Browse the repository at this point in the history
Refs #15

Refs #16 (comment)
  • Loading branch information
simonw committed Sep 9, 2024
1 parent db4a164 commit d016523
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 45 deletions.
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,26 @@ Contents of file3.txt
---
```
### XML Output
### Claude XML Output
Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
To structure the output in this way, use the optional `--cxml` flag, which will produce output like this:
```xml
<documents>
<document index="1">
<source>my_directory/file1.txt</source>
<document_content>
Contents of file1.txt
</document_content>
</document>
<document index="2">
<source>my_directory/file2.txt</source>
<document_content>
Contents of file2.txt
</document_content>
</document>
...
</documents>
```

Expand Down
14 changes: 11 additions & 3 deletions files_to_prompt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import click

global_index = 1


def should_ignore(path, gitignore_rules):
for rule in gitignore_rules:
Expand Down Expand Up @@ -39,9 +41,14 @@ def print_default(path, content):


def print_as_xml(path, content):
    """Emit one file as a Claude-style ``<document>`` XML element.

    Writes the following lines with ``click.echo``, in order: an opening
    ``<document index="N">`` tag, a ``<source>`` element containing *path*,
    a ``<document_content>`` element wrapping *content*, and the closing
    ``</document>`` tag.

    ``N`` is read from the module-level ``global_index`` counter, which is
    incremented once the document has been written so that consecutive
    calls number their documents sequentially.

    Note: *path* and *content* are written verbatim; no XML escaping is
    applied here.
    """
    global global_index
    # Exactly one opening tag per document, so that it pairs with the single
    # closing </document> emitted below.
    click.echo(f'<document index="{global_index}">')
    click.echo(f"<source>{path}</source>")
    click.echo("<document_content>")
    click.echo(content)
    click.echo("</document_content>")
    click.echo("</document>")
    # Advance the shared counter only after the whole element is written.
    global_index += 1


def process_path(
Expand Down Expand Up @@ -151,6 +158,9 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
...
</documents>
"""
# Reset global_index for pytest
global global_index
global_index = 1
gitignore_rules = []
for path in paths:
if not os.path.exists(path):
Expand All @@ -159,7 +169,6 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
gitignore_rules.extend(read_gitignore(os.path.dirname(path)))
if claude_xml and path == paths[0]:
click.echo("<documents>")

process_path(
path,
include_hidden,
Expand All @@ -168,6 +177,5 @@ def cli(paths, include_hidden, ignore_gitignore, ignore_patterns, claude_xml):
ignore_patterns,
claude_xml,
)

if claude_xml:
click.echo("</documents>")
55 changes: 18 additions & 37 deletions tests/test_files_to_prompt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pytest

from click.testing import CliRunner

Expand Down Expand Up @@ -190,53 +191,33 @@ def test_binary_file_warning(tmpdir):
)


def test_xml_format_dir(tmpdir):
@pytest.mark.parametrize(
"args", (["test_dir"], ["test_dir/file1.txt", "test_dir/file2.txt"])
)
def test_xml_format_dir(tmpdir, args):
runner = CliRunner()
with tmpdir.as_cwd():
os.makedirs("test_dir")
with open("test_dir/file1.txt", "w") as f:
f.write("Contents of file1")
f.write("Contents of file1.txt")
with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2")

result = runner.invoke(cli, ["test_dir", "--cxml"])
assert result.exit_code == 0
actual = result.output
expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
</document>
<document path="test_dir/file2.txt">
Contents of file2
</document>
</documents>
"""
assert expected.strip() == actual.strip()


def test_cxml_format_multiple_paths(tmpdir):
runner = CliRunner()
with tmpdir.as_cwd():
os.makedirs("test_dir")
with open("test_dir/file1.txt", "w") as f:
f.write("Contents of file1")
with open("test_dir/file2.txt", "w") as f:
f.write("Contents of file2")

result = runner.invoke(
cli, ["test_dir/file1.txt", "test_dir/file2.txt", "--cxml"]
)

f.write("Contents of file2.txt")
result = runner.invoke(cli, args + ["--cxml"])
assert result.exit_code == 0
actual = result.output
expected = """
<documents>
<document path="test_dir/file1.txt">
Contents of file1
<document index="1">
<source>test_dir/file1.txt</source>
<document_content>
Contents of file1.txt
</document_content>
</document>
<document path="test_dir/file2.txt">
Contents of file2
<document index="2">
<source>test_dir/file2.txt</source>
<document_content>
Contents of file2.txt
</document_content>
</document>
</documents>
"""
Expand Down

0 comments on commit d016523

Please sign in to comment.