Skip to content

Commit

Permalink
🕸️ Add html example and handling
Browse files: browse the repository at this point in the history
  • Loading branch information
asim-shrestha committed Sep 13, 2024
1 parent c440ded commit 6e77f31
Show file tree
Hide file tree
Showing 7 changed files with 357 additions and 10 deletions.
4 changes: 3 additions & 1 deletion bananalyzer/data/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
"urls",
"enqueue",
"infinite-scroll",
"synthetic",
"images",
]


Expand Down Expand Up @@ -117,7 +119,7 @@ class Example(BaseModel):
description="Local path of a HAR, S3 URL of a HAR directory's tar.gz, or remote URL of MHTML",
default=None,
)
source: Literal["mhtml", "hosted", "har"] = Field(
source: Literal["html", "mhtml", "hosted", "har"] = Field(
description="Source of the website"
)
category: str = Field(description="Category of the website")
Expand Down
11 changes: 7 additions & 4 deletions bananalyzer/runner/website_responder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,19 @@ def get_url(self, example: Example) -> str:
raise NotImplementedError("get_url not implemented")


class MHTMLWebsiteResponder(WebsiteResponder):
class StaticFileResponder(WebsiteResponder):
"""
Return a local static file (index.mhtml or index.html, depending on the configured file name) as the response. MHTML is a format that embeds all resources in a single file.
Ideally this would be hosted by a webserver, but browsers like Chrome don't support MHTML
"""

def __init__(self, data_path: Path):
def __init__(self, data_path: Path, file_name: str):
super().__init__()
self.data_path = data_path
self.file_name = file_name

def get_url(self, example: Example) -> str:
mhtml_path = self.data_path / example.id / "index.mhtml"
mhtml_path = self.data_path / example.id / self.file_name
return f"file://{mhtml_path.as_posix()}"


Expand All @@ -44,7 +45,9 @@ def get_url(self, example: Example) -> str:

def get_website_responder(example: Example) -> WebsiteResponder:
if example.source == "mhtml":
return MHTMLWebsiteResponder(get_examples_path())
return StaticFileResponder(get_examples_path(), "index.mhtml")
if example.source == "html":
return StaticFileResponder(get_examples_path(), "index.html")
elif example.source == "har":
return HostedWebsiteResponder() # HTTP requests are routed to the HAR file
elif example.source == "hosted":
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bananalyzer"
version = "0.10.4"
version = "0.10.5"


description = "Open source AI Agent evaluation framework for web tasks 🐒🍌"
Expand Down
181 changes: 181 additions & 0 deletions static/examples.json
Original file line number Diff line number Diff line change
Expand Up @@ -39341,5 +39341,186 @@
]
}
]
},
{
"id": "z8Yp7sWv0sRhdnNryRLc9",
"url": "https://claude.site/artifacts/82e9d819-b977-44f8-99f8-618c21459090",
"source": "html",
"category": "synthetic",
"subcategory": "synthetic",
"type": "listing_detail",
"goal": "Extract the information within the table component",
"schema_": {
"some_value": {
"type": "string"
},
"another_value": {
"type": "number"
},
"a_third_value": {
"type": "string"
}
},
"tags": [
"synthetic",
"pagination"
],
"evals": [
{
"type": "json_match",
"expected": [
{
"some_value": "Apple",
"another_value": 10,
"a_third_value": "Red"
},
{
"some_value": "Banana",
"another_value": 20,
"a_third_value": 42
},
{
"some_value": "Cherry",
"another_value": 30,
"a_third_value": "Sweet"
},
{
"some_value": "Date",
"another_value": 40,
"a_third_value": 7
},
{
"some_value": "Elderberry",
"another_value": 50,
"a_third_value": "Purple"
},
{
"some_value": "Fig",
"another_value": 60,
"a_third_value": 3.14
},
{
"some_value": "Grape",
"another_value": 70,
"a_third_value": "Green"
},
{
"some_value": "Honeydew",
"another_value": 80,
"a_third_value": 18
},
{
"some_value": "Imbe",
"another_value": 90,
"a_third_value": "Citrus"
},
{
"some_value": "Jackfruit",
"another_value": 100,
"a_third_value": 55
},
{
"some_value": "Kiwi",
"another_value": 110,
"a_third_value": "Fuzzy"
},
{
"some_value": "Lemon",
"another_value": 120,
"a_third_value": 2.5
},
{
"some_value": "Mango",
"another_value": 130,
"a_third_value": "Tropical"
},
{
"some_value": "Nectarine",
"another_value": 140,
"a_third_value": 9
},
{
"some_value": "Orange",
"another_value": 150,
"a_third_value": "Citrus"
},
{
"some_value": "Papaya",
"another_value": 160,
"a_third_value": "Tropical"
},
{
"some_value": "Quince",
"another_value": 170,
"a_third_value": 4.8
},
{
"some_value": "Raspberry",
"another_value": 180,
"a_third_value": "Berry"
},
{
"some_value": "Strawberry",
"another_value": 190,
"a_third_value": 22
},
{
"some_value": "Tangerine",
"another_value": 200,
"a_third_value": "Orange"
},
{
"some_value": "Ugli Fruit",
"another_value": 210,
"a_third_value": "Citrus"
},
{
"some_value": "Vanilla",
"another_value": 220,
"a_third_value": 1.2
},
{
"some_value": "Watermelon",
"another_value": 230,
"a_third_value": "Large"
},
{
"some_value": "Xigua",
"another_value": 240,
"a_third_value": 33
},
{
"some_value": "Yuzu",
"another_value": 250,
"a_third_value": "Sour"
},
{
"some_value": "Zucchini",
"another_value": 260,
"a_third_value": 6
},
{
"some_value": "Apricot",
"another_value": 270,
"a_third_value": "Orange"
},
{
"some_value": "Blackberry",
"another_value": 280,
"a_third_value": 15
},
{
"some_value": "Coconut",
"another_value": 290,
"a_third_value": "Tropical"
},
{
"some_value": "Dragonfruit",
"another_value": 300,
"a_third_value": 8.4
}
]
}
]
}
]
4 changes: 2 additions & 2 deletions static/schemas.json
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,15 @@
"type": "string"
},
"title": {
"description": "The job title/role/department of the individual within the school. Example: Social Studies Teacher",
"description": "The specific job title/role/department of the individual within the school. Example: Social Studies Teacher",
"type": "string"
},
"email": {
"description": "The email address of the individual",
"type": "string"
},
"phone_number": {
"description": "Phone number of the individual within the school. Do not include irrelevant text.",
"description": "The specific phone number of the individual within the school. Do not include irrelevant text. Do not use the main school phone number.",
"type": "string"
}
},
Expand Down
Loading

0 comments on commit 6e77f31

Please sign in to comment.