Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] FEAT New HTTP Target #392

Closed
wants to merge 44 commits into from
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
e240b73
initial commit adding files and docs
jbolor21 Aug 29, 2024
1366454
Merge branch 'main' of https://github.com/Azure/PyRIT into pr-351
Sep 19, 2024
528e50f
minor change adding prompt placeholder to url
Sep 23, 2024
d9ddf10
adding comments
Sep 23, 2024
702acc4
addressing feedback
Sep 24, 2024
cabcb62
using net_utility instead of requests
Sep 24, 2024
1619795
updated notebook
Sep 24, 2024
3882c5b
minor formatting changes
Sep 24, 2024
8e94997
fixed http request and requests call
Sep 24, 2024
f2528f4
showing with bing image creator
Sep 24, 2024
e20b7f1
formatting
Sep 24, 2024
e1bc8e9
formatting py too
Sep 24, 2024
78acd70
rename target and remove extra param
Sep 24, 2024
9bfa2c1
addressing feedback no longer seperating http parts out
Sep 25, 2024
1e90154
adding in http version
Sep 25, 2024
92a2735
Merge branch 'main' of https://github.com/Azure/PyRIT into pr-352
Sep 25, 2024
78a6501
adding prompt placeholder variable
Sep 26, 2024
c5833da
added regex pattern logic
Sep 26, 2024
01d6dcc
fixed small bug
Sep 26, 2024
38a5ea5
adding better regex logic, fixing bugs, aoai example
Sep 26, 2024
a3fda4f
fixing url encoding for body
Sep 26, 2024
4d4668d
removing extra testing
Sep 26, 2024
bfb8368
remade py file from ipynb
Sep 26, 2024
bdf56da
Merge branch 'main' of https://github.com/Azure/PyRIT into pr-351
Sep 28, 2024
0fcf639
got parsing function working
Sep 29, 2024
88c13da
Merge branch 'main' of https://github.com/Azure/PyRIT into pr-351
Oct 1, 2024
cd244c9
prompt sending orchestrator working, fixed formatting for json
Oct 1, 2024
c73dd7d
fixing regex pattern
Oct 1, 2024
8e988b1
variable key pattern instead of hardcoded
Oct 2, 2024
0f991ab
addressing feedback
Oct 2, 2024
6de8fa1
formatting
Oct 2, 2024
d8866a4
Merge branch 'main' of https://github.com/Azure/PyRIT into pr-351
Oct 3, 2024
368b523
adding in hardcoded http parser
Oct 6, 2024
9910969
added factory
Oct 7, 2024
0c9cfa7
made new subdirectory, added unit tests, cleanup
Oct 8, 2024
149f914
refactor, cleanup, pre-commit
Oct 9, 2024
7cc5f32
cleanup:
Oct 9, 2024
e1a4c70
cleanup and comments
Oct 9, 2024
5ff9eae
removing extra file
Oct 9, 2024
d0fb9b6
merge conflict
Oct 9, 2024
d899557
merge conflict
Oct 9, 2024
8e26432
changed to use converters in notebook rather than converting in targe…
Oct 9, 2024
e7db3d6
formating
Oct 9, 2024
9ce9d8c
adding TLS flag and edit logic
Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
836 changes: 836 additions & 0 deletions doc/code/targets/http_target.ipynb

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions doc/code/targets/http_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.16.4
# kernelspec:
# display_name: pyrit2
# language: python
# name: python3
# ---

# # This notebook shows how to interact with the HTTP Target:

# As a simple example, a Google search is used to show the interaction (this won't result in a successful search because of the anti-bot rules, but it shows how to use the target in a simple case).
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved

# +
import os
import urllib.parse

from pyrit.models import PromptTemplate
from pyrit.orchestrator import PromptSendingOrchestrator
from pyrit.prompt_target import HTTP_Target
from pyrit.models import PromptRequestPiece


## Add the prompt you want to send to the URL
prompt = "apple"
url = "https://www.google.com/search?q={PROMPT}"
# Add the prompt to the body of the request

with HTTP_Target(http_request={}, url=url, body={}, url_encoding="url", body_encoding="+", method="GET") as target_llm:
request = PromptRequestPiece(
role="user",
original_value=prompt,
).to_prompt_request_response()

resp = await target_llm.send_prompt_async(prompt_request=request) # type: ignore
print(resp)
print
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved


# +
import os
import urllib.parse

from pyrit.models import PromptTemplate
from pyrit.orchestrator import PromptSendingOrchestrator
from pyrit.prompt_target import HTTP_Target
from pyrit.models import PromptRequestPiece
# -

# Bing Image Creator which does not have an API is harder to use
#
# The HTTP request to make needs to be captured and put here in the "http_req" variable (the values you need to get from DevTools or Burp include the Cookie)

# +
http_req = f"""
Host: www.bing.com
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved
Origin: https://www.bing.com
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.120 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: navigate
Sec-Fetch-User: ?1
Sec-Fetch-Dest: document
Referer: https://www.bing.com/images/create?FORM=GENILP

"""

## Add the prompt you want to send to the URL
prompt = "apple"
url = "https://www.bing.com/images/create?q={PROMPT}&rt=4&FORM=GENCRE"

# Add the prompt to the body of the request

body = "q={PROMPT}&qs=ds"
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved
response_var = None
with HTTP_Target(http_request=http_req, url=url, body=body, url_encoding="url", method="POST") as target_llm:
# Questions: do i need to call converter on prompt before calling target? ie url encode rather than handling in target itself?
nina-msft marked this conversation as resolved.
Show resolved Hide resolved
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved
request = PromptRequestPiece(
role="user",
original_value=prompt,
).to_prompt_request_response()

resp = await target_llm.send_prompt_async(prompt_request=request) # type: ignore
response_var = resp



# +
from bs4 import BeautifulSoup
html_content = response_var.request_pieces[0].original_value
parsed_hmtl_soup = BeautifulSoup(html_content, 'html.parser')

print(parsed_hmtl_soup.prettify())

jbolor21 marked this conversation as resolved.
Show resolved Hide resolved
#TODO: parse & turn this into a parsing function: as an example this is the image
# <div data-c="/images/create/async/results/1-66f2fad6d7834081a343ac05ae3c1784?q=apple&amp;IG=52AE84FF96F948909718523E5DB8AF89&amp;IID=images.as" data-mc="/images/create/async/mycreation?requestId=1-66f2fad6d7834081a343ac05ae3c1784" data-nfurl="" id="gir">


# +
# Just same thing using orchestrator
http_prompt_target = HTTP_Target(http_request=http_resp, url=url, body=body)
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved

with PromptSendingOrchestrator(prompt_target=http_prompt_target) as orchestrator:
response = await orchestrator.send_prompts_async(prompt_list=[prompt]) # type: ignore
print(response[0])
2 changes: 2 additions & 0 deletions pyrit/prompt_target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pyrit.prompt_target.prompt_chat_target.ollama_chat_target import OllamaChatTarget
from pyrit.prompt_target.azure_openai_completion_target import AzureOpenAICompletionTarget
from pyrit.prompt_target.prompt_shield_target import PromptShieldTarget
from pyrit.prompt_target.http_target import HTTPTarget


__all__ = [
Expand All @@ -38,4 +39,5 @@
"limit_requests_per_minute",
"TextTarget",
"OllamaChatTarget",
"HTTPTarget"
]
129 changes: 129 additions & 0 deletions pyrit/prompt_target/http_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
from typing import Any, Union
import requests
from pyrit.prompt_target import PromptTarget
from pyrit.memory import MemoryInterface
jbolor21 marked this conversation as resolved.
Show resolved Hide resolved
from pyrit.models import construct_response_from_request, PromptRequestPiece, PromptRequestResponse
import urllib.parse

logger = logging.getLogger(__name__)


class HTTPTarget(PromptTarget):
    """
    HTTPTarget is for endpoints that do not have an API and instead require raw HTTP request(s) to send a prompt.

    The literal text "{PROMPT}" in the request's URL and/or body is replaced with the prompt before sending.

    Parameters:
        http_request (str): the full raw HTTP request (ie from Burp or DevTools): a request line
            ("POST /path HTTP/1.1"), the header lines, and optionally a blank line followed by the body
        parse_function (function): function to parse the HTTP response (stored only; not applied yet)
        memory : memory interface
        url_encoding (str): if the prompt is included in the URL, this flag sets how to encode the prompt.
            "url" percent-encodes it; any other value inserts it unchanged. Defaults to None
        body_encoding (str): if the prompt is included in the body, any truthy value replaces spaces in the
            prompt with "+"; otherwise the prompt is inserted unchanged. Defaults to None
    """

    # Marker in the raw request that is replaced with the prompt text.
    PROMPT_PLACEHOLDER = "{PROMPT}"

    def __init__(
        self,
        http_request: Union[str, None] = None,
        parse_function: callable = None,  # TODO: this would be where the parse function will go
        memory: Union[MemoryInterface, None] = None,
        url_encoding: Union[str, None] = None,
        body_encoding: Union[str, None] = None,
    ) -> None:

        super().__init__(memory=memory)
        self.http_request = http_request
        self.parse_function = parse_function
        # No trailing comma here: it would turn the value into a 1-tuple and break the == "url" comparison.
        self.url_encoding = url_encoding
        self.body_encoding = body_encoding

    async def send_prompt_async(self, *, prompt_request: PromptRequestResponse) -> PromptRequestResponse:
        """
        Sends prompt to HTTP endpoint and returns the response

        Parameters:
            prompt_request (PromptRequestResponse): request holding exactly one text piece

        Returns:
            PromptRequestResponse: response entry built from the raw HTTP response content

        Raises:
            ValueError: if the request piece is invalid or no URL could be derived from http_request
        """

        self._validate_request(prompt_request=prompt_request)
        request = prompt_request.request_pieces[0]
        prompt = request.original_value

        header_dict, http_body, url, http_method, http_vsn = self.parse_http_request()
        if not url:
            raise ValueError("http_request must contain a request line and a Host header to build the URL.")

        placeholder = self.PROMPT_PLACEHOLDER

        # Add Prompt into URL (if the URL takes it)
        if placeholder in url:
            if self.url_encoding == "url":  # TODO: get rid of & move to converters
                url_prompt = urllib.parse.quote(prompt)
            else:
                url_prompt = prompt
            url = url.replace(placeholder, url_prompt)

        # Add Prompt into request body (if the body takes it)
        if placeholder in http_body:
            body_prompt = prompt.replace(" ", "+") if self.body_encoding else prompt
            # str.replace returns a new string, so the result has to be re-assigned.
            http_body = http_body.replace(placeholder, body_prompt)

        # The parsed Content-Length was computed on the template body; refresh it after substitution.
        if "Content-Length" in header_dict:
            header_dict["Content-Length"] = str(len(http_body.encode("utf-8")))

        # TODO: include vsn here (http_vsn is parsed but not used yet)
        # NOTE(review): requests.request is a blocking call inside a coroutine.
        response = requests.request(
            url=url,
            headers=header_dict,
            data=http_body,
            method=http_method,
            allow_redirects=True,  # using Requests so we can leave this flag on, rather than httpx
        )

        # NOTE(review): str() of bytes yields the "b'...'" repr rather than decoded text - confirm this is intended.
        response_entry = construct_response_from_request(
            request=request, response_text_pieces=[str(response.content)], response_type="text"
        )
        return response_entry

    def parse_http_request(self):
        """
        Parses the raw HTTP request string into its components

        Returns:
            headers_dict (dict): dictionary of all http header values (plus Content-Length when a body exists)
            body (str): string with body data ("" if there is no body)
            url (str): "https://" + Host header + path from the request line
            http_method (str): HTTP method from the request line (eg POST or GET)
            http_vsn (str): HTTP version from the request line ("" if the request line omits it)

            All five values are empty when no http_request was provided.

        Raises:
            ValueError: if the request line, a header line, or the Host header is malformed or missing
        """

        if not self.http_request:
            return {}, "", "", "", ""

        # Requests captured from proxies may use CRLF line endings; normalize before splitting.
        raw_request = self.http_request.strip().replace("\r\n", "\n")

        # Split the request into headers and body at the first blank line (\n\n)
        head, separator, body = raw_request.partition("\n\n")

        # Parse out the header components
        header_lines = head.strip().split("\n")
        request_line_parts = header_lines[0].split()  # 1st line like: POST /url_ending HTTP_VSN
        if len(request_line_parts) < 2:
            raise ValueError(f"Invalid HTTP request line: {header_lines[0]!r}")

        # Loop through each remaining line and split into key-value pairs
        headers_dict = {}
        for line in header_lines[1:]:
            if not line.strip():
                continue
            key, colon, value = line.partition(":")
            if not colon:
                raise ValueError(f"Invalid HTTP header line: {line!r}")
            headers_dict[key.strip()] = value.strip()

        if separator:
            headers_dict["Content-Length"] = str(len(body.encode("utf-8")))

        # Header names are case-insensitive, so look Host up without regard to case.
        host = next((value for key, value in headers_dict.items() if key.lower() == "host"), None)
        if host is None:
            raise ValueError("HTTP request is missing the Host header.")

        # Capture info from 1st line of raw request
        http_method = request_line_parts[0]
        url = "https://" + host + request_line_parts[1]  # TODO add http vs https based on vsn
        http_vsn = request_line_parts[2] if len(request_line_parts) > 2 else ""

        return headers_dict, body, url, http_method, http_vsn

    def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
        """
        Checks that the request holds exactly one piece and that the piece is text

        Raises:
            ValueError: if there is not exactly one request piece or its data type is not "text"
        """
        request_pieces: list[PromptRequestPiece] = prompt_request.request_pieces

        if len(request_pieces) != 1:
            raise ValueError("This target only supports a single prompt request piece.")
        if request_pieces[0].original_value_data_type != "text":  # TODO: should this be text or http_request?
            raise ValueError(
                f"This target only supports text prompt input. Got: {request_pieces[0].original_value_data_type}"
            )
Loading