Azure · jbolor21 · Aug 29, 2024 · Sep 19, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/doc/code/targets/http_target.ipynb b/doc/code/targets/http_target.ipynb
diff --git a/doc/code/targets/http_target.py b/doc/code/targets/http_target.py
@@ -0,0 +1,111 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#       jupytext_version: 1.16.4
+#   kernelspec:
+#     display_name: pyrit2
+#     language: python
+#     name: python3
+# ---
+
+# # This notebook shows how to interact with the HTTP Target
+
+# As a simple example, a Google search is used to show the interaction. (This won't result in a successful search because of the anti-bot rules, but it shows how to use the target in a simple case.)
+
+# +
+import os
+import urllib.parse
+
+from pyrit.models import PromptTemplate
+from pyrit.orchestrator import PromptSendingOrchestrator
+from pyrit.prompt_target import HTTP_Target
+from pyrit.models import PromptRequestPiece
+
+
+## Add the prompt you want to send to the URL
+prompt = "apple"
+url = "https://www.google.com/search?q={PROMPT}"
+# Add the prompt to the body of the request
+
+with HTTP_Target(http_request={}, url=url, body={}, url_encoding="url", body_encoding="+", method="GET") as target_llm:
+    request = PromptRequestPiece(
+        role="user",
+        original_value=prompt,
+    ).to_prompt_request_response()
+
+    resp = await target_llm.send_prompt_async(prompt_request=request)  # type: ignore
+    print(resp)
+    print
+
+
+# +
+import os
+import urllib.parse
+
+from pyrit.models import PromptTemplate
+from pyrit.orchestrator import PromptSendingOrchestrator
+from pyrit.prompt_target import HTTP_Target
+from pyrit.models import PromptRequestPiece
+# -
+
+# Bing Image Creator, which does not have an API, is harder to use.
+#
+# The HTTP request needs to be captured (e.g. with browser DevTools or Burp) and put in the "http_req" variable below. The captured values you need include the Cookie header.
+
+# +
+http_req = f"""
+Host: www.bing.com
+Origin: https://www.bing.com
+Content-Type: application/x-www-form-urlencoded
+User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.120 Safari/537.36
+Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7
+Sec-Fetch-Site: same-origin
+Sec-Fetch-Mode: navigate
+Sec-Fetch-User: ?1
+Sec-Fetch-Dest: document
+Referer: https://www.bing.com/images/create?FORM=GENILP
+
+"""
+
+## Add the prompt you want to send to the URL
+prompt = "apple"
+url = "https://www.bing.com/images/create?q={PROMPT}&rt=4&FORM=GENCRE"
+
+# Add the prompt to the body of the request
+
+body = "q={PROMPT}&qs=ds"
+response_var = None
+with HTTP_Target(http_request=http_req, url=url, body=body, url_encoding="url", method="POST") as target_llm:
+    # Questions: do i need to call converter on prompt before calling target? ie url encode rather than handling in target itself?
+    request = PromptRequestPiece(
+        role="user",
+        original_value=prompt,
+    ).to_prompt_request_response()
+
+    resp = await target_llm.send_prompt_async(prompt_request=request)  # type: ignore
+    response_var = resp
+
+
+
+# +
+from bs4 import BeautifulSoup
+html_content = response_var.request_pieces[0].original_value
+parsed_hmtl_soup = BeautifulSoup(html_content, 'html.parser')
+
+print(parsed_hmtl_soup.prettify())
+
+# TODO: turn this into a parsing function. As an example, this is the image element:
+# <div data-c="/images/create/async/results/1-66f2fad6d7834081a343ac05ae3c1784?q=apple&amp;IG=52AE84FF96F948909718523E5DB8AF89&amp;IID=images.as" data-mc="/images/create/async/mycreation?requestId=1-66f2fad6d7834081a343ac05ae3c1784" data-nfurl="" id="gir">
+
+
+# +
+# Just same thing using orchestrator
+http_prompt_target = HTTP_Target(http_request=http_resp, url=url, body=body)
+
+with PromptSendingOrchestrator(prompt_target=http_prompt_target) as orchestrator:
+    response = await orchestrator.send_prompts_async(prompt_list=[prompt])  # type: ignore
+    print(response[0])
diff --git a/pyrit/prompt_target/__init__.py b/pyrit/prompt_target/__init__.py
@@ -17,6 +17,7 @@
 from pyrit.prompt_target.prompt_chat_target.ollama_chat_target import OllamaChatTarget
 from pyrit.prompt_target.azure_openai_completion_target import AzureOpenAICompletionTarget
 from pyrit.prompt_target.prompt_shield_target import PromptShieldTarget
+from pyrit.prompt_target.http_target import HTTPTarget
 
 
 __all__ = [
@@ -38,4 +39,5 @@
     "limit_requests_per_minute",
     "TextTarget",
     "OllamaChatTarget",
+    "HTTPTarget"
 ]
diff --git a/pyrit/prompt_target/http_target.py b/pyrit/prompt_target/http_target.py
@@ -0,0 +1,129 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+from typing import Any, Union
+import requests
+from pyrit.prompt_target import PromptTarget
+from pyrit.memory import MemoryInterface
+from pyrit.models import construct_response_from_request, PromptRequestPiece, PromptRequestResponse
+import urllib.parse
+
+logger = logging.getLogger(__name__)
+
+
+class HTTPTarget(PromptTarget):
+    """
+    HTTP_Target is for endpoints that do not have an API and instead require HTTP request(s) to send a prompt
+    Parameters:
+        url (str): URL to send request to
+        http_request (str): the header parameters as a request (ie from Burp)
+        parse_function (function): function to parse HTTP response
+        body (str): HTTP request body
+        method (str): HTTP method (eg POST or GET)
+        memory : memory interface
+        url_encoding (str): if the prompt is included in the URL, this flag sets how to encode the prompt (ie URL encoding). Defaults to none
+    """
+
+    def __init__(
+        self,
+        http_request: str = None,
+        parse_function: callable = None, #TODO: this would be where the parse function will go
+        memory: Union[MemoryInterface, None] = None,
+        url_encoding: str = None,
+        body_encoding: str = None
+    ) -> None:
+
+        super().__init__(memory=memory)
+        self.http_request = http_request
+        self.parse_function = parse_function
+        self.url_encoding = url_encoding, 
+        self.body_encoding = body_encoding
+
+    async def send_prompt_async(self, *, prompt_request: PromptRequestResponse) -> PromptRequestResponse:
+        """
+        Sends prompt to HTTP endpoint and returns the response
+        """
+
+        self._validate_request(prompt_request=prompt_request)
+        request = prompt_request.request_pieces[0]
+
+        header_dict, http_body, url, http_method, http_vsn = self.parse_http_request()
+
+        #Make the actual HTTP request:
+
+        # Add Prompt into URL (if the URL takes it)
+        if "{PROMPT}" in url:
+            if self.url_encoding == "url": #TODO: get rid of & move to converters
+                prompt_url_safe = urllib.parse.quote(request.original_value)
+                self.url = url.replace("{PROMPT}", prompt_url_safe)
+            else: 
+                self.url = url.replace("{PROMPT}", request.original_value)
+
+        # Add Prompt into request body (if the body takes it)
+        if "{PROMPT}" in http_body:
+            if self.url_encoding:
+                encoded_prompt = request.original_value.replace(" ", "+")
+                http_body.replace("{PROMPT}", encoded_prompt)
+
+        #TODO: include vsn here
+        response = requests.request(
+            url=url,
+            headers=header_dict,
+            data=http_body, 
+            method=http_method,
+            allow_redirects=True # using Requests so we can leave this flag on, rather than httpx
+        )
+
+        response_entry = construct_response_from_request(request=request, response_text_pieces=[str(response.content)], response_type="text")
+        return response_entry
+
+
+    def parse_http_request(self):
+        """
+        Parses the HTTP request string into a dictionary of headers
+        Returns:
+            headers_dict (dict): dictionary of all http header values
+            body (str): string with body data
+        """
+
+        headers_dict = {}
+        if not self.http_request:
+            return {}, "", "", "", ""
+
+        body = ""
+
+        # Split the request into headers and body by finding the double newlines (\n\n)
+        request_parts = self.http_request.strip().split("\n\n", 1)
+
+        # Parse out the header components
+        header_lines = request_parts[0].strip().split("\n")
+        http_req_info_line = header_lines[0].split(" ") # get 1st line like POST /url_ending HTTP_VSN
+        header_lines = header_lines[1:] # rest of the raw request is the headers info
+
+        # Loop through each line and split into key-value pairs
+        for line in header_lines:
+            key, value = line.split(":", 1)
+            headers_dict[key.strip()] = value.strip()
+
+        if len(request_parts) > 1:
+            body = request_parts[1]
+            headers_dict["Content-Length"] = str(len(body))
+
+        # Capture info from 1st line of raw request
+        http_method = http_req_info_line[0]
+        url = "https://" + headers_dict["Host"] + http_req_info_line[1] #TODO add http vs https based on vsn
+        http_vsn = http_req_info_line[1]
+
+        return headers_dict, body, url, http_method, http_vsn
+
+
+    def _validate_request(self, *, prompt_request: PromptRequestResponse) -> None:
+        request_pieces: list[PromptRequestPiece] = prompt_request.request_pieces
+
+        if len(request_pieces) != 1:
+            raise ValueError("This target only supports a single prompt request piece.")
+        if request_pieces[0].original_value_data_type != "text": #TODO: should this be text or http_request?
+            raise ValueError(
+                f"This target only supports text prompt input. Got: {type(request_pieces[0].original_value_data_type)}"
+            )