
Commit

Merge pull request #9 from prompt-security/suggested_fixes_1
Some textual and stylistic fixes following user reports
vitaly-ps authored Apr 15, 2024
2 parents 44c5bbc + 0c68c80 commit a16fa27
Showing 3 changed files with 34 additions and 17 deletions.
ps_fuzz/app_config.py (6 changes: 3 additions & 3 deletions)

@@ -34,11 +34,11 @@ def get_attributes(self):
     def print_as_table(self):
         attributes = self.get_attributes()
         print_table(
-            title = "Configuration",
+            title = "Current configuration",
             headers = ["Option", "Value"],
             data = [[key, value] for key, value in attributes.items() if key != "system_prompt"] # print all except the system prompt
         )
-        print(f"{colorama.Style.BRIGHT}System prompt:{colorama.Style.RESET_ALL}")
+        print(f"{colorama.Style.BRIGHT}Current system prompt:{colorama.Style.RESET_ALL}")
         #print(f"{colorama.Style.DIM}{wrap_text(self.system_prompt, width=70)}{colorama.Style.RESET_ALL}")
         print(f"{colorama.Style.DIM}{self.system_prompt}{colorama.Style.RESET_ALL}")

@@ -180,7 +180,7 @@ def parse_cmdline_args():
     parser.add_argument('-n', '--num-attempts', type=int, default=None, help="Number of different attack prompts")
     parser.add_argument('-t', '--num-threads', type=int, default=None, help="Number of worker threads")
     parser.add_argument('-a', '--attack-temperature', type=float, default=None, help="Temperature for attack model")
-    parser.add_argument('-d', '--debug-level', type=int, default=None, help="Debug level")
+    parser.add_argument('-d', '--debug-level', type=int, default=None, help="Debug level (0-2)")
     parser.add_argument("-b", '--batch', action='store_true', help="Run the fuzzer in unattended (batch) mode, bypassing the interactive steps")
     parser.add_argument('system_prompt_file', type=str, nargs='?', default=None, help="Filename containing the system prompt")
     return parser.parse_args()
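
For a quick sanity check, the updated options can be exercised in isolation. A minimal sketch, assuming parse_cmdline_args is importable from ps_fuzz.app_config (the file shown above); the flag values are illustrative:

    import sys
    from ps_fuzz.app_config import parse_cmdline_args

    # Simulated command line; values are illustrative only.
    sys.argv = ["prompt-security-fuzzer", "-n", "5", "-t", "4", "-d", "2", "--batch", "system_prompt.txt"]
    args = parse_cmdline_args()
    assert args.debug_level == 2   # help text now documents the 0-2 range
    assert args.batch is True      # unattended (batch) mode
    assert args.system_prompt_file == "system_prompt.txt"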
ps_fuzz/cli.py (6 changes: 3 additions & 3 deletions)

@@ -54,9 +54,9 @@ def main():
     # Run interactive shell that allows to change configuration or run some tasks
     if args.batch:
         run_fuzzer(app_config)
-        sys.exit(0)
-
-    interactive_shell(app_config)
+    else:
+        interactive_shell(app_config)
+    print(f"{BRIGHT}{colorama.Fore.CYAN}Thank you for trying out the Prompt Security Fuzzer!{RESET}")
 
 if __name__ == "__main__":
     main()
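
The net effect in cli.py: the early sys.exit(0) is gone, batch and interactive modes become mutually exclusive branches, and the closing message is reached on either path. A condensed restatement of the new flow (simplified, not the literal source):

    if args.batch:
        run_fuzzer(app_config)            # unattended (batch) run
    else:
        interactive_shell(app_config)     # interactive run
    # Reached in both modes now that sys.exit(0) is removed.
    print("Thank you for trying out the Prompt Security Fuzzer!")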
ps_fuzz/prompt_injection_fuzzer.py (39 changes: 28 additions & 11 deletions)

@@ -95,13 +95,13 @@ def fuzz_prompt_injections(client_config: ClientConfig, attack_config: AttackConfig
         title = "Test results",
         headers = [
             "",
-            "Test",
+            "Attack Type",
             "Broken",
             "Resilient",
             "Errors",
             "Strength",
         ],
-        data = [
+        data = sorted([
             [
                 ERROR if test.status.error_count > 0 else RESILIENT if isResilient(test.status) else VULNERABLE,
                 f"{test.test_name + ' ':.<{50}}",
@@ -111,7 +111,7 @@ def fuzz_prompt_injections(client_config: ClientConfig, attack_config: AttackConfig
                 simpleProgressBar(test.status.resilient_count, test.status.total_count, GREEN if isResilient(test.status) else RED),
             ]
             for test in tests
-        ],
+        ], key=lambda x: x[1]),
         footer_row = [
             ERROR if all(test.status.error_count > 0 for test in tests) else RESILIENT if all(isResilient(test.status) for test in tests) else VULNERABLE,
             f"{'Total (# tests): ':.<50}",
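
The sorted(..., key=lambda x: x[1]) wrapper orders the rows of the results table by their second column, the test name padded with dots to width 50, so attacks now print alphabetically. A standalone sketch of just that mechanism (the two attack names are illustrative):

    # Each row is [status_icon, padded_name, ...]; index 1 is the name column.
    rows = [
        ["+", f"{'typoglycemia_attack ':.<50}"],
        ["+", f"{'aim_jailbreak ':.<50}"],
    ]
    for row in sorted(rows, key=lambda x: x[1]):
        print(row[1])
    # aim_jailbreak prints first, then typoglycemia_attack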
@@ -129,7 +129,10 @@ def fuzz_prompt_injections(client_config: ClientConfig, attack_config: AttackConfig
     resilient_tests_count = sum(isResilient(test.status) for test in tests)
     total_tests_count = len(tests)
     resilient_tests_percentage = resilient_tests_count / total_tests_count * 100 if total_tests_count > 0 else 0
-    print(f"Your system prompt was resilient in {int(resilient_tests_percentage)}% ({resilient_tests_count} out of total {total_tests_count}) tests.")
+    print(f"Your system prompt passed {int(resilient_tests_percentage)}% ({resilient_tests_count} out of {total_tests_count}) of attack simulations.")
+    print()
+    print(f"To learn about the various attack types, please consult the help section and the Prompt Security Fuzzer GitHub README.")
+    print(f"You can also get a list of all available attack types by running the command '{BRIGHT}prompt-security-fuzzer --list-attacks{RESET}'.")
 
     # Print detailed test progress logs (TODO: select only some relevant representative entries and output to a "report" file, which is different from a debug .log file!)
     """
@@ -146,18 +149,32 @@ def run_interactive_chat(app_config: AppConfig):
     # Print current app configuration
     app_config.print_as_table()
     target_system_prompt = app_config.system_prompt
-    target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
-    interactive_chat(client=target_client, system_prompts=[target_system_prompt])
+    try:
+        target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
+        interactive_chat(client=target_client, system_prompts=[target_system_prompt])
+    except ModuleNotFoundError as e:
+        logger.warning(f"Error accessing the Target LLM provider {app_config.target_provider} with model '{app_config.target_model}': {colorama.Fore.RED}{e}{colorama.Style.RESET_ALL}")
+        return
 
 def run_fuzzer(app_config: AppConfig):
     # Print current app configuration
     app_config.print_as_table()
     target_system_prompt = app_config.system_prompt
-    target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
+    try:
+        target_client = ClientLangChain(app_config.target_provider, model=app_config.target_model, temperature=0)
+    except ModuleNotFoundError as e:
+        logger.warning(f"Error accessing the Target LLM provider {app_config.target_provider} with model '{app_config.target_model}': {colorama.Fore.RED}{e}{colorama.Style.RESET_ALL}")
+        return
     client_config = ClientConfig(target_client, [target_system_prompt])
-    attack_config = AttackConfig(
-        attack_client = ClientLangChain(app_config.attack_provider, model=app_config.attack_model, temperature=app_config.attack_temperature),
-        attack_prompts_count = app_config.num_attempts
-    )
+
+    try:
+        attack_config = AttackConfig(
+            attack_client = ClientLangChain(app_config.attack_provider, model=app_config.attack_model, temperature=app_config.attack_temperature),
+            attack_prompts_count = app_config.num_attempts
+        )
+    except ModuleNotFoundError as e:
+        logger.warning(f"Error accessing the Attack LLM provider {app_config.attack_provider} with model '{app_config.attack_model}': {colorama.Fore.RED}{e}{colorama.Style.RESET_ALL}")
+        return
 
     # Run the fuzzer
     fuzz_prompt_injections(client_config, attack_config, threads_count=app_config.num_threads)

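All three constructions above now share one guard: LangChain provider integrations are optional packages, so a provider whose package is not installed raises ModuleNotFoundError when the client is built, and the run is aborted with a logged warning instead of a raw traceback. The pattern in isolation (make_client is a hypothetical helper written for illustration, assuming ClientLangChain and a module-level logger as in the file above):

    import logging
    logger = logging.getLogger(__name__)

    def make_client(provider: str, model: str, temperature: float = 0):
        # A missing provider package surfaces as ModuleNotFoundError at
        # construction time, not when this module is imported.
        try:
            return ClientLangChain(provider, model=model, temperature=temperature)
        except ModuleNotFoundError as e:
            logger.warning(f"Error accessing the LLM provider {provider} with model '{model}': {e}")
            return None  # callers treat None as "abort the run"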