Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for duplicates immediately before creating issue #41

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 50 additions & 20 deletions scripts/fuzzer_helper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import json
import requests
import sys
import os
import subprocess
import reduce_sql
import fuzzer_helper
import urllib.parse
import re


USERNAME = 'fuzzerofducks'
Expand Down Expand Up @@ -42,6 +41,12 @@ def issue_url():
return 'https://api.github.com/repos/%s/%s/issues' % (REPO_OWNER, REPO_NAME)


def issues_by_title_url(issue_title):
    """Return the GitHub search-API URL used to look up issues by title.

    The search expression is scoped to the REPO_OWNER/REPO_NAME repository and
    carries the ``in:title`` and ``is:open`` qualifiers; the whole expression
    is percent-encoded before being placed in the ``q`` parameter.
    """
    search_expression = f"repo:{REPO_OWNER}/{REPO_NAME} {issue_title} in:title is:open"
    encoded_expression = urllib.parse.quote(search_expression)
    return "https://api.github.com/search/issues" + "?q=" + encoded_expression


def get_token():
if 'FUZZEROFDUCKSKEY' not in os.environ:
print("FUZZEROFDUCKSKEY not found in environment variables")
Expand Down Expand Up @@ -80,7 +85,7 @@ def make_github_issue(title, body):
raise Exception("Failed to create issue")


def get_github_issues(page: int) -> list[dict]:
def get_github_issues_per_page(page: int) -> list[dict]:
session = create_session()
url = issue_url() + '?per_page=100&page=' + str(page)
r = session.get(url)
Expand All @@ -91,6 +96,18 @@ def get_github_issues(page: int) -> list[dict]:
return json.loads(r.content.decode('utf8'))


def get_github_issues_by_title(issue_title) -> list[dict]:
    """Query the search endpoint built by issues_by_title_url for *issue_title*.

    Returns the ``items`` list of the JSON response (an empty list when the
    key is absent). Raises ``Exception`` if the response status is not 200,
    after printing the response body.
    """
    response = create_session().get(issues_by_title_url(issue_title))
    if response.status_code == 200:
        return response.json().get("items", [])
    # any non-200 answer is reported and treated as fatal
    print('Failed to query the issues')
    print('Response:', response.content.decode('utf8'))
    raise Exception("Failed to query the issues")


def close_github_issue(number):
session = create_session()
url = issue_url() + '/' + str(number)
Expand Down Expand Up @@ -150,7 +167,7 @@ def run_shell_command_batch(shell, cmd):
return (stdout, stderr, res.returncode, False)


def test_reproducibility(shell, issue, current_errors, perform_check):
def is_reproducible_issue(shell, issue) -> bool:
extract = extract_issue(issue['body'], issue['number'])
labels = issue['labels']
label_timeout = False
Expand All @@ -161,33 +178,39 @@ def test_reproducibility(shell, issue, current_errors, perform_check):
# failed extract: leave the issue as-is
return True
sql = extract[0] + ';'
error = extract[1]
if perform_check is True and label_timeout is False:
if label_timeout is False:
print(f"Checking issue {issue['number']}...")
(stdout, stderr, returncode, is_timeout) = run_shell_command_batch(shell, sql)
if is_timeout:
label_github_issue(issue['number'], 'timeout')
else:
if returncode == 0:
return False
if not fuzzer_helper.is_internal_error(stderr):
if not is_internal_error(stderr):
return False
# issue is still reproducible
current_errors[error] = issue
return True


def extract_github_issues(shell, perform_check) -> dict[str, dict]:
current_errors: dict[str, dict] = dict()
def get_github_issues_list() -> list[dict]:
issues: list[dict] = []
for p in range(1, 10):
issues: list[dict] = get_github_issues(p)
for issue in issues:
# check if the github issue is still reproducible
if not test_reproducibility(shell, issue, current_errors, perform_check):
# the issue appears to be fixed - close the issue
print(f"Failed to reproduce issue {issue['number']}, closing...")
close_github_issue(int(issue['number']))
return current_errors
issues = issues + get_github_issues_per_page(p)
return issues


def close_non_reproducible_issues(shell) -> dict[str, dict]:
    """Close every listed issue that no longer reproduces; return the rest.

    Each issue from get_github_issues_list() is checked with
    is_reproducible_issue(). Issues that fail the check are closed; the others
    are collected in a dict keyed by issue title so callers can look an issue
    up by its title.
    """
    still_open: dict[str, dict] = {}
    for candidate in get_github_issues_list():
        if is_reproducible_issue(shell, candidate):
            still_open[candidate['title']] = candidate
            continue
        # the issue appears to be fixed - close it
        print(f"Failed to reproduce issue {candidate['number']}, closing...")
        close_github_issue(int(candidate['number']))
    return still_open


def file_issue(cmd, exception_msg, stacktrace, fuzzer, seed, hash):
Expand Down Expand Up @@ -220,7 +243,14 @@ def is_internal_error(error):
return False


def sanitize_stacktrace(err):
    """Strip run-specific details from a stack trace string.

    Three rewrites are applied in order:
      1. ``../duckdb(<inner>)`` is reduced to ``<inner>``.
      2. Hexadecimal values such as ``+0x1b`` or ``[0x55aa]`` are removed.
      3. Text from ``/lib/x86_64-linux-gnu/libc.so`` to the end of its line
         (including the newline, if any) is removed.

    Returns the rewritten text with leading/trailing whitespace stripped.
    """
    # NOTE: the two leading dots are regex wildcards, so any two characters
    # followed by "/duckdb(" match here.
    err = re.sub(r'../duckdb\((.*)\)', r'\1', err)
    err = re.sub(r'[\+\[]?0x[0-9a-fA-F]+\]?', '', err)
    # Accept end-of-string as well as a newline, so a libc frame on the final
    # line of the trace is removed too; the dot in "libc.so" is matched
    # literally.
    err = re.sub(r'/lib/x86_64-linux-gnu/libc\.so.*(?:\n|$)', '', err)
    return err.strip()


def split_exception_trace(exception_msg_full: str) -> tuple[str, str]:
    """Split raw error output into ``(exception message, stack trace)``.

    The exception message is the text before the first newline, stripped of
    surrounding whitespace. Everything after that newline is treated as the
    stack trace and passed through sanitize_stacktrace().
    """
    # exception message does not contain newline, so split after first newline
    exception_msg, _, stack_trace = exception_msg_full.partition('\n')
    # Single exit point: return the sanitized trace (sanitize_stacktrace also
    # strips surrounding whitespace).
    return (exception_msg.strip(), sanitize_stacktrace(stack_trace))
1 change: 0 additions & 1 deletion scripts/reduce_sql.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import re
import subprocess
import time
import os
Expand Down
36 changes: 18 additions & 18 deletions scripts/run_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,26 @@ def run_shell_command(cmd):
return (stdout, stderr, res.returncode)


# first get a list of all github issues, and check if we can still reproduce them
def is_known_issue(exception_msg):
    """Report whether an issue matching *exception_msg* is already filed.

    Looks the message up through fuzzer_helper.get_github_issues_by_title().
    When at least one match comes back, prints a notice with the number of
    the first match and returns True; otherwise returns False.
    """
    matches = fuzzer_helper.get_github_issues_by_title(exception_msg)
    if not matches:
        return False
    first_match_number = str(matches[0]['number'])
    print("Skip filing duplicate issue")
    print("Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/" + first_match_number)
    return True


if no_git_checks:
current_errors: dict[str, dict] = dict()
else:
current_errors: dict[str, dict] = fuzzer_helper.extract_github_issues(shell, perform_checks)
# ==========================================
# START OF SCRIPT
# ==========================================

# Don't go on and fuzz if perform checks = true
if perform_checks:
fuzzer_helper.close_non_reproducible_issues(shell)
exit(0)

last_query_log_file = 'sqlsmith.log'
Expand Down Expand Up @@ -190,12 +201,7 @@ def run_shell_command(cmd):
print("=========================================")

# check if this is a duplicate issue
if exception_msg in current_errors:
print("Skip filing duplicate issue")
print(
"Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/"
+ str(current_errors[exception_msg]['number'])
)
if is_known_issue(exception_msg):
exit(0)

print("=========================================")
Expand All @@ -212,15 +218,9 @@ def run_shell_command(cmd):
exception_msg, stacktrace = fuzzer_helper.split_exception_trace(stderr)

# check if this is a duplicate issue
if exception_msg in current_errors:
print("Skip filing duplicate issue")
print(
"Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/"
+ str(current_errors[exception_msg]['number'])
)
if is_known_issue(exception_msg):
exit(0)


print(f"================MARKER====================")
print(f"After reducing: the below sql causes an internal error \n `{cmd}`")
print(f"{exception_msg}")
Expand Down
12 changes: 6 additions & 6 deletions scripts/run_sqlancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,22 +129,22 @@
print(reduced_test_case)

(stdout, stderr, returncode) = reduce_sql.run_shell_command(shell, reduced_test_case)
error_msg, _ = fuzzer_helper.split_exception_trace(stderr)
error_msg, trace = fuzzer_helper.split_exception_trace(stderr)

print('----------------------------------------------')
print("Fetching github issues")
print('----------------------------------------------')

# first get a dictionary of all github issues, and check if we can still reproduce them
current_errors = fuzzer_helper.extract_github_issues(shell)
# get a dictionary with all open github issues (close the non-reproducible ones)
open_issues = fuzzer_helper.close_non_reproducible_issues(shell)

# check if this is a duplicate issue
if error_msg in current_errors:
if error_msg in open_issues:
print("Skip filing duplicate issue")
print(
"Issue already exists: https://github.com/duckdb/duckdb-fuzzer/issues/"
+ str(current_errors[error_msg]['number'])
+ str(open_issues[error_msg]['number'])
)
exit(0)

fuzzer_helper.file_issue(reduced_test_case, error_msg, "SQLancer", seed, git_hash)
fuzzer_helper.file_issue(reduced_test_case, error_msg, trace, "SQLancer", seed, git_hash)
Loading