Refactored profiling, adjusted lint and updated pytest default logging #111

Open · wants to merge 1 commit into base: main
3 changes: 3 additions & 0 deletions .github/workflows/lint.yaml
@@ -36,3 +36,6 @@ jobs:
      - name: flake8
        run: |
          poetry run flake8 focus_validator/
      - name: bandit
        run: |
          poetry run bandit -r focus_validator/ -ll
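For context, the `-ll` flag limits bandit's report to medium- and high-severity findings (per recent bandit releases). A hedged sketch of the kind of code that would still be reported, illustrative only and not from this repo:

import pickle


def load_untrusted(blob: bytes):  # hypothetical helper, for illustration only
    # bandit reports pickle deserialization (B301) at medium severity,
    # so it remains visible in a -ll run.
    return pickle.loads(blob)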
64 changes: 64 additions & 0 deletions focus_validator/utils/profiler.py
@@ -0,0 +1,64 @@
import cProfile
import csv
import functools
import io
import pstats


class Profiler:
    def __init__(self, csv_format=False):
        self.csv_format = csv_format

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Wrap and execute profile
            profiler = cProfile.Profile()
            profiler.enable()
            result = func(*args, **kwargs)
            profiler.disable()
            profiling_result = pstats.Stats(profiler)

            generate_csv_file(args, profiling_result)

            generate_console_output(profiler)

            return result

        def generate_console_output(profiler):
            s = io.StringIO()
            sortby = "cumulative"
            ps = pstats.Stats(profiler, stream=s).sort_stats(sortby)
            ps.print_stats()
            print(s.getvalue())

        def generate_csv_file(args, profiling_result):
            if self.csv_format:
                # Determine the filename based on class and method name
                csv_filename = generate_file_name(args)
                with open(csv_filename, "w", newline="") as f:
                    w = csv.writer(f)
                    # Write the headers
                    headers = [
                        "ncalls",
                        "tottime",
                        "percall",
                        "cumtime",
                        "percall",
                        "filename:lineno(function)",
                    ]
                    w.writerow(headers)

                    # Write each row
                    for row in profiling_result.stats.items():
                        func_name, (cc, nc, tt, ct, callers) = row
                        w.writerow([nc, tt, tt / nc, ct, ct / cc, func_name])

        def generate_file_name(args):
            class_name = args[0].__class__.__name__ if args else "global"
            method_name = func.__name__
            base_filename = f"{class_name}_{method_name}_profile"
            csv_filename = f"{base_filename}.csv"
            return csv_filename

        return wrapper
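For reviewers, a minimal usage sketch of the new decorator (the ExampleJob class below is illustrative, not part of this PR):

from focus_validator.utils.profiler import Profiler


class ExampleJob:  # hypothetical class, for illustration only
    @Profiler(csv_format=True)
    def run(self):
        # The decorated call is profiled with cProfile; a cumulative-time report
        # is printed to the console, and because csv_format=True the stats are
        # also written to "ExampleJob_run_profile.csv" ({class}_{method}_profile.csv).
        return sum(i * i for i in range(10_000))


ExampleJob().run()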
5 changes: 5 additions & 0 deletions pyproject.toml
@@ -18,6 +18,7 @@ requests = "*"
pandera = "^0.16"
polars = "^0.20.3"
ddt = "^1.7.1"
bandit = "^1.7.6"

[tool.poetry.group.dev.dependencies]
black = {extras = ["d"], version = "^23.7.0"}
@@ -42,3 +43,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
focus-validator = "focus_validator.main:main"

[tool.pytest.ini_options]
log_cli = true
log_cli_level = "INFO"
60 changes: 17 additions & 43 deletions tests/test_performance_profiler.py
@@ -7,6 +7,7 @@
import time
import unittest
from ddt import ddt, data, unpack
from focus_validator.utils.profiler import Profiler

from tests.samples.csv_random_data_generate_at_scale import generate_and_write_fake_focuses
from focus_validator.validator import Validator
@@ -17,19 +18,7 @@
@ddt
class TestPerformanceProfiler(unittest.TestCase):

    def profile_to_csv(self, profiling_result, csv_file):
        with open(csv_file, 'w', newline='') as f:
            w = csv.writer(f)
            # Write the headers
            headers = ['ncalls', 'tottime', 'percall', 'cumtime', 'percall', 'filename:lineno(function)']
            w.writerow(headers)

            # Write each row
            for row in profiling_result.stats.items():
                func_name, (cc, nc, tt, ct, callers) = row
                w.writerow([nc, tt, tt/nc, ct, ct/cc, func_name])

    def execute_profiler(self, file_name, performance_threshold):
    def measure_validator(self, file_name, performance_threshold):
        # Set the environment variable for logging level
        env = os.environ.copy()
        env["LOG_LEVEL"] = "INFO"
@@ -48,44 +37,29 @@ def execute_profiler(self, file_name, performance_threshold):
            column_namespace=None,
        )

        # Set up the profiler
        profiler = cProfile.Profile()
        profiler.enable()

        # The original performance testing code
        # Measure the execution
        start_time = time.time()
        validator.validate()
        self.run_and_profile_validator(validator)
        end_time = time.time()
        duration = end_time - start_time
        logging.info(f"File: {file_name} Duration: {duration} seconds")

        # Stop the profiler
        profiler.disable()

        # Save profiling data to a file
        profiling_result = pstats.Stats(profiler)
        profile_file_name = "profiling_data_" + file_name
        self.profile_to_csv(profiling_result, profile_file_name)

        # Optionally print out profiling report to the console
        s = io.StringIO()
        sortby = 'cumulative'  # Can be changed to 'time', 'calls', etc.
        ps = pstats.Stats(profiler, stream=s).sort_stats(sortby)
        ps.print_stats(10)
        logging.info(s.getvalue())

        # Execution time check
        self.assertLess(duration, performance_threshold, f"Performance test exceeded threshold. Duration: {duration} seconds")

    @Profiler(csv_format=True)
    def run_and_profile_validator(self, validator):
        validator.validate()

    @data(
        # ("fake_focuses500000.csv", 60.0, 500000, "validate_500000_records"),
        # ("fake_focuses500000.csv", 110.0, 500000, "validate_500000_records"),
        # ("fake_focuses250000.csv", 60.0, 250000, "validate_250000_records"),
        # ("fake_focuses100000.csv", 30.0, 100000, "validate_100000_records"),
        # ("fake_focuses50000.csv", 15.0, 50000, "validate_50000_records"),
        # ("fake_focuses10000.csv", 7.0, 10000, "validate_10000_records"),
        # ("fake_focuses5000.csv", 3.0, 5000, "validate_5000_records"),
        ("fake_focuses2000.csv", 3.0, 2000, "validate_2000_records"),
        ("fake_focuses2000.csv", 3.0, 1000, "validate_1000_records")
        # ("fake_focuses100000.csv", 20.0, 100000, "validate_100000_records"),
        # ("fake_focuses50000.csv", 11.0, 50000, "validate_50000_records"),
        # ("fake_focuses10000.csv", 2.5, 10000, "validate_10000_records"),
        # ("fake_focuses5000.csv", 1.8, 5000, "validate_5000_records"),
        ("fake_focuses2000.csv", 1.0, 2000, "validate_2000_records"),
        ("fake_focuses2000.csv", 1.0, 1000, "validate_1000_records")
    )
    @unpack
    def test_param_validator_performance(self, file_name, performance_threshold, number_of_records, case_id):
@@ -94,9 +68,9 @@ def test_param_validator_performance(self, file_name, performance_threshold, num
        env = os.environ.copy()
        env["LOG_LEVEL"] = "INFO"

        logging.info("Generating file with {number_of_records} records.")
        logging.info(f"Generating file with {number_of_records} records.")
        generate_and_write_fake_focuses(file_name, number_of_records)
        self.execute_profiler(str(file_name), performance_threshold)
        self.measure_validator(str(file_name), performance_threshold)

        logging.info("Cleaning up test file.")
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
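Given the decorator on run_and_profile_validator, its stats land in "TestPerformanceProfiler_run_and_profile_validator_profile.csv" in the working directory. A hedged sketch of inspecting that file after a run (assumes the tests have already been executed and the CSV exists):

import csv

# Load the per-function rows written by the Profiler decorator.
with open("TestPerformanceProfiler_run_and_profile_validator_profile.csv", newline="") as f:
    rows = list(csv.DictReader(f))

# Sort by cumulative time and print the ten most expensive functions.
# Note: the header row contains "percall" twice; DictReader keeps the last value,
# which is fine here since only cumtime and the function name are used.
for row in sorted(rows, key=lambda r: float(r["cumtime"]), reverse=True)[:10]:
    print(row["cumtime"], row["filename:lineno(function)"])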
102 changes: 22 additions & 80 deletions tests/test_progressive_performance.py
@@ -3,76 +3,17 @@
import subprocess
import time
import unittest

from ddt import ddt, data, unpack
from focus_validator.utils.profiler import Profiler
from tests.samples.csv_random_data_generate_at_scale import generate_and_write_fake_focuses

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [%(funcName)s] - %(message)s')


@ddt
class TestProgressivePerformance(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        #Generate 1000 fake focuses to a CSV file
        cls.csv_filename_1000 = 'fake_focuses1000.csv'
        cls.csv_filename_10000 = 'fake_focuses10000.csv'
        cls.csv_filename_50000 = 'fake_focuses50000.csv'
        cls.csv_filename_100000 = 'fake_focuses100000.csv'
        cls.csv_filename_250000 = 'fake_focuses250000.csv'
        cls.csv_filename_500000 = 'fake_focuses500000.csv'

        logging.info("Generating file with 1,000 records")
        cls.generate_test_file(str(cls.csv_filename_1000), 1000)

        # logging.info("Generating file with 10,0000 records")
        # cls.generate_test_file(str(cls.csv_filename_10000), 10000)

        # logging.info("Generating file with 50,0000 records")
        # cls.generate_test_file(str(cls.csv_filename_50000), 50000)

        # logging.info("Generating file with 100,0000 records")
        # cls.generate_test_file(str(cls.csv_filename_100000), 100000)

        # logging.info("Generating file with 250,0000 records")
        # cls.generate_test_file(str(cls.csv_filename_250000), 250000)

        # logging.info("Generating file with 500,0000 records")
        # cls.generate_test_file(str(cls.csv_filename_500000), 500000)

    @classmethod
    def tearDownClass(cls):
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        if os.path.exists(os.path.join(base_dir, 'fake_focuses.csv')):
            os.remove(os.path.join(base_dir, 'fake_focuses.csv'))

        if os.path.exists(os.path.join(base_dir, str(cls.csv_filename_1000))):
            os.remove(os.path.join(base_dir, str(cls.csv_filename_1000)))

        if os.path.exists(os.path.join(base_dir, str(cls.csv_filename_10000))):
            os.remove(os.path.join(base_dir, str(cls.csv_filename_10000)))

        if os.path.exists(os.path.join(base_dir, str(cls.csv_filename_50000))):
            os.remove(os.path.join(base_dir, str(cls.csv_filename_50000)))

        if os.path.exists(os.path.join(base_dir, str(cls.csv_filename_100000))):
            os.remove(os.path.join(base_dir, str(cls.csv_filename_100000)))

        if os.path.exists(os.path.join(base_dir, str(cls.csv_filename_250000))):
            os.remove(os.path.join(base_dir, str(cls.csv_filename_250000)))

        if os.path.exists(os.path.join(base_dir, str(cls.csv_filename_500000))):
            os.remove(os.path.join(base_dir, str(cls.csv_filename_500000)))

    @classmethod
    def generate_test_file(cls, csv_filename, number_of_records):
        #Generate fake focuses to a CSV file
        # fake_focuses = generate_fake_focus(number_of_records)

        # write_fake_focuses_to_csv(fake_focuses, csv_filename)
        generate_and_write_fake_focuses(csv_filename, number_of_records)


    @Profiler(csv_format=True)
    def run_validator(self, args):
        # Get the current directory of this test file
        test_dir = os.path.dirname(os.path.abspath(__file__))
@@ -86,25 +27,26 @@ def run_validator(self, args):
        command = ['poetry', 'run', 'python', os.path.join(app_dir, 'main.py')] + args
        return subprocess.run(command, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)

    def test_1000_record_csv_performance(self):
        self.execute_performance(str(self.csv_filename_1000), 25.0)
    @data(
        # ("fake_focuses500000.csv", 115.0, 500000, "validate_500000_records"),
        # ("fake_focuses250000.csv", 65.0, 250000, "validate_250000_records"),
        # ("fake_focuses100000.csv", 25.0, 100000, "validate_100000_records"),
        # ("fake_focuses50000.csv", 13.0, 50000, "validate_50000_records"),
        # ("fake_focuses10000.csv", 5.0, 10000, "validate_10000_records"),
        # ("fake_focuses5000.csv", 3.5, 5000, "validate_5000_records"),
        ("fake_focuses2000.csv", 2.5, 2000, "validate_2000_records"),
        ("fake_focuses2000.csv", 2.6, 1000, "validate_1000_records")
    )
    @unpack
    def test_param_main_performance(self, file_name, performance_threshold, number_of_records, case_id):
        with self.subTest(case_id=case_id):
            generate_and_write_fake_focuses(file_name, number_of_records)
            self.execute_performance(file_name, performance_threshold)
            if os.path.exists(file_name):
                os.remove(file_name)

    # def test_10000_record_csv_performance(self):
    #     self.execute_performance(str(self.csv_filename_10000), 25.0)

    # def test_50000_record_csv_performance(self):
    #     self.execute_performance(str(self.csv_filename_50000), 150.0)

    # def test_100000_record_csv_performance(self):
    #     self.execute_performance(str(self.csv_filename_100000), 300.0)

    # def test_250000_record_csv_performance(self):
    #     self.execute_performance(str(self.csv_filename_250000), 300.0)

    # def test_500000_record_csv_performance(self):
    #     self.execute_performance(str(self.csv_filename_500000), 300.0)

    def execute_performance(self, file_name, performance_threshold):

        # Get the current directory of this test file
        test_dir = os.path.dirname(os.path.abspath(__file__))
