Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Added pggrill workload. #2

Draft
wants to merge 30 commits into
base: svilen-rl-autovac
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7dd18bc
Added pggrill.py
mdashti Apr 2, 2024
05e04cb
Added .gitignore
mdashti Apr 2, 2024
cd6336f
Separate iibench benchmark code from RL code.
mdashti Apr 2, 2024
c8c5c6d
Added README and requirements.txt
mdashti Apr 2, 2024
c302abc
Updated README.md
mdashti Apr 2, 2024
1a821ff
Modified arg parsing.
mdashti Apr 3, 2024
2985ea6
Removed `pggrill.py`
mdashti Apr 3, 2024
1051b40
Revert "Removed `pggrill.py`"
mdashti Apr 3, 2024
ec686e7
Added defaults for model file names.
mdashti Apr 3, 2024
2e2c374
Merge branch 'moe-rl-autovac2' into moe-rl-autovac3
mdashti Apr 3, 2024
edf18f9
Moved benchmark back.
mdashti Apr 3, 2024
961f516
Merge branch 'moe-rl-autovac2' into moe-rl-autovac3
mdashti Apr 3, 2024
9b3a8b1
Moved `pggrill.py`
mdashti Apr 3, 2024
f3f9a56
Moved iibench driver.
mdashti Apr 3, 2024
26e8dda
Use a dict instead of parameters.
mdashti Apr 4, 2024
5709711
Moved iibench driver.
mdashti Apr 3, 2024
407d5dc
Use a dict instead of parameters.
mdashti Apr 4, 2024
4b55c48
Merge branch 'moe-rl-autovac2' into moe-rl-autovac3
mdashti Apr 4, 2024
ce92759
Refactoring.
mdashti Apr 4, 2024
24ba801
Refactoring for pggrill.
mdashti Apr 4, 2024
dee8a48
Fixed the remaining issues with pggrill.
mdashti Apr 4, 2024
83ce78e
Merged branch 'svilen-rl-autovac' into moe-rl-autovac3
mdashti Apr 4, 2024
4effb29
Merge branch 'svilen-rl-autovac' into moe-rl-autovac3
mdashti Apr 4, 2024
4f6a6db
Moved `pg_stat_reset`
mdashti Apr 5, 2024
4662a1a
Commented out env.
mdashti Apr 5, 2024
e6977c6
Renamed `pggrill.py` to `pggrill_driver.py`
mdashti Apr 5, 2024
0922fde
Added a workload driver selector.
mdashti Apr 5, 2024
5404b62
Moved agent_thread to a separate file.
mdashti Apr 5, 2024
9ef7733
Added some comments.
mdashti Apr 5, 2024
b79ea94
Added support for `done_flag` to `pggrill`.
mdashti Apr 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 34 additions & 19 deletions bench/ibench/autovac_driver.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib
import os
import argparse
import sys
Expand All @@ -6,14 +7,13 @@
from learning.rl_glue import RLGlue
from learning.rl import Agent, default_network_arch

from workloads.iibench_driver import run_with_params

from tqdm.auto import tqdm

from executors.simulated_vacuum import SimulatedVacuum
from executors.pg_stat_and_vacuum import PGStatAndVacuum

def benchmark(resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname):
def benchmark(workload_driver, resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname):
run_with_params = workload_driver.run_with_params
id = 0
for initial_size in tqdm([10000, 100000, 1000000]):
for update_speed in tqdm([500, 1000, 2000, 4000, 8000, 16000, 32000, 64000]):
Expand All @@ -34,27 +34,28 @@ def benchmark(resume_id, experiment_duration, model_type, model1_filename, model
initial_size, update_speed, 5, experiment_duration, True, False, False,
model1_filename, True)

# Control with RL model #1
run_with_params(False, tag2, instance_url, instance_user, instance_password, instance_dbname,
initial_size, update_speed, 5, experiment_duration, True, False, False,
model2_filename, True)
# # Control with RL model #1
# if model2_filename and model2_filename != "":
# run_with_params(False, tag2, instance_url, instance_user, instance_password, instance_dbname,
# initial_size, update_speed, 5, experiment_duration, True, False, False,
# model2_filename, True)

# Control with PID
run_with_params(False, tag3, instance_url, instance_user, instance_password, instance_dbname,
initial_size, update_speed, 5, experiment_duration, True, True, False,
"", True)
# # Control with PID
# run_with_params(False, tag3, instance_url, instance_user, instance_password, instance_dbname,
# initial_size, update_speed, 5, experiment_duration, True, True, False,
# "", True)

# Control with default autovacuum
run_with_params(False, tag4, instance_url, instance_user, instance_password, instance_dbname,
initial_size, update_speed, 5, experiment_duration, False, False, False,
"", True)
# # Control with default autovacuum
# run_with_params(False, tag4, instance_url, instance_user, instance_password, instance_dbname,
# initial_size, update_speed, 5, experiment_duration, False, False, False,
# "", True)

gnuplot_cmd = ("gnuplot -e \"outfile='graph%s.png'; titlestr='Query latency graph (%s)'; filename1='%s_latencies.txt'; filename2='%s_latencies.txt'; filename3='%s_latencies.txt'; filename4='%s_latencies.txt'\" gnuplot_script.txt"
% (tag_suffix, tag_suffix, tag1, tag2, tag3, tag4))
print("Gnuplot command: ", gnuplot_cmd)
os.system(gnuplot_cmd)

def learn(resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname):
def learn(workload_driver, resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname):
agent_configs = {
'network_arch': default_network_arch,

Expand Down Expand Up @@ -91,7 +92,17 @@ def learn(resume_id, experiment_duration, model_type, model1_filename, model2_fi
'approx_bytes_per_tuple': 100,
'is_replay': is_replay,
'replay_filename_mask': 'replay_n%d.txt',
'state_history_length': 10
'state_history_length': 10,
'workload_driver': workload_driver,
# START pggrill only
'initial_size_range': [1000_000, 1000_000],
'update_speed_range': [100, 100_000],
'num_cols_range': [0, 0],
'num_indexes_range': [0, 0],
'num_partitions_range': [0, 0],
'updated_percentage_range': [1, 50],
'num_workers_range': [1, 50],
# END pggrill only
}

experiment_configs = {
Expand All @@ -107,6 +118,7 @@ def learn(resume_id, experiment_duration, model_type, model1_filename, model2_fi
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Run the AutoVacuum reinforcement learning driver.")
parser.add_argument('--cmd', type=str, choices=['benchmark', 'learn'], help='Command to execute (benchmark or learn)')
parser.add_argument('--workload-type', type=str, choices=['pggrill', 'iibench'], required=True, help='Type of the workload driver (pggrill or iibench)')
parser.add_argument('--max-episodes', type=int, default=100, help='Maximum number of episodes for the experiment')
parser.add_argument('--resume-id', type=int, default=0, help='Identifier to resume from a previous state')
parser.add_argument('--experiment-duration', type=int, default=120, help='Duration of the experiment in seconds')
Expand All @@ -121,6 +133,9 @@ def learn(resume_id, experiment_duration, model_type, model1_filename, model2_fi
args = parser.parse_args()

cmd = args.cmd

# Load the specified workload driver dynamically
workload_driver = importlib.import_module(f"workloads.{args.workload_type}_driver")

max_episodes = args.max_episodes
resume_id = args.resume_id
Expand All @@ -134,8 +149,8 @@ def learn(resume_id, experiment_duration, model_type, model1_filename, model2_fi
instance_dbname = args.instance_dbname

if cmd == "benchmark":
benchmark(resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname)
benchmark(workload_driver, resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname)
elif cmd == "learn":
learn(resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname)
learn(workload_driver, resume_id, experiment_duration, model_type, model1_filename, model2_filename, instance_url, instance_user, instance_password, instance_dbname)
else:
print("Invalid command")
19 changes: 13 additions & 6 deletions bench/ibench/executors/pg_stat_and_vacuum.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import time
import psycopg2
from workloads.iibench_driver import run_with_default_settings
from multiprocessing import Barrier, Process
from executors.vacuum_experiment import VacuumExperiment

Expand All @@ -11,11 +10,16 @@ def startExp(self, env_info):
self.db_host = env_info['db_host']
self.db_user = env_info['db_user']
self.db_pwd = env_info['db_pwd']
self.table_name = env_info['table_name']
self.workload_driver = env_info['workload_driver']

#print("Environment info (for PGStatAndVacuum):")
#for x in self.env_info:
# print ('\t', x, ':', self.env_info[x])
params = self.workload_driver.collectExperimentParams(self.env_info)
self.table_name = params['table_name']

# print("Environment info (for PGStatAndVacuum):")
# for x in self.env_info:
# print ('\t', x, ':', self.env_info[x])
# for x in params:
# print ('\t', x, ':', params[x])

self.is_replay = env_info['is_replay']
self.replay_filename = env_info['replay_filename_mask'] % env_info['experiment_id']
Expand All @@ -28,7 +32,7 @@ def startExp(self, env_info):
% (len(self.replay_buffer), self.replay_filename))
else:
barrier = Barrier(2)
self.workload_thread = Process(target=run_with_default_settings, args=(barrier, self.env_info))
self.workload_thread = Process(target=self.workload_driver.run_with_default_settings, args=(barrier, self.env_info))
self.workload_thread.start()
# We wait until the workload is initialized and ready to start
barrier.wait()
Expand All @@ -37,6 +41,9 @@ def startExp(self, env_info):
self.conn.set_session(autocommit=True)
self.cursor = self.conn.cursor()

print("Resetting stats...")
self.cursor.execute("SELECT pg_stat_reset()")

print("Disabling autovacuum...")
self.cursor.execute("alter table %s set ("
"autovacuum_enabled = off,"
Expand Down
6 changes: 4 additions & 2 deletions bench/ibench/executors/simulated_vacuum.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from workloads.iibench_driver import collectExperimentParams
from executors.vacuum_experiment import VacuumExperiment

class SimulatedVacuum(VacuumExperiment):
def startExp(self, env_info):
self.env_info = env_info
self.initial_size, self.update_speed = collectExperimentParams(self.env_info)
self.workload_driver = self.env_info['workload_driver']
params = self.workload_driver.collectExperimentParams(self.env_info)
self.initial_size = params['initial_size']
self.update_speed = params['update_speed']

#print("Environment info (for SimulatedVacuum):")
#for x in self.env_info:
Expand Down
Loading