diff --git a/collection/collect.py b/collection/collect.py index a47f149..b238bc3 100644 --- a/collection/collect.py +++ b/collection/collect.py @@ -13,7 +13,7 @@ MAX_STARS = None # File to load/save the data -FILE = '../docs/projects.xlsx' +FILE = '../resources/projects.xlsx' def load(): diff --git a/collection/download.py b/collection/download.py index eae4103..fcaa99a 100644 --- a/collection/download.py +++ b/collection/download.py @@ -4,7 +4,7 @@ import pandas as pd # File to load the data with repositories -REPO_FILE = '../docs/annotated.xlsx' +REPO_FILE = '../resources/annotated.xlsx' # Dir to clone/update repositories REPO_DIR = os.path.abspath('../repos') diff --git a/collection/reset.py b/collection/reset.py index aaa0ca8..865ee81 100644 --- a/collection/reset.py +++ b/collection/reset.py @@ -4,7 +4,7 @@ import pandas as pd # File to load the data with repositories -REPO_FILE = '../docs/annotated.xlsx' +REPO_FILE = '../resources/annotated.xlsx' # Dir to clone/update repositories REPO_DIR = os.path.abspath('../repos') diff --git a/extraction/extract-db.py b/extraction/extract-db.py index 63abd83..f22f573 100644 --- a/extraction/extract-db.py +++ b/extraction/extract-db.py @@ -4,7 +4,7 @@ import pandas as pd # File to load the data with repositories -REPO_FILE = '../docs/annotated.xlsx' +REPO_FILE = '../resources/annotated.xlsx' # Dir to clone/update repositories REPO_DIR = os.path.abspath('../repos') diff --git a/docs/annotated.xlsx b/resources/annotated.xlsx similarity index 100% rename from docs/annotated.xlsx rename to resources/annotated.xlsx diff --git a/resources/create-database.sql b/resources/create-database.sql new file mode 100644 index 0000000..96e257a --- /dev/null +++ b/resources/create-database.sql @@ -0,0 +1,55 @@ +/* Schema for the db-mining database (SQLite dialect: INTEGER PRIMARY KEY AUTOINCREMENT). SQLite has no CREATE DATABASE statement; the database is created by opening the file, e.g. "sqlite3 db-mining.db". The ON DELETE RESTRICT clauses are enforced only on connections that run PRAGMA foreign_keys = ON. */ + +CREATE TABLE project ( + project_id INTEGER PRIMARY KEY AUTOINCREMENT, + owner TEXT, + name TEXT, + language TEXT, + domain TEXT +); + +CREATE TABLE project_version ( + version_id INTEGER PRIMARY KEY 
AUTOINCREMENT, + version TEXT, + last BOOLEAN, + project_id INTEGER, + FOREIGN KEY (project_id) REFERENCES project (project_id) ON DELETE RESTRICT +); + +CREATE TABLE database_type ( + type_id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT +); + + +CREATE TABLE database ( + database_id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT, + type_id INTEGER, + FOREIGN KEY (type_id) REFERENCES database_type (type_id) ON DELETE RESTRICT +); + +CREATE TABLE project_database ( + project_id INTEGER NOT NULL, + database_id INTEGER NOT NULL, + PRIMARY KEY (project_id, database_id), + FOREIGN KEY (project_id) REFERENCES project (project_id) ON DELETE RESTRICT, + FOREIGN KEY (database_id) REFERENCES database (database_id) ON DELETE RESTRICT +); + +CREATE TABLE strategy ( + strategy_id INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT, + name TEXT +); + +CREATE TABLE project_strategy ( + project_id INTEGER NOT NULL, + strategy_id INTEGER NOT NULL, + PRIMARY KEY (project_id, strategy_id), + FOREIGN KEY (project_id) REFERENCES project (project_id) ON DELETE RESTRICT, + FOREIGN KEY (strategy_id) REFERENCES strategy (strategy_id) ON DELETE RESTRICT +); + + + diff --git a/resources/db-mining.db b/resources/db-mining.db new file mode 100644 index 0000000..f329ce5 Binary files /dev/null and b/resources/db-mining.db differ diff --git a/docs/filtered.xlsx b/resources/filtered.xlsx similarity index 100% rename from docs/filtered.xlsx rename to resources/filtered.xlsx diff --git a/docs/heuristics-db-connection.xlsx b/resources/heuristics-db-connection.xlsx similarity index 100% rename from docs/heuristics-db-connection.xlsx rename to resources/heuristics-db-connection.xlsx diff --git a/docs/popular-dbs.xlsx b/resources/popular-dbs.xlsx similarity index 85% rename from docs/popular-dbs.xlsx rename to resources/popular-dbs.xlsx index 2e2768b..9433264 100644 Binary files a/docs/popular-dbs.xlsx and b/resources/popular-dbs.xlsx differ diff --git a/docs/projects.xlsx b/resources/projects.xlsx 
similarity index 100% rename from docs/projects.xlsx rename to resources/projects.xlsx