From d04eb7e95582cd672482d76981b6bd8566df6b3b Mon Sep 17 00:00:00 2001 From: Vanessa Braganholo Date: Tue, 3 Sep 2019 17:36:54 -0700 Subject: [PATCH] Issue #14: Creates SQLite database to keep data extracted from projects. Renames doc folder to resources. --- collection/collect.py | 2 +- collection/download.py | 2 +- collection/reset.py | 2 +- extraction/extract-db.py | 2 +- {docs => resources}/annotated.xlsx | Bin resources/create-database.sql | 55 ++++++++++++++++++ resources/db-mining.db | Bin 0 -> 45056 bytes {docs => resources}/filtered.xlsx | Bin .../heuristics-db-connection.xlsx | Bin {docs => resources}/popular-dbs.xlsx | Bin 46321 -> 46318 bytes {docs => resources}/projects.xlsx | Bin 11 files changed, 59 insertions(+), 4 deletions(-) rename {docs => resources}/annotated.xlsx (100%) create mode 100644 resources/create-database.sql create mode 100644 resources/db-mining.db rename {docs => resources}/filtered.xlsx (100%) rename {docs => resources}/heuristics-db-connection.xlsx (100%) rename {docs => resources}/popular-dbs.xlsx (85%) rename {docs => resources}/projects.xlsx (100%) diff --git a/collection/collect.py b/collection/collect.py index a47f149..b238bc3 100644 --- a/collection/collect.py +++ b/collection/collect.py @@ -13,7 +13,7 @@ MAX_STARS = None # File to load/save the data -FILE = '../docs/projects.xlsx' +FILE = '../resources/projects.xlsx' def load(): diff --git a/collection/download.py b/collection/download.py index eae4103..fcaa99a 100644 --- a/collection/download.py +++ b/collection/download.py @@ -4,7 +4,7 @@ import pandas as pd # File to load the data with repositories -REPO_FILE = '../docs/annotated.xlsx' +REPO_FILE = '../resources/annotated.xlsx' # Dir to clone/update repositories REPO_DIR = os.path.abspath('../repos') diff --git a/collection/reset.py b/collection/reset.py index aaa0ca8..865ee81 100644 --- a/collection/reset.py +++ b/collection/reset.py @@ -4,7 +4,7 @@ import pandas as pd # File to load the data with repositories -REPO_FILE = '../docs/annotated.xlsx' +REPO_FILE = '../resources/annotated.xlsx' # Dir to clone/update repositories REPO_DIR = os.path.abspath('../repos') diff --git a/extraction/extract-db.py b/extraction/extract-db.py index 63abd83..f22f573 100644 --- a/extraction/extract-db.py +++ b/extraction/extract-db.py @@ -4,7 +4,7 @@ import pandas as pd # File to load the data with repositories -REPO_FILE = '../docs/annotated.xlsx' +REPO_FILE = '../resources/annotated.xlsx' # Dir to clone/update repositories REPO_DIR = os.path.abspath('../repos') diff --git a/docs/annotated.xlsx b/resources/annotated.xlsx similarity index 100% rename from docs/annotated.xlsx rename to resources/annotated.xlsx diff --git a/resources/create-database.sql b/resources/create-database.sql new file mode 100644 index 0000000..96e257a --- /dev/null +++ b/resources/create-database.sql @@ -0,0 +1,55 @@ +CREATE DATABASE db-mining; + +CREATE TABLE project ( + project_id INTEGER PRIMARY KEY AUTOINCREMENT, + owner TEXT, + name TEXT, + language TEXT, + domain TEXT +); + +CREATE TABLE project_version ( + version_id INTEGER PRIMARY KEY AUTOINCREMENT, + version TEXT, + last BOOLEAN, + project_id INTEGER, + FOREIGN KEY (project_id) REFERENCES project (project_id) ON DELETE RESTRICT +); + +CREATE TABLE database_type ( + type_id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT +); + + +CREATE TABLE database ( + database_id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT, + type_id INTEGER, + FOREIGN KEY (type_id) REFERENCES database_type (type_id) ON DELETE RESTRICT +); + +CREATE TABLE project_database ( + project_id INTEGER NOT NULL, + database_id INTEGER NOT NULL, + PRIMARY KEY (project_id, database_id), + FOREIGN KEY (project_id) REFERENCES project (project_id) ON DELETE RESTRICT, + FOREIGN KEY (database_id) REFERENCES database (database_id) ON DELETE RESTRICT +); + +CREATE TABLE strategy ( + strategy_id INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT, + name TEXT +); + +CREATE TABLE project_strategy ( + project_id INTEGER NOT NULL, + strategy_id INTEGER NOT NULL, + PRIMARY KEY (project_id, strategy_id), + FOREIGN KEY (project_id) REFERENCES project (project_id) ON DELETE RESTRICT, + FOREIGN KEY (strategy_id) REFERENCES strategy (strategy_id) ON DELETE RESTRICT +); + + + diff --git a/resources/db-mining.db b/resources/db-mining.db new file mode 100644 index 0000000000000000000000000000000000000000..f329ce55bfe5304e100edc8096b7810636fbf35e GIT binary patch literal 45056 zcmeI)!EWP37{GD6PLtGa@B)V%RuzNx&_r#J5Dx&^uDgmzlf}(STNYG#8&k|Cb~kal zRGiY~1vqd-;>bhr259d*0XL3t1>4EEcG^HM*+|vjYGTiLW<2woZ>(C0a`0Kx4IJ^6 zKj_=XBtI;~WP^!(d~THm$IddC!<`u(OUX3vU>E<}>+c15GrF&~7-g+sZnNxk~9o-ZHNRK@kW&bPxCz3@NBBYPD1ar#IOEotV% z*_`!pu2?8Pq2>=XzGWpN4Oc>5E0njl)mOWjm2l<^hOY0)P@g7qnSRWiob+T`G6k23 z>#Ta*nQ4v5I*oIB_An6l+wG=VZ^i8LN|$%otFv)gCUbL}X8+&aQm;N*$wv-{F~juM zHgsN$9PikfNopB}nUj?WcPFe_9p6(6<-2#)i_J9cv_mVIHj-XOad4X0R#|0009ILKmY**5I_I{1a7N9 z^!%?He=6Z08v+O*fB*srAbq{!$0w_#!#0|E#jfB*sr zAb%rbTc}>~&pGCiCBm9Buc!AR`sbSH9(+dmxp50qt z(Ui@ue?01kXE!uWdGEP%UOTgUBj>H0rrdk!b{%o-_eOoMxT-1TlYxEub?Wo~)x`~p zmJvVz0R#|0009ILKmY**5I|s=0;%W!Wp;yWBY*$`2q1s}0tg_000IagfWQI;^r&rq z|9^qPG>rfP2q1s}0tg_000IagfWYzv`22tQb3hLeKmY**5I_I{1Q0*~0R#|Ov;fcl zix#GR1Q0*~0R#|0009ILKmY**mM_5f|Cc`p^Z)?_5I_I{1Q0*~0R#|00D(md{0p+O Bb>#p6 literal 0 HcmV?d00001 diff --git a/docs/filtered.xlsx b/resources/filtered.xlsx similarity index 100% rename from docs/filtered.xlsx rename to resources/filtered.xlsx diff --git a/docs/heuristics-db-connection.xlsx b/resources/heuristics-db-connection.xlsx similarity index 100% rename from docs/heuristics-db-connection.xlsx rename to resources/heuristics-db-connection.xlsx diff --git a/docs/popular-dbs.xlsx b/resources/popular-dbs.xlsx similarity index 85% rename from docs/popular-dbs.xlsx rename to resources/popular-dbs.xlsx index 2e2768b53692defa121ccef81ce50f106982edf0..943326439a9ef94bb2c8c21bd29666e099d301b3 100644 GIT binary patch delta 3054 zcmVqQQ0BUt8tvM5{m1h8L z??}J!-$AzHJPX10Wei@27(`e`%=q@}fzLh#;*796>` z=EZqSdvdpvu7RiNPdunh^`_BdA=U8lmsRl}q>5|hzYPJ{k=*_NHWzAmoqM}+@74khRMoDCUp*-<&*GW)bBwxW99;P+Jv+VY{SJ0X1 z``99Dh^ ztBp2&rj6_}({nxdxc3L4s}A@&NunOZwWMouiuO%zUi8vT&m^uokXUS$XbA z=hkHs#THtWs49xc^NL8bvrp*h-U>AJY>c`d*2wsc_08|l##QIlAvX=4s$2L~<0ubL z&wrG-zBm4YS3(PQV>q#~HHzFSlb#GBe+RjVIzhJby^wl_LZA>V$48eDN_-2IUSRog z;##rqxz@rj+%kyDqRa!O9QCG3|BrsH>oGmN0u{>+b9u`lbWnCTq|LDasq5g{!1t7F z9>h8FfTCYPW_{2#q&M`fps2hca*9P6R8M4;`|TUMWo+xG36?W-64gZXI#pw}vp@|^ zP6Ys-zh#EAl3T|Ce~Lh?<$swM`a8H7NgwL(wZ8|J)Bh|6k4c$^`SCy5zIq0yuy*$8 zA$d7g|H|JsY4-G7LF*J^r^&O}|A^9Z2U!b)88SD!{0KJT@4+dfs}GVc`OZf#Op@Wm z89b(C^^j3p&S3XaR{5#+NXII>Llmq#{Cwlx{rLyF!yK{hfACXu_rTJXzG0NCGyD{t zi9ei9{qfYjJ^(<98G)a|h&!Bn?${&LNJr!|(Gh;uk$dk)KH~7Rsg9B=xw|d$w*jm| zfVsRRbWbA-0{>vs2)3=$2opg2+|y3b0U(2kDk0fk?}GX5=oJ!ZCv;X89Wq8rp-}SJ z+)~q#(GsgIe zCUt1GQ$#{ruZi9(GJ3_L1x_^Si|C!!Tm=%a(HOhB&X()2tw@XC?-M`mv~;xarzm`j z%1l=Y-&cvaiW>VAIf}W88_0av@)k2xSdJf`YT9Y(e@xiYAPD$XEo^|tG1eM&5=6Km z<_*p8*SvwK=winkp)-eJ9qz(Ezy9&%^TW@d;!l$W{N=j8t|!0Tjt*=&qdZXjYk2_K z^Ko!!Cs+qliT%oG2}KJ`O0&hFwGv4kI%0yPF_(I9P&N6DmjKs+;H1g~sceK3(u&!% z#tik`DAva58LRGFFhw87jCPU?*&Ba(xfS3yd6_bHqN`C}X z_Hlq{r#jmL1Wzqfk`1YZrpgSdM5+grh%uL`xrSHe+A;{V{x@8uBiDkqT&s;^C~48# zqgbN71bT{?byw=l#kXGdCAPnSi zp?h78&TCg*f}n)rw?IAXJiLh6O;OjGqf;f4I_v~S8)+=0-KdHZTU`JF8p=i(a$N@N zUtfR!c{5o6*b+*#_P26e29~`FcOt53rDCa@SiQ&#b!CU)vV5a z1WYImDEke*l4(xRx`~Cbd#2lJZpyOsIELzSlxCiBSvLz|-`EdKojEQc+0a^$jfJo~ z=(eJnUTk^>IC0Hgn{m$xdFDjXg3AYtGZ>q>Pf&C zY$;T(JVtt(h3XvFZ<-T?NGybE(74Fh3LQ((wEX%wB_+x|b&D2F>K4tLXpz!TFh*!n ziJfZ3+7)6ruA8eVS_oUXThM*gjLz*3?FL zFk+e$Oo<7h3ZlV@S)3Sbw$L$Iu?9oW!tLl7_Bzdh2yHOLe1sG>7#e(pMaSSHG$#b} z5klH^ny)Qh3>I7Hbb@H1d2|qakvjA%wM;~OU6;a+>$*k9xUP3OiG@(b5MS5Vwt--x zTj+FR=$c%&j(?$A5y8HC6))!11raOm@z>W{s6dEsN76vYFw7fa zrxQZYWE*t|)e7%w2vyPuQwd%zR3OAPmPN;)5~Pl0o?;>FhR`w)%ybJooe=u(L8w-& zS3_8nJ2W~>On4>*We^h<9fO#VI@Ad!CWNpXLUSMn<$o>gz#Rg8R8~$D`B}NC4fHHj zYv!vVR0%Cyw0V{Z26kpEKNN%)Vlua2x|h%lK3mnWVa^9 z24(6P%zuHj!!{_iv&2H$vb^-SH0T)P@07$FF#i6Iz0+!YStnj{`DiQeX$IaM*7TdV zfZwk9)&6@|TKBtR^5lq)}fdB~rmozmioy-_R&yZnW|8=S(1B3s3~NO!ha z!L1OyAV~<4$SQ%G<|2lBmD%UsTddfX9^3L3@PDAn)Q`OdJR73lU7Eq(-CmNX^!KEA z%FgBBm>$@J#|a^GLdKNjDEQ0B@la{2@(XcW{{sL3|NjF3 zP?M;1Et7Ds1+ybh3jwozX3jqaqZgSgPqR>_UIGLrQ<}`Pv8XKq1bc%)hqL0U8UzIZ zp1);=lL4<=0XvgjuR;N9lc29b0SS}huR#H$lO(W00n?Lbut5TCdy`?a7n8xTGy;8m zlVP(LlMb;o0*!r>t$iAkUa=|x8Iy^zMFQQKlOeJjlk2fG0Th!UvQPmMlX0?80uY{) zFrFoo=&~vT8=sS5vlo*qvorxJlVGz<0y?3SVY3&L#j`X5N}`i6o*R=Ev@`)vlS{NC w8>1JQD^CFc0CfTY01*HH00000000000000vlZmuG0l$;Tv?B%yssI2004Dy}u>b%7 delta 3084 zcmV+n4D<8u=>qZT0aOP82R@OxfieFG;p0RcF84&Iy^ITW_12;JM?HxxS(vz4Cl-?5E*OM&~cR+pKdEN zHjt2bY;ey;k{J&~81F8A{pEba`2CWxdjtTZ$c#?PDYYz7cVxigj7?+&OUw9xC4VgW z$`TVNxDg#8a_BABwnJ;cbToDdDSr(HYg<~Ag4OdtM!5swq{kBIi;hm>fHu^xfg3Qt zpC_|s!vqjZ+Ee+UK@Bw2>D6jv94~utd+P*f%i$Zs&#_@}4>y77#Z?2XITNgvX8>*O zNWbvku`S1W7J}`|7`zV2w>Vi-l~BVYD;~a)Rd~b-pTHdZr_mgcmexuN!FwZGaOCD1 z7w0YQ$=y!62v5_Wcu<+@4b)>H%Z5tQWJUx_wjs|p_&nt^?ZN8UvEv%n#Zk`J95s_N z3nzaZ-H^X$$_W$}=nH&>5!WeRifb%8H1j?)tWN@*x7K(hLSRc3q?m9`tY&Erbc71F z8deMIM5y>>Rs093;#&D{K>&6ncmKZ)25{x+L5|mwBlv$+yoZ#0#A`?p$N*^R`n^&a z@V<;SPn|E1WfYWQ6oqD9#)VlHeqiQ>>zRLr?^kXTFD`>P{0h$SFs&J$Ww+10g3b)z z|BmSc-X1KDozAp*qVZ_&KcVU74Bd zL7dwjQ1mOvtnVZ?q&M`paVb#6IDX8z1{`$+tDi|&`#*A zEIMS2ltQ88vALzDBcml&e_KW{YB_XpuMNFx$SbN0L-wVo&}fN_mRN1kd_&@V95&i% z>9~Ndgl+00*rX0!G8B=})@!2oii}>dXn_+=`XYL#HCKTIY&6EMuCwJjY%9{@_xr?8 zJ1re8{3#0GqB7G}!uM4ouA;_1MUG;w;<6o(;k@N7W~i_nKR(s8f78;Lu%$r|@T*$b z0FPs=HR>dYa6`-+n&Gc`15we%jyFPQ4#PU!g@1nib$?w?ez_eT z*m6dBp!nDF0J7)f;LuL64yJM~u8fvYw7{e^TMSw&k<_6hCP*4{sRsvDlizp=a2*Iv zs!Wi|MmQm@m`!WUEKtvFZJeI5>b?b2^l{8+Cz()am9A=G6NZaaLQ`dik}lF*jP(SF zxe$tn29teXN`FM_y$=IKJJs11Ab4t-l59vNG*xCuB~m@0M2xvi%{9C#*Ooz`^}pdN z9k~{?o&vC6_LmwiUBUrRph+j{~8k=9}3R*G=k<7oc-*ZSm*hEw47z zo`q!GhMlx;fY-0mgR?sR{KR)1=B_M7R1X zVyg=vKttIGL$1qU{p;)RKW`=r09!(d*8WzG%fPZ%;Z8&~tyC;^6RQ__p)Q)#NTz6i zkED*J$bX83unVuPG+;#5t0adDQL{OrTg6HWn=J7W(xF9Xj_b58C$SK!LA%Cuw$jI$sHNrTg~d+N5F*AfU@7gKjID>BXi;&}vPKt7c|WKYw~BXbTH1I&))9t4{DOL>TA}nj2ud zi*{2Wy{cQ*tDXdW!Inbh%44LrS*Xr&{iZoVh{Qsu291l1t&FI`-H6-g*Pbt2yNTD*N_`+h* z34id8)#W4>!p=MvI+mb&)qYUAH)j*($)r#T8t+&ZodB1GniEWz#6sE~H6NxW>OOVb z7K)sf%;{6Qgs#Hiu`(qxT4K?{zXoUwYoD4Tbd}RuxWp~+C4@z3aoOucr=0n_jI>RO zjE$rbrW8)P?$|WNT9eL7w=&dv)V+>Mm!EVnyyiH-C#FpYU(m16iG)rkD=l<5Ev?7b z>r@0o9kij8cJPK49fLU3yl`!D0)YP7p0Lj}BrlQipz}mWhb3>r&WpUAO2M*Yz$Z zu@I^l;_Ld_HV{m73!P33U6bq9F@IDmBG^~2;>EnWAY#Qm{`y)A6$tU|NE+z4{A*6| z1wshB)6g6UhIu3GbVBHvY@-gLTH##{p-LKID#5FT3WT`EvgjC8g4D6hQ!Iqt5LyO; znQmdH6GHzz2-S-9Y6xp`hen5q3D2aU3}V8fV-OQkhdRN;gb;Q^Xb!}nynlrqxI>_i z%F2l%KPxx2fu4nG&3rY4DxrmoHqY{9*I39FLJM;N(vF2Si-k~y5YrrMp+gD3hAqJS zwop>}Z9s41N!&;qaB8i8HE=3lg~%>bg zA8qA5&A_|Entt;Z@cT7~YREcKeyVw}qOjMD1c-%>a^>eQ5BYPnQ(8Q+H;Uz8m%q?= zgHza0WUF`=>CP4_xD|pIBnd$hStW4OT*Pp%GW*cx(|q!-L`!UWAQ@9`S=IJU1MRKTs7O^1Nyj!jShx@&?bG2gQQEOD#U{*|UU^ zeAs;Ve*gdg|NjF3P?ISzEwgE5-arL-xF`ZnvrML50t1r7L$j`^Edm5$*r?yL->Mn} z1;9o_%8HZzu3G^=lUlDr0dJF?uR#F~li#mF0jHB8utEaYb(8OP7L$9hDgtqPlM%BQ zlc0MFlgO|t0)c#!5wjPQuzU)W$gmKTn|%-q0000000000005DbK(R#v9gdShu@#e? zu`dGQnUgS_8Z0X>t3v_1jClf|?n1`MhI00027Qqc?m diff --git a/docs/projects.xlsx b/resources/projects.xlsx similarity index 100% rename from docs/projects.xlsx rename to resources/projects.xlsx