-
Notifications
You must be signed in to change notification settings - Fork 0
/
issue_clean.sh
41 lines (37 loc) · 1.29 KB
/
issue_clean.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#! /usr/bin/env bash
# Strict mode:
#   -e           (errexit)  abort as soon as a command exits non-zero
#   -u           (nounset)  expanding an unset variable is an error
#   -o pipefail             a pipeline fails if any of its stages fails
set -euo pipefail
# ---------------------------------------------------------------------------
# DISABLED: the post-2018 and pre-2018 cleaning passes below are wrapped in a
# single-quoted string that is handed to the no-op builtin `:`, so none of
# this text is executed. To re-enable a pass, move it out of the quotes.
#
# The quoted text must never contain a single-quote character: that would end
# the string early and the remainder would run as live commands.
#
# NOTE(review): the commented-out rm in the first loop names
# filtered_github_data_large, while that loop reads from filtered_github_data;
# confirm which path is intended before re-enabling it.
# ---------------------------------------------------------------------------
: '
mkdir -p data/github_clean/filtered_github_data
# cleans post-2018 data
FILES="$(ls data/github_raw/filtered_github_data/ | grep "partitions" | grep -v ".gstmp")"
for file in $FILES
do
/usr/bin/env python issue_clean.py "data/github_raw/filtered_github_data/$file"
echo "$file has been cleaned"
#rm -rf "data/github_raw/filtered_github_data_large/$file"
done
mkdir -p data/github_clean/github_data_pre_18
# cleans pre-2018 data
FILES="$(ls data/github_raw/github_data_pre_18 | grep "github_data_pre" | grep -v ".gstmp" | shuf)"
for file in $FILES
do
/usr/bin/env python issue_clean.py "data/github_raw/github_data_pre_18/$file"
echo "$file has been cleaned"
#rm -rf "data/github_raw/github_data_pre_18/$file"
done
'
#######################################
# Cleans the post-2023 raw data in data/github_raw/github_data_2324.
#
# Every entry whose name contains "Event" and does not contain ".gstmp" is
# visited in random order (shuf). Names containing "push" are handed to
# new_issue_clean.py; every other file is copied unchanged into
# data/github_clean/github_data_2324.
#
# Globals:   none
# Arguments: none
# Outputs:   one "<name> has been cleaned" line per file on stdout;
#            a notice on stderr when there is nothing to process
# Returns:   0 on success (including "nothing to clean"); non-zero when
#            mkdir, shuf, python or cp fails
#######################################
clean_github_data_2324() {
  local -r raw_dir='data/github_raw/github_data_2324'
  local -r clean_dir='data/github_clean/github_data_2324'
  local -a candidates=() files=()
  local path name shuffled

  mkdir -p -- "$clean_dir"

  # Collect names with a glob rather than `ls | grep`: names containing
  # whitespace stay intact, and an empty match no longer makes grep exit 1,
  # which under errexit + pipefail used to abort the script without a message.
  for path in "$raw_dir"/*Event*; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "$path" ]] || continue
    name=${path##*/}
    # Skip ".gstmp" entries. The dot is matched literally here; the former
    # `grep -v ".gstmp"` treated it as "any character".
    if [[ "$name" == *.gstmp* ]]; then
      continue
    fi
    candidates+=("$name")
  done

  if (( ${#candidates[@]} == 0 )); then
    printf 'no files to clean in %s\n' "$raw_dir" >&2
    return 0
  fi

  # Randomise the processing order. Capturing the output first lets a shuf
  # failure propagate instead of being hidden inside a process substitution.
  shuffled=$(shuf -e -- "${candidates[@]}")
  mapfile -t files <<<"$shuffled"

  for name in "${files[@]}"; do
    # NOTE(review): this match is case-sensitive, so a name such as
    # "PushEvent..." would take the copy branch; confirm the file naming.
    if [[ "$name" == *push* ]]; then
      /usr/bin/env python new_issue_clean.py "$raw_dir/$name"
    else
      cp -- "$raw_dir/$name" "$clean_dir/$name"
    fi
    printf '%s has been cleaned\n' "$name"
    # Removal of the processed raw file is currently disabled:
    # rm -rf -- "$raw_dir/$name"
  done
}

clean_github_data_2324