-
Notifications
You must be signed in to change notification settings - Fork 1
/
phase2.py
105 lines (90 loc) · 3.88 KB
/
phase2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Revisions:
# 1) Separate the text into paragraphs
# 2) Periods (.) and commas (,) must directly follow the previous word, with no space in between
# 3) Highlight the area covered by a phrase
from pathlib import Path
import pandas as pd
from bs4 import Comment, BeautifulSoup as Soup
import os
from os import listdir
def find_csv_filenames(path_to_dir, suffix=".csv"):
    """Return the names of the files in *path_to_dir* that end with *suffix*.

    Args:
        path_to_dir: Directory to scan (``str`` or path-like object).
        suffix: Filename ending to match; defaults to ``".csv"``.

    Returns:
        A list of bare filenames (no directory component), sorted so that
        repeated runs handle the files in the same order. Directory entries
        that are not regular files are left out, even if their name matches.

    Raises:
        OSError: Propagated from ``listdir`` when the directory cannot be read.
    """
    return sorted(
        name
        for name in listdir(path_to_dir)
        # A sub-directory called e.g. "old.csv" must not be handed to the CSV reader.
        if name.endswith(suffix) and os.path.isfile(os.path.join(path_to_dir, name))
    )
# Closing tag that ends every highlight span.
_SPAN_CLOSE = '</span>'

# Opening highlight tag for each event-type digit. The digits are tried in
# this order and the first one found inside the "event_type" cell wins.
_HIGHLIGHT_SPANS = {
    "1": """<span style="background-color: Tomato">""",
    "2": """<span style="background-color: SkyBlue">""",
    "3": """<span style="background-color: BurlyWood">""",
    "4": """<span style="background-color: MediumOrchid">""",
    "5": """<span style="background-color: Chartreuse">""",
    "6": """<span style="background-color: plum">""",
    "7": """<span style="background-color: gold">""",
    "8": """<span style="background-color: red">""",
    "9": """<span style="background-color: royalblue">""",
}


def _rows_to_html(df):
    """Render each row of *df* as one ``<P>...</P>`` fragment and join them.

    Reads the "event_type" and "paragraph" columns. A row whose event type
    contains one of the digits 1-9 gets its paragraph text wrapped in the
    matching highlight span; a row whose event type equals "PARA" becomes an
    empty ``<p></p>``; any other row is emitted as plain text.
    """
    parts = []
    for raw_event, raw_token in zip(df["event_type"], df["paragraph"]):
        # pandas may hand back numbers for cells that look numeric; convert
        # to text so the substring test and concatenation cannot raise.
        event = str(raw_event)
        token = str(raw_token)

        colour_key = next((key for key in _HIGHLIGHT_SPANS if key in event), None)
        if colour_key is not None:
            body = _HIGHLIGHT_SPANS[colour_key] + token + _SPAN_CLOSE + ' '
        elif event == "PARA":
            body = """<p></p>"""
        else:
            # NOTE(review): the earlier revision called text.rstrip() at this
            # point for tokens failing str.isalpha(), intending to remove the
            # space before punctuation. The row's "<P>" had already been
            # appended, so nothing was ever stripped. The emitted markup is
            # kept unchanged here; making punctuation hug the previous word
            # requires deciding how tokens share a <P> element, and isalpha()
            # would also treat tokens such as "2020" as punctuation.
            # NOTE(review): cell text is inserted into the markup unescaped.
            body = token + ' '
        parts.append("""<P>""" + body + """</P>""")
    return "".join(parts)


def processcsv(file):
    """Convert one annotated CSV file into a highlighted HTML page.

    The CSV is read from ``input_folder / file`` and rendered to markup.
    The HTML template is then loaded, its existing ``<p>`` elements are
    removed, and ``insert_after`` is called with the rendered markup on every
    ``<h2>`` whose text contains "Text" (the same parsed fragment is passed
    each time). Comments inside ``<body>`` are stripped, and the page is
    written to ``output_folder``. The output name is *file* with "input"
    replaced by "output" and ".csv" replaced by ".html".

    Args:
        file: Bare filename of the CSV inside ``input_folder``.

    Relies on the module-level names ``input_folder``, ``template`` and
    ``output_folder``. Template and output are read/written as UTF-8.
    """
    # V2 - Modify variable for different files
    file_to_open = input_folder / file
    # V2 - Empty cells are replaced by the marker "PARA" (new paragraph)
    df = pd.read_csv(file_to_open, header=0).fillna(value="PARA")

    # prepare HTML code for text based on event_type id
    text = _rows_to_html(df)

    # The context manager closes the template even if parsing fails.
    with open(template, 'r', encoding="utf-8") as template_file:
        soup = Soup(template_file.read(), 'html.parser')
    insert = Soup(text, 'html.parser')

    # remove existing paragraphs
    for paragraph in soup.find_all('p'):
        paragraph.decompose()

    # insert the rendered text after the matching header
    for heading in soup.find_all('h2'):
        if "Text" in heading.text:
            heading.insert_after(insert)

    # remove comments from HTML file (a template without <body> is left as is)
    body = soup.find('body')
    if body is not None:
        for comment in body.find_all(string=lambda node: isinstance(node, Comment)):
            comment.extract()

    # V2 - save edits to HTML file
    output = file.replace("input", "output").replace(".csv", ".html")
    # Create the destination folder on first use instead of failing on open().
    os.makedirs(output_folder, exist_ok=True)
    output = os.path.join(output_folder, output)
    with open(output, "w", encoding="utf-8") as html_file:
        html_file.write(str(soup))
# All locations are resolved against the directory the script is started from.
_launch_dir = Path.cwd()
input_folder = _launch_dir / "./phase_2_input"
template = _launch_dir / "./template-v2.html"
output_folder = _launch_dir / "./phase_2_output"

# Convert every CSV found in the input folder, one HTML page per file.
filenames = find_csv_filenames(input_folder)
for file in filenames:
    processcsv(file)