Skip to content

Commit f3fe5a8

Browse files
committed
new xml files
1 parent 5f22e7a commit f3fe5a8

File tree

1,023 files changed

+2,021,151
-513
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,023 files changed

+2,021,151
-513
lines changed

mannd_pkochar_project.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,27 @@ def logprob(self, target_word):
7676

7777
mannd_pkochar_dic = pyphen.Pyphen(lang='en_US')
7878

79-
def calculate_word_features(excerpt):
80-
words = word_tokenize(excerpt.lower())
79+
80+
def map_word_features(xml_filename, pos_tag_list):
81+
tags = Counter()
82+
with open(xml_filename, "r") as file:
83+
element = ET.parse(file)
84+
POS_xpath = "./document/sentences/sentence/tokens/token/POS"
85+
[element.findall(token_xpath)]
86+
87+
# total counts
88+
n = sum(tags.values())
89+
90+
# avoid div by 0
91+
if (n == 0):
92+
print("0 sucks")
93+
return [0 for dep in pos_tag_list]
94+
95+
# return frequency of each tag in the file
96+
return [tags[tag] / n for tag in pos_tag_list]
97+
98+
99+
def calculate_word_features(words):
81100
counter = Counter(words)
82101
word_lengths = [len(k) for k, v in counter.items() for x in range(0, v)]
83102
median_word = median(word_lengths)

xml/test/test_excerpt_1.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments (0)