Skip to content
This repository has been archived by the owner on May 8, 2024. It is now read-only.

Commit

Permalink
chore: write down TODOs
Browse files (browse the repository at this point in the history)
  • Loading branch information
ninpnin committed Feb 9, 2024
1 parent dc604ee commit 3b63a44
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions scripts/resegment.py
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ def get_labels(texts):
else:
labels.append("intro")

# TODO: return a dict
return labels

def get_text(elem):
@@ -60,10 +61,10 @@ def main(args):
paragraphs.append(get_text(seg))
elif tag != "pb":
paragraphs.append(get_text(elem))
print("len", len(paragraphs))
print("len", len(list(set(paragraphs))))

labels = get_labels(paragraphs)

# TODO: actually change the tags in the ParlaClarin files
b = etree.tostring(
root, pretty_print=True, encoding="utf-8", xml_declaration=True
)
Expand Down

0 comments on commit 3b63a44

Please sign in to comment.