Skip to content

Commit

Permalink
Resolved duplication issue in "Expanded Abbreviations Mess"
Browse files – browse the repository at this point in the history
  • Loading branch information
leeloren committed Nov 8, 2024
1 parent c570cee commit d380f0a
Show file tree
Hide file tree
Showing 2 changed files with 1,625 additions and 1,623 deletions.
29 changes: 15 additions & 14 deletions chapter4.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -88,26 +88,27 @@ process_line <- function(line) {
expan_exists <- xml_find_first(choice, ".//expan")
if (!is.na(abbr_exists) && !is.na(expan_exists)) {
# If <abbr> and <expan> tags exist, replace the <choice> element with the <reg> text inside <expan>
expan_reg_text <- xml_text(xml_find_first(choice, ".//expan//reg"))
# Ensure to replace the entire <choice> content with <expan><reg> text only once
xml_set_text(choice, expan_reg_text)
# If <abbr> and <expan> tags exist, extract only the <reg> text from <expan>
reg_text <- xml_text(xml_find_first(choice, ".//expan/reg"))
xml_set_text(choice, reg_text)
} else {
# If no <abbr>/<expan>, process the <orig>/<reg> tags
# For non-abbreviation choices, keep original behavior
orig_text <- xml_text(xml_find_first(choice, ".//orig"))
# Remove the <reg> elements, making sure only the <orig> text remains
reg_elements <- xml_find_all(choice, ".//reg")
xml_remove(reg_elements)
# Set the text of the <choice> element to the text of the <orig> tag
xml_set_text(choice, orig_text)
}
}
# Return the processed text of the line
# Now we want to remove any <reg> elements entirely
reg_elements <- xml_find_all(line, ".//reg")
xml_remove(reg_elements)
# Replace <lb/> tags with HTML line break
lb_elements <- xml_find_all(line, ".//lb")
for (lb in lb_elements) {
xml_set_text(lb, "<br>")
}
# Get the text of the line
return(xml_text(line))
}
Expand Down
Loading

0 comments on commit d380f0a

Please sign in to comment.