Skip to content

Commit

Permalink
Merge pull request #408 from hakonhagland/kw_refs
Browse files Browse the repository at this point in the history
Script to extract keyword URIs
  • Loading branch information
blattms authored Nov 11, 2024
2 parents d55b14a + 9f3179b commit 6294c86
Show file tree
Hide file tree
Showing 14 changed files with 439 additions and 294 deletions.
1 change: 1 addition & 0 deletions scripts/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ fodt-extract-xml-tag = "fodt.splitter:extract_xml_tag"
fodt-fix-ignored-keywords = "fodt.fix_ignored:fix_ignored"
fodt-fix-footer-style = "fodt.fix_footer_style:fix_footer_style"
fodt-fix-letter-k-footer = "fodt.fix_letter_k_footer:fix_letter_k_footer"
fodt-gen-kw-uri-map = "fodt.keyword_linker:gen_kw_uri_map"
fodt-remove-bookmarks-from-master-styles = "fodt.remove_bookmarks:remove_bookmarks_from_master_styles"
fodt-remove-chapters = "fodt.splitter:remove_chapters"
fodt-remove-elements = "fodt.splitter:remove_elements"
Expand Down
22 changes: 11 additions & 11 deletions scripts/python/src/fodt/add_keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from fodt.constants import ClickOptions, Directories, FileExtensions, KeywordStatus, Regex
from fodt.create_subdocument import CreateSubDocument3
from fodt.helpers import Helpers
from fodt import helpers
from fodt.remove_subsections import RemoveSubSections
from fodt.templates import Templates
from fodt.xml_helpers import XMLHelper
Expand Down Expand Up @@ -241,8 +241,8 @@ def __init__(
status: KeywordStatus,
appendix: bool
) -> None:
self.maindir = Helpers.get_maindir(maindir)
self.keyword_dir = Helpers.get_keyword_dir(keyword_dir, self.maindir)
self.maindir = helpers.get_maindir(maindir)
self.keyword_dir = helpers.get_keyword_dir(keyword_dir)
self.keyword = keyword
self.chapter = chapter
self.section = section
Expand All @@ -259,16 +259,16 @@ def __init__(

def add_keyword(self) -> None:
self.documentdir = Path(self.maindir) / Directories.chapters
keyw_list = Helpers.read_keyword_order_v2(self.keyword_dir, self.chapter, self.section)
#keyw_list = Helpers.read_keyword_order(self.documentdir, self.chapter, self.section)
keyw_list = helpers.read_keyword_order_v2(self.keyword_dir, self.chapter, self.section)
#keyw_list = helpers.read_keyword_order(self.documentdir, self.chapter, self.section)
keywords = set(keyw_list)
if self.keyword in keywords:
logging.info(f"Keyword {self.keyword} already exists. Aborting.")
return
keywords.add(self.keyword)
keyw_list = sorted(list(keywords))
#Helpers.write_keyword_order(self.documentdir, self.chapter, self.section, keyw_list)
Helpers.write_keyword_order_v2(self.keyword_dir, self.chapter, self.section, keyw_list)
#helpers.write_keyword_order(self.documentdir, self.chapter, self.section, keyw_list)
helpers.write_keyword_order_v2(self.keyword_dir, self.chapter, self.section, keyw_list)
logging.info(f"Added keyword {self.keyword} to chapter {self.chapter}, section {self.section}.")
return

Expand Down Expand Up @@ -296,9 +296,9 @@ def update_appendixA(self) -> None:
def update_chapter_document(self) -> None:
logging.info(f"Updating chapter document {self.chapter}.")
filename = self.documentdir / f"{self.chapter}.{FileExtensions.fodt}"
source_file = Helpers.create_backup_document(filename)
source_file = helpers.create_backup_document(filename)
dest_file = filename
replace_callback = Helpers.replace_section_callback
replace_callback = helpers.replace_section_callback
# NOTE: This will remove the subsection the first time it is called, and then
# if it is called again (for example using add-keyword), it will remove
# the inserted <text:section> tag that was inserted by the first call.
Expand Down Expand Up @@ -369,12 +369,12 @@ def add_keyword(
appendix: bool
) -> None:
logging.basicConfig(level=logging.INFO)
(chapter, section) = Helpers.split_section(section)
(chapter, section) = helpers.split_section(section)
try:
status = KeywordStatus[status.upper()]
except ValueError:
raise ValueError(f"Invalid status value: {status}.")
add_keyword = AddKeyword(maindir, keyword_dir, keyword, chapter, section, title, status, appendix)

if __name__ == "__main__":
add_keyword()
add_keyword()
1 change: 1 addition & 0 deletions scripts/python/src/fodt/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class FileExtensions():

class FileNames():
keywords = "keywords.txt"
kw_uri_map = "kw_uri_map.txt"
main_document = "main.fodt"
master_styles_fn = "master-styles.xml"
office_attr_fn = "office_attrs.txt"
Expand Down
12 changes: 6 additions & 6 deletions scripts/python/src/fodt/create_subdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from fodt.automatic_styles_filter import AutomaticStylesFilter2, AutomaticStylesFilter3
from fodt.constants import Directories, FileExtensions, FileNames, MetaSections
from fodt.exceptions import InputException
from fodt.helpers import Helpers
from fodt import helpers
from fodt.xml_helpers import XMLHelper
from fodt.styles_filter import StylesFilter

Expand Down Expand Up @@ -36,7 +36,7 @@ def create_documents(self, parts: list[str], bookmark_info: list[str]|None = Non
logging.info(f"Created FODT subdocument {outputfile}")

def create_subsection_template(self, part: str, part_extra: str|None) -> str:
template = Helpers.read_keyword_template()
template = helpers.read_keyword_template()
part_bookmark = f"{part}_{part_extra}" if part_extra is not None else part
template = re.sub(r"###KEYWORD_NAME_BOOKMARK###", part_bookmark, template)
template = re.sub(r"###KEYWORD_NAME###", part, template)
Expand Down Expand Up @@ -165,8 +165,8 @@ def __init__(self, maindir: str, chapter: str, section: str) -> None:
self.outputdir = self.documentdir / Directories.subsections
self.is_chapter = False
parts = self.get_parts()
keyw_list = Helpers.read_keyword_order(self.documentdir, chapter, section)
self.keywords = Helpers.keywords_inverse_map(keyw_list)
keyw_list = helpers.read_keyword_order(self.documentdir, chapter, section)
self.keywords = helpers.keywords_inverse_map(keyw_list)
self.add_keyword = False
self.create_documents(parts)

Expand Down Expand Up @@ -202,7 +202,7 @@ def __init__(
self.is_chapter = False
parts = [self.keyword]
parts_extra = [f"{self.chapter}_{self.section}"]
keyw_list = Helpers.read_keyword_order_v2(self.keyword_dir, chapter, section)
self.keywords = Helpers.keywords_inverse_map(keyw_list)
keyw_list = helpers.read_keyword_order_v2(self.keyword_dir, chapter, section)
self.keywords = helpers.keywords_inverse_map(keyw_list)
self.add_keyword = True
self.create_documents(parts, parts_extra)
10 changes: 5 additions & 5 deletions scripts/python/src/fodt/extract_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from fodt.automatic_styles_filter import AutomaticStylesFilter4
from fodt.exceptions import ParsingException
from fodt.helpers import Helpers
from fodt import helpers
from fodt.styles_filter import StylesFilter
from fodt.xml_helpers import XMLHelper

Expand Down Expand Up @@ -200,7 +200,7 @@ def extract(self) -> tuple[str, str, set[str]]:
parser = xml.sax.make_parser()
handler = ExtractAndRemoveHandler(self.chapter, self.section)
parser.setContentHandler(handler)
fn = Helpers.chapter_fodt_file_path(self.maindir, self.chapter)
fn = helpers.chapter_fodt_file_path(self.maindir, self.chapter)
parser.parse(fn)
return (handler.get_section(), handler.get_doc(), handler.get_styles())

Expand Down Expand Up @@ -228,7 +228,7 @@ def extract(self) -> str:
self.create_section_file(section_txt, styles)

def write_updated_chapter(self, doc: str) -> None:
fn = Helpers.chapter_fodt_file_path(self.maindir, self.chapter)
fn = helpers.chapter_fodt_file_path(self.maindir, self.chapter)
with open(fn, "w", encoding='utf8') as f:
f.write(doc)
logging.info(f"Wrote updated chapter file to {fn}.")
Expand All @@ -250,10 +250,10 @@ def write_updated_chapter(self, doc: str) -> None:
def extract_section(maindir: str, section: str) -> None:
"""Extract the appendix from a FODT file."""
logging.basicConfig(level=logging.INFO)
(chapter, section) = Helpers.split_section(section)
(chapter, section) = helpers.split_section(section)
logging.info(f"Extracting section {section} from chapter {chapter}.")
extractor = ExtractSection(maindir, chapter, section)
extractor.extract()

# Script entry point: run extract_section() when this module is executed
# directly. `__name__` must be the bare module variable; the quoted literal
# '__name__' never equals '__main__', so the guard was always False.
# NOTE(review): extract_section is called without arguments, as in the
# original; presumably a decorator outside this view supplies them - confirm.
if __name__ == '__main__':
    extract_section()
6 changes: 3 additions & 3 deletions scripts/python/src/fodt/extract_subsections.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from fodt.constants import AutomaticStyles, Directories, FileNames, FileExtensions
from fodt.exceptions import HandlerDoneException, InputException, ParsingException
from fodt.helpers import Helpers
from fodt import helpers
from fodt.xml_helpers import XMLHelper

class PartsHandler(xml.sax.handler.ContentHandler):
Expand Down Expand Up @@ -179,10 +179,10 @@ def __init__(
section: int,
) -> None:
parser = xml.sax.make_parser()
keyword_file = Helpers.keyword_file(outputdir, chapter, section)
keyword_file = helpers.keyword_file(outputdir, chapter, section)
predefined_keywords = None
if keyword_file.exists():
predefined_keywords = Helpers.read_keyword_order(outputdir, chapter, section)
predefined_keywords = helpers.read_keyword_order(outputdir, chapter, section)
handler = PartsHandler(outputdir, chapter, section, predefined_keywords)
parser.setContentHandler(handler)
try:
Expand Down
Loading

0 comments on commit 6294c86

Please sign in to comment.