From 9f0847aac5506c08cca66d66a22f23b053fc60cb Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Tue, 13 Feb 2024 11:43:47 -0500 Subject: [PATCH 1/9] Add test word: principem --- tests/word_syllabification_tests.csv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/word_syllabification_tests.csv b/tests/word_syllabification_tests.csv index 23d550b..f49651e 100644 --- a/tests/word_syllabification_tests.csv +++ b/tests/word_syllabification_tests.csv @@ -87,4 +87,5 @@ in,in, maior,ma-ior, amplius,am-pli-us, adincresco,ad-in-cre-sco, -compressans,com-pres-sans \ No newline at end of file +compressans,com-pres-sans, +principem,prin-ci-pem \ No newline at end of file From 3553dca77a38977f325cd82c508684ebf299e080 Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Tue, 13 Feb 2024 11:58:11 -0500 Subject: [PATCH 2/9] Handle case of 'nc' consonant group --- volpiano_display_utilities/latin_word_syllabification.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/volpiano_display_utilities/latin_word_syllabification.py b/volpiano_display_utilities/latin_word_syllabification.py index 1a488ad..11463c2 100644 --- a/volpiano_display_utilities/latin_word_syllabification.py +++ b/volpiano_display_utilities/latin_word_syllabification.py @@ -203,9 +203,11 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: in which case we split as [vowel] + [i + vowel] 2. 1 consonant between vowel groups: keep the syllable boundary where it is (consonant is part of second syllable) - 3. 2 consonants between vowel groups: split the first consonant to the + 3. 2 consonants between vowel groups: split the first consonant to the first syllable, unless the two consonants form a consonant group, in - which case keep the group on the second syllable + which case keep the group on the second syllable. 
EXCEPTION: + the consonant group "nc" is only a group in sequences of three or more + consonants, so "nc" is split as "n-c" in this case. 4. 3+ consonants between vowel groups: add the first consonant or consonant group to the first syllable @@ -235,7 +237,7 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: elif num_ltrs_btw_vow_grps == 1: split_case = "1 consonant between vowels" elif num_ltrs_btw_vow_grps == 2: - if ltrs_btw_vow_grps not in _CONSONANT_GROUPS: + if ltrs_btw_vow_grps not in _CONSONANT_GROUPS or ltrs_btw_vow_grps == "nc": syl_bound = 1 split_case = "2 consonants between vowels" else: From e0c512230c23e301d015ac539a3869744f8f4113 Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Wed, 14 Feb 2024 10:15:23 -0500 Subject: [PATCH 3/9] Add 'st' consonant group --- tests/word_syllabification_tests.csv | 12 ++++++------ .../latin_word_syllabification.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/word_syllabification_tests.csv b/tests/word_syllabification_tests.csv index f49651e..3328f2f 100644 --- a/tests/word_syllabification_tests.csv +++ b/tests/word_syllabification_tests.csv @@ -32,7 +32,7 @@ conscientiam,con-sci-en-ti-am, monstrat,mon-strat, brachium,bra-chi-um, uulto,uul-to, -xpistus,xpis-tus, +xpistus,xpi-stus, yesse,yes-se, languorem,lan-guo-rem, coniugem,con-iu-gem, @@ -44,11 +44,11 @@ iniunxit,in-iun-xit, coniunctos,con-iunc-tos, ihericho,ihe-ri-co, extinguere,ex-tin-gue-re, -iniusticias,in-ius-ti-ci-as, +iniusticias,in-iu-sti-ci-as, unguenti,un-guen-ti, unguebat,un-gue-bat, adiuvemur,ad-iu-ve-mur, -subiecisti,sub-ie-cis-ti, +subiecisti,sub-ie-ci-sti, adiutorium,ad-iu-to-ri-um, mercenarijs,mer-ce-na-ri-js, iherosolimam,ihe-ro-so-li-mam, @@ -65,8 +65,8 @@ constitui,con-sti-tu-i, apposuit,ap-po-su-it, iubente,iu-ben-te, tuis,tu-is, -maiestatis,ma-ies-ta-tis, -iusticie,ius-ti-ci-e, +maiestatis,ma-ie-sta-tis, +iusticie,iu-sti-ci-e, iudeos,iu-de-os, iudex,iu-dex, polacuit,po-la-cu-it, @@ -78,7 
+78,7 @@ dei,de-i, iudicantem,iu-di-can-tem, voluit,vo-lu-it, fidei,fi-de-i, -fuisti,fu-is-ti, +fuisti,fu-i-sti, iam,iam, iacula,ia-cu-la, iubilemus,iu-bi-le-mus, diff --git a/volpiano_display_utilities/latin_word_syllabification.py b/volpiano_display_utilities/latin_word_syllabification.py index 11463c2..bca4843 100644 --- a/volpiano_display_utilities/latin_word_syllabification.py +++ b/volpiano_display_utilities/latin_word_syllabification.py @@ -51,6 +51,7 @@ "fr", "gl", "gr", + "st", } # Prefix groups are groups of characters that serve as common prefixes. For details, From cd99ad46742fe312c3477186511302a40c7e0563 Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Wed, 14 Feb 2024 21:06:07 -0500 Subject: [PATCH 4/9] Add additional test words with nasal consonants --- tests/word_syllabification_tests.csv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/word_syllabification_tests.csv b/tests/word_syllabification_tests.csv index 3328f2f..d6e7973 100644 --- a/tests/word_syllabification_tests.csv +++ b/tests/word_syllabification_tests.csv @@ -88,4 +88,6 @@ maior,ma-ior, amplius,am-pli-us, adincresco,ad-in-cre-sco, compressans,com-pres-sans, -principem,prin-ci-pem \ No newline at end of file +principem,prin-ci-pem, +redemptor,re-demp-tor, +imperator,im-pe-ra-tor \ No newline at end of file From 01efbec4f5863fb4016f9e777fc84e90a081f492 Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Fri, 16 Feb 2024 09:41:31 -0500 Subject: [PATCH 5/9] Account for nasalized consonants in syllabification logic --- .../latin_word_syllabification.py | 83 ++++++++++++------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/volpiano_display_utilities/latin_word_syllabification.py b/volpiano_display_utilities/latin_word_syllabification.py index bca4843..1f7202e 100644 --- a/volpiano_display_utilities/latin_word_syllabification.py +++ b/volpiano_display_utilities/latin_word_syllabification.py @@ -36,8 +36,6 @@ "gn", "qu", "gu", - "nc", - "mp", 
"sc", "pl", "pr", @@ -54,6 +52,9 @@ "st", } + +_NASALIZED_CONSONANTS: set = {"m", "n"} + # Prefix groups are groups of characters that serve as common prefixes. For details, # see README. _PREFIX_GROUPS: set = {"ab", "ob", "ad", "per", "sub", "in", "con"} @@ -206,11 +207,21 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: where it is (consonant is part of second syllable) 3. 2 consonants between vowel groups: split the first consonant to the first syllable, unless the two consonants form a consonant group, in - which case keep the group on the second syllable. EXCEPTION: - the consonant group "nc" is only a group in sequences of three or more - consonants, so "nc" is split as "n-c" in this case. - 4. 3+ consonants between vowel groups: add the first consonant or - consonant group to the first syllable + which case keep the group on the second syllable. + 4. 3+ consonants between vowel groups: group the final two consonants of + a 3-consonant sequence between vowel groups, if possible, and place preceding + consonants in the preceding syllable. If these cannot be grouped or there + are more than three consonants between vowel groups, group the + first two consonants, if possible, and add following consonants to the + following syllable. If neither the final two nor first two consonants can + be grouped, split the syllable after the first consonant. + + EXCEPTION: If the first consonant of a sequence of 2 or more consonants between + vowels is a nasalized consonant ("m" or "n"), we don't treat it as a consonant + for the purposes of the cases above. In practice, this means we only need to check + for the existence of a nasalized consonant at the start of a sequence of 3 or more + consonants between vowels (in the two consonant case, an initial "m" or "n" in the + sequence is already added to the preceding syllable). Two additional special cases exist. "X" is treated as a double consonant "ks" and the letter terminates the previous syllable. 
In cases where "i" @@ -229,33 +240,45 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: num_ltrs_btw_vow_grps = len(ltrs_btw_vow_grps) # Default case: syllable boundary immediately follows previous # vowel group. - syl_bound = 0 if num_ltrs_btw_vow_grps == 0: - split_case = "Hiatus" - elif ltrs_btw_vow_grps[0] == "x": + return 0, "Hiatus" + if ltrs_btw_vow_grps[0] == "x": + return 1, "X is double consonant" + if num_ltrs_btw_vow_grps == 1: + return 0, "1 consonant between vowels" + # If the first letter of the consonant sequence is a nasalized consonant, + # we add it to the prior syllable and treat the remaining consonants + # as if they were the only consonants between the vowel groups. + if ltrs_btw_vow_grps[0] in _NASALIZED_CONSONANTS: syl_bound = 1 - split_case = "X is double consonant" - elif num_ltrs_btw_vow_grps == 1: - split_case = "1 consonant between vowels" - elif num_ltrs_btw_vow_grps == 2: - if ltrs_btw_vow_grps not in _CONSONANT_GROUPS or ltrs_btw_vow_grps == "nc": - syl_bound = 1 - split_case = "2 consonants between vowels" - else: - split_case = "2 consonants between vowels (consonant group)" + ltrs_btw_vow_grps = ltrs_btw_vow_grps[1:] + num_ltrs_btw_vow_grps -= 1 + split_case_nasal_cons_tag = " (first consonant nasaslized)" + num_consonants = num_ltrs_btw_vow_grps + 1 + # If there is only one consonant remaining, we treat it as the only + # consonant between the vowel groups and add it to the following syllable. + if num_ltrs_btw_vow_grps == 1: + return syl_bound, "2 consonants between vowels" + split_case_nasal_cons_tag else: - # in situations where 3 or more consonants are between consecutive vowels, - # group the final two consonants, if possible (amplius -> am-pl-ius). If not, - # group the first two consonants (coniunctos -> con-iunc-tos), if possible. If - # neither the final two nor first two consonants can be grouped, split after - # the first consonant. 
- if ltrs_btw_vow_grps[1:] in _CONSONANT_GROUPS: - syl_bound = num_ltrs_btw_vow_grps - 2 - elif ltrs_btw_vow_grps[:2] in _CONSONANT_GROUPS: - syl_bound = 2 + num_consonants = num_ltrs_btw_vow_grps + syl_bound = 0 + split_case_nasal_cons_tag = "" + if num_ltrs_btw_vow_grps == 2: + if ltrs_btw_vow_grps not in _CONSONANT_GROUPS: + syl_bound += 1 + split_case = f"{num_consonants} consonants between vowels{split_case_nasal_cons_tag}" else: - syl_bound = 1 - split_case = "3+ consonants between vowels" + split_case = f"{num_consonants} consonants (consonant group) between vowels{split_case_nasal_cons_tag}" + elif ltrs_btw_vow_grps == "str": + split_case = f"{num_consonants} consonants ('str' group) between vowels{split_case_nasal_cons_tag}" + elif ltrs_btw_vow_grps[1:] in _CONSONANT_GROUPS: + syl_bound += 1 + split_case = f"{num_consonants} consonants (consonant group) between vowels{split_case_nasal_cons_tag}" + elif ltrs_btw_vow_grps[:2] in _CONSONANT_GROUPS: + split_case = f"{num_consonants} consonants (consonant group) between vowels{split_case_nasal_cons_tag}" + else: + syl_bound += 1 + split_case = f"{num_consonants} consonants between vowels{split_case_nasal_cons_tag}" return syl_bound, split_case From f1e2a774d4376a20182b5f1c452655f3f802f167 Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Fri, 16 Feb 2024 09:52:58 -0500 Subject: [PATCH 6/9] Update readme for nasalized consonants and new consonant groups --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3900d37..0219c52 100644 --- a/README.md +++ b/README.md @@ -141,12 +141,12 @@ flowchart TD direction LR H1[Consonant added before second vowel] ~~~ H2["'X' added after first vowel"] end - subgraph c2 [Two consonants between vowels] + subgraph c2 [Two consonants between vowels (5)] direction LR H3["Consonants are consonant cluster (4)"] -->|Yes| H4[Cluster added before second vowel] H3 -->|No| H5[First consonant added after first vowel, \n 
second consonant added before second vowel] end - subgraph c3 [Three-plus consonants between vowels] + subgraph c3 [Three-plus consonants between vowels (5)] direction LR H6["First two consonants are consonant cluster"] -->|Yes| H7[Cluster added after first vowel, \n remaining consosnants added before second vowel] H6 -->|No| H8[First consonant added after first vowel, \n remaining consonants added before second vowel] @@ -183,11 +183,12 @@ flowchart TD - "ch", "ph", "th", and "rh" - "gn" - "qu" and "gu" (when "u" serving as semivowel) - - "nc" - - "mp" - - "sc" + - "sc" and "st" - "p", "b", "t", "d", "c", "f", or "g" + "l" - "p", "b", "t", "d", "c", "f", or "g" + "r" + - "str" + +*Note (5)*: Nasalized consonants ("m" and "n") "attach" to the previous vowel when they begin sequences of two or more consonants. The remaining consonants are then treated as if they were the only consonants between vowels. For example, if the three-consonant sequence "mpr" occurs between two vowel groups, the "m" attaches to the preceding vowel group, and the remaining consonants "pr" are treated according to the logic of a two-consonant sequence between vowel groups. Certain exceptions to these general rules will occur. The module provides a means of overriding the default syllabification for a specific word in `cantus_text_syllabification.py`. Exceptions can be added to the `EXCEPTIONS_DICT` dictionary. 
From 722944cb91cfdec03aa729feeb9b7bb8f8b400cf Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Fri, 16 Feb 2024 16:14:02 -0500 Subject: [PATCH 7/9] Simplify logging in word syllabification --- .../latin_word_syllabification.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/volpiano_display_utilities/latin_word_syllabification.py b/volpiano_display_utilities/latin_word_syllabification.py index 1f7202e..7ae4b40 100644 --- a/volpiano_display_utilities/latin_word_syllabification.py +++ b/volpiano_display_utilities/latin_word_syllabification.py @@ -237,7 +237,7 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: and split_case is a string describing the case used to determine the syllable boundary (passed to logger). """ - num_ltrs_btw_vow_grps = len(ltrs_btw_vow_grps) + num_ltrs_btw_vow_grps: int = len(ltrs_btw_vow_grps) # Default case: syllable boundary immediately follows previous # vowel group. if num_ltrs_btw_vow_grps == 0: @@ -249,36 +249,33 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: # If the first letter of the consonant sequence is a nasalized consonant, # we add it to the prior syllable and treat the remaining consonants # as if they were the only consonants between the vowel groups. + num_consonants = num_ltrs_btw_vow_grps if ltrs_btw_vow_grps[0] in _NASALIZED_CONSONANTS: syl_bound = 1 ltrs_btw_vow_grps = ltrs_btw_vow_grps[1:] num_ltrs_btw_vow_grps -= 1 - split_case_nasal_cons_tag = " (first consonant nasaslized)" - num_consonants = num_ltrs_btw_vow_grps + 1 # If there is only one consonant remaining, we treat it as the only # consonant between the vowel groups and add it to the following syllable. 
if num_ltrs_btw_vow_grps == 1: - return syl_bound, "2 consonants between vowels" + split_case_nasal_cons_tag + return syl_bound, "2 consonants between vowels" else: - num_consonants = num_ltrs_btw_vow_grps syl_bound = 0 - split_case_nasal_cons_tag = "" if num_ltrs_btw_vow_grps == 2: if ltrs_btw_vow_grps not in _CONSONANT_GROUPS: syl_bound += 1 - split_case = f"{num_consonants} consonants between vowels{split_case_nasal_cons_tag}" + split_case = f"{num_consonants} consonants between vowels" else: - split_case = f"{num_consonants} consonants (consonant group) between vowels{split_case_nasal_cons_tag}" + split_case = f"{num_consonants} consonants (consonant group) between vowels" elif ltrs_btw_vow_grps == "str": - split_case = f"{num_consonants} consonants ('str' group) between vowels{split_case_nasal_cons_tag}" + split_case = f"{num_consonants} consonants ('str' group) between vowels" elif ltrs_btw_vow_grps[1:] in _CONSONANT_GROUPS: syl_bound += 1 - split_case = f"{num_consonants} consonants (consonant group) between vowels{split_case_nasal_cons_tag}" + split_case = f"{num_consonants} consonants (consonant group) between vowels" elif ltrs_btw_vow_grps[:2] in _CONSONANT_GROUPS: - split_case = f"{num_consonants} consonants (consonant group) between vowels{split_case_nasal_cons_tag}" + split_case = f"{num_consonants} consonants (consonant group) between vowels" else: syl_bound += 1 - split_case = f"{num_consonants} consonants between vowels{split_case_nasal_cons_tag}" + split_case = f"{num_consonants} consonants between vowels" return syl_bound, split_case From 70bc74e671703df468cb093f58cfbbcb7fce3996 Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Wed, 21 Feb 2024 09:07:28 -0500 Subject: [PATCH 8/9] Add type hint to num_consonants --- volpiano_display_utilities/latin_word_syllabification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/volpiano_display_utilities/latin_word_syllabification.py 
b/volpiano_display_utilities/latin_word_syllabification.py index 7ae4b40..ab9fdbd 100644 --- a/volpiano_display_utilities/latin_word_syllabification.py +++ b/volpiano_display_utilities/latin_word_syllabification.py @@ -249,7 +249,7 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: # If the first letter of the consonant sequence is a nasalized consonant, # we add it to the prior syllable and treat the remaining consonants # as if they were the only consonants between the vowel groups. - num_consonants = num_ltrs_btw_vow_grps + num_consonants: int = num_ltrs_btw_vow_grps if ltrs_btw_vow_grps[0] in _NASALIZED_CONSONANTS: syl_bound = 1 ltrs_btw_vow_grps = ltrs_btw_vow_grps[1:] From a29e090eb3ef2ac691effe66d4d499a5b9c6423c Mon Sep 17 00:00:00 2001 From: Dylan Hillerbrand Date: Wed, 21 Feb 2024 17:15:37 -0500 Subject: [PATCH 9/9] Add additional type hints to latin_word_syllabification.py --- volpiano_display_utilities/latin_word_syllabification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/volpiano_display_utilities/latin_word_syllabification.py b/volpiano_display_utilities/latin_word_syllabification.py index ab9fdbd..d27204a 100644 --- a/volpiano_display_utilities/latin_word_syllabification.py +++ b/volpiano_display_utilities/latin_word_syllabification.py @@ -251,7 +251,7 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: # as if they were the only consonants between the vowel groups. 
num_consonants: int = num_ltrs_btw_vow_grps if ltrs_btw_vow_grps[0] in _NASALIZED_CONSONANTS: - syl_bound = 1 + syl_bound: int = 1 ltrs_btw_vow_grps = ltrs_btw_vow_grps[1:] num_ltrs_btw_vow_grps -= 1 # If there is only one consonant remaining, we treat it as the only @@ -263,7 +263,7 @@ def _get_syl_bound_position(ltrs_btw_vow_grps: str) -> Tuple[int, str]: if num_ltrs_btw_vow_grps == 2: if ltrs_btw_vow_grps not in _CONSONANT_GROUPS: syl_bound += 1 - split_case = f"{num_consonants} consonants between vowels" + split_case: str = f"{num_consonants} consonants between vowels" else: split_case = f"{num_consonants} consonants (consonant group) between vowels" elif ltrs_btw_vow_grps == "str":