Skip to content

Commit

Permalink
namemerge: wip
Browse files Browse the repository at this point in the history
  • Loading branch information
unhammer committed Apr 26, 2023
1 parent 79019f8 commit b187465
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ TARGETS_PREFIX2_FAST = \
$(PREFIX2).t3x.bin \
capstag.autoseq.bin \
capstag.rlx.bin \
merge-names.rlx.bin \
prefs/$(PREFIX2).xml

TARGETS_PREFIX2 = \
Expand Down Expand Up @@ -142,6 +143,9 @@ $(LANG1).autoseq.bin: $(BASENAME).$(LANG1).lsx
# Compile the capstag separable-lexicon rules (.lsx) into a binary transducer;
# "lr" is the compilation direction passed to lsx-comp.
capstag.autoseq.bin: $(BASENAME).capstag.lsx
	lsx-comp lr $< $@

# Compile the name-merging CG-3 grammar into the binary form loaded by cg-proc.
# The recipe line must start with a hard TAB, otherwise make stops with
# "missing separator".
merge-names.rlx.bin: merge-names.rlx
	cg-comp $< $@

###############################################################################
## Bilingual dictionaries
###############################################################################
Expand Down
6 changes: 4 additions & 2 deletions apertium-nno-nob.nno-nob.dix
Original file line number Diff line number Diff line change
Expand Up @@ -273562,10 +273562,12 @@ eigen => egen, leite => lete, band => bånd, fram => frem, hemming => hemning, o
<e> <re>[A-ZÆØÅ]+[a-zæøå]*\-</re><i>basert</i><par n="adj"/></e>
<e> <re>[A-ZÆØÅ]+[a-zæøå]*\-</re><p><l>ulykke<s n="n"/><s n="f"/></l><r>ulykke</r></p><par n=":n_m_RL_f"/></e>
<e> <re>[0-9]+</re><p><l>-årig</l><r>-årig</r></p><par n="adj"/></e>

<!-- For generating forms that were "analysed" by CG -->
<e lm="np cgguess regex"><par n="letterdash+"/><par n="cgguess__np"/></e>

<!-- For generating forms that were "analysed" by merge-names.rlx: -->
<!-- Shape: one letterdash+ part, a single blank (<b/>), a second letterdash+ part, then the cgguess__np paradigm. -->
<!-- NOTE(review): only one <b/> is allowed here, so this appears to cover two-part names only, while merge-names.rlx also builds three-part lemmas (its f+l+l and f+f+l rules). Confirm whether letterdash+ can itself span a blank or whether a further entry is needed. -->
<e lm="np cgguess regex"><par n="letterdash+"/><i><b/></i><par n="letterdash+"/><par n="cgguess__np"/></e>

<!-- For analysing name-compound-parts ending in dash (letting the second part be looked up in .dix, something CG can't do): -->
<!-- not using letter pardefs since that makes the final transducer huge -->
<e lm="np-guio guess regex"> <re>[A-ZÆØÅ][a-zæøå]+\-</re> <p><l><s n="np"/><s n="guess"/></l> <r><s n="np"/><s n="guess"/></r></p></e>
Expand Down
65 changes: 65 additions & 0 deletions merge-names.rlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Window (sentence) boundaries: a cohort matching ".", "?" or the tag sent ends the window.
DELIMITERS = "." "?" sent ;

# Example input:
# "<Rolv>"
# "Rolv" np ant m Aa @subst→
# "<Ravlo>"
# "Ravlo" np cog Aa @app
# "<.>"
# "." sent clb aa
# "<Rolv>"
# "Rolv" np ant m Aa @subst→
# "<Molv>"
# "Molv" np ant m Aa @subst→
# "<Ravlo>"
# "Ravlo" np cog Aa @app
# "<.>"
# "." sent clb aa
#
# Expected output:
# "<Rolv Ravlo>"
# "Rolv Ravlo" np cog cgguess Aa @app
# "<.>"
# "." sent clb aa
# "<Rolv Molv Ravlo>"
# "Rolv Molv Ravlo" np cog cgguess Aa @app
# "<.>"
# "." sent clb aa




# Building blocks for the MERGECOHORTS rules below. Every regex here has one
# capture group; the rules refer to those groups as $1, $2, ... so the order in
# which these sets are combined in a rule matters.

# Wordform tag such as "<Rolv>"; captures the text between the angle brackets.
LIST wf = "<(.*)>"r ;
# Baseform tag such as "Rolv" (a quoted tag whose first character is not "<"); captures the whole baseform.
LIST bf = "([^<].*)"r ;
# Readings carrying both np and ant (presumably first names, as "Rolv" in the example input).
LIST fn = (np ant) ;
# Readings carrying both np and cog (presumably last names, as "Ravlo" in the example input).
LIST ln = (np cog) ;
# Casing tag consisting only of the letters A/a, such as Aa or aa in the example input.
# NOTE(review): the backslash-escaped parentheses presumably reach the regex engine
# as a capture group so the tag can be re-emitted through VSTR; confirm against the CG-3 manual.
LIST aa = /^\([Aa]+\)$/r ;
# Syntactic function tag, i.e. any tag beginning with @ (same escaped-parenthesis form as aa).
LIST syn = /^\(@.*\)$/r ;

# f+l: merge a cohort matching fn with the immediately following cohort matching ln.
# Captures as used by the template:
#   $1 / $2 = wordform / baseform of the target cohort,
#   $3 / $4 = wordform / baseform of the cohort at position 1,
#   $5 / $6 = casing (aa) and syntax (syn) tags of the cohort at position 1.
# The new cohort joins the two forms with a space and is tagged np cog cgguess,
# followed by the casing and syntax tags taken from the second cohort
# (compare "Rolv Ravlo" np cog cgguess Aa @app in the expected output).
MERGECOHORTS:f+l ("<$1 $3>"v "$2 $4"v np cog cgguess VSTR:$5 VSTR:$6)
wf + bf + fn
WITH
(1 wf + bf + ln + aa + syn)
;

# TODO: The commented-out rule below looks like it should work but doesn't; for some reason we need two separate rules (f+l+l and f+f+l) instead:
# MERGECOHORTS:f+l+l ("<$1 $3 $5>"v "$2 $4 $6"v np cog cgguess VSTR:$7 VSTR:$8)
# wf + bf + fn
# WITH
# (1 wf + bf LINK 0 fn OR ln)
# (2 wf + bf + ln + aa + syn)
# ;

# f+l+l: merge a cohort matching fn with the two following cohorts when both match ln.
# Captures as used by the template:
#   $1 / $2 = wordform / baseform of the target cohort,
#   $3 / $4 = wordform / baseform of the cohort at position 1,
#   $5 / $6 = wordform / baseform of the cohort at position 2,
#   $7 / $8 = casing (aa) and syntax (syn) tags of the cohort at position 2.
# Only the last cohort is required to carry casing and syntax tags; those are
# the ones copied onto the merged np cog cgguess reading.
MERGECOHORTS:f+l+l ("<$1 $3 $5>"v "$2 $4 $6"v np cog cgguess VSTR:$7 VSTR:$8)
wf + bf + fn
WITH
(1 wf + bf + ln)
(2 wf + bf + ln + aa + syn)
;

# f+f+l: merge two consecutive cohorts matching fn with a following cohort matching ln
# (the "Rolv Molv Ravlo" case in the example input).
# Captures as used by the template:
#   $1 / $2 = wordform / baseform of the target cohort,
#   $3 / $4 = wordform / baseform of the cohort at position 1,
#   $5 / $6 = wordform / baseform of the cohort at position 2,
#   $7 / $8 = casing (aa) and syntax (syn) tags of the cohort at position 2.
# Differs from the f+l+l rule only in requiring fn instead of ln at position 1.
MERGECOHORTS:f+f+l ("<$1 $3 $5>"v "$2 $4 $6"v np cog cgguess VSTR:$7 VSTR:$8)
wf + bf + fn
WITH
(1 wf + bf + fn)
(2 wf + bf + ln + aa + syn)
;
73 changes: 73 additions & 0 deletions modes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,79 @@
</pipeline>
</mode>

<!--
Work-in-progress nob→nno pipeline used to try out name merging.
It is not installed (install="no") and debug modes are generated for it
(gendebug="yes"). NOTE(review): "foo" looks like a placeholder name; confirm
before this mode is relied on anywhere.
-->
<mode name="foo" install="no" gendebug="yes">
<pipeline>
<program name="lt-proc -w -e">
<file name="nob-nno.automorf.bin"/>
</program>
<program name="cg-proc" debug-suff="seg">
<file name="nob-nno.seg.rlx.bin"/>
</program>
<program name="cg-proc">
<file name="nob-nno.rlx.bin"/>
</program>
<program name="cg-proc" debug-suff="capstag">
<file name="capstag.rlx.bin"/>
</program>
<program name="cg-proc" debug-suff="syntax">
<file name="nob-nno.syn.rlx.bin"/>
</program>
<program name="apertium-tagger -p -g $2">
<file name="nob-nno.prob"/>
</program>
<!-- Name merging: runs merge-names.rlx.bin right after the tagger and before pretransfer. -->
<program name="cg-proc -n" debug-suff="namemerge">
<file name="merge-names.rlx.bin"/>
</program>
<program name="apertium-pretransfer"/>
<program name="lsx-proc -w">
<file name="nob-nno.autoseq.bin"/>
</program>
<program name="lsx-proc -w" debug-suff="capsautoseq">
<file name="capstag.autoseq.bin"/>
</program>
<program name="lt-proc -b">
<file name="nob-nno.autobil.bin"/>
</program>
<program name="cg-proc" debug-suff="biprefs">
<file name="nob-nno.biprefs.rlx.bin"/>
</program>
<program name="lrx-proc -m">
<file name="nob-nno.autolex.bin"/>
</program>
<program name="apertium-transfer -b" debug-suff="refsyn">
<file name="apertium-nno-nob.nob-nno.refsyn.t1x"/>
<file name="nob-nno.refsyn.t1x.bin"/>
</program>
<program name="apertium-transfer -b">
<file name="apertium-nno-nob.nob-nno.t1x"/>
<file name="nob-nno.t1x.bin"/>
</program>
<program name="apertium-interchunk">
<file name="apertium-nno-nob.nob-nno.t2x"/>
<file name="nob-nno.t2x.bin"/>
</program>
<program name="apertium-postchunk">
<file name="apertium-nno-nob.nob-nno.t3x"/>
<file name="nob-nno.t3x.bin"/>
</program>
<program name="lsx-proc -w" debug-suff="tlseq">
<file name="nno.autoseq.bin"/>
</program>
<program name="lt-proc $1 -b" debug-suff="dgen">
<file name="nob-nno.autogen.bin"/>
</program>
<program name="cg-proc" debug-suff="e_vi-genprefs">
<file name="nob-nno.e_vi_genprefs.rlx.bin"/>
</program>
<program name="cg-proc -1 -n -g" debug-suff="genprefs">
<file name="nob-nno.genprefs.rlx.bin"/>
</program>
<program name="lt-proc -p">
<file name="nob-nno.autopgen.bin"/>
</program>
</pipeline>
</mode>

<mode name="nob-nno_e-notagger" install="no" gendebug="no">
<!--
Due to https://github.com/apertium/apertium/issues/37 we
Expand Down

0 comments on commit b187465

Please sign in to comment.