diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d4bb2cb..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index d013aef..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# SASTA documentation build configuration file, created by -# sphinx-quickstart on Wed Apr 20 09:29:46 2022. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. 
They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.doctest'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'SASTA' -copyright = '2022, Jan Odijk, Jelte van Boheemen, Martin Kroon' -author = 'Jan Odijk, Jelte van Boheemen, Martin Kroon' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '' -# The full version, including alpha/beta/rc tags. -release = '' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. 
-# -#html_theme = 'alabaster' -html_theme = 'classic' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'SASTAdoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'SASTA.tex', 'SASTA Documentation', - 'Jan Odijk, Jelte van Boheemen, Martin Kroon', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'sasta', 'SASTA Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'SASTA', 'SASTA Documentation', - author, 'SASTA', 'One line description of project.', - 'Miscellaneous'), -] diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 954237b..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/smallclausetable.csv b/docs/smallclausetable.csv deleted file mode 100644 index 74922ee..0000000 --- a/docs/smallclausetable.csv +++ /dev/null @@ -1,11 +0,0 @@ -Word 1,Word 1 additional,Word 2,Word 2 additional,Inserted verb,Example -demonstrative pronoun or known noun or personal pronoun.,,predicative adverb or adposition or adverb,,*moet* / *moeten*, -predicative adverb,,nominative personal pronoun,,*moet* / *moeten*, -demonstrative pronoun or known noun,,adjective,,*is* / *zijn*, -big locative adposition,,demonstrative pronoun or known noun or numeral,,*is* / *zijn*, -known noun,has genitive,different known noun,,*z'n*, -known noun,no genitive,different known noun,,*is* / *zijn*, -demonstrative pronoun or known noun or tswnoun,,infinitive,intransitive,*wil* / 
*willen*,mama slapen -demonstrative pronoun or known noun or tswnoun,human,infinitive,transitive,*wil* / *willen*,mama beertje pakken -demonstrative pronoun or known noun or tswnoun,human or animate,infinitive,pseudotransitive,*wil* / *willen*,mama eten -demonstrative pronoun or known noun or tswnoun,not( human or animate),infinitive,pseudotransitive,*ik wil* __,boekje lezen diff --git a/src/sastadev/ASTApostfunctions.py b/src/sastadev/ASTApostfunctions.py index bf5dbff..4619298 100644 --- a/src/sastadev/ASTApostfunctions.py +++ b/src/sastadev/ASTApostfunctions.py @@ -79,7 +79,7 @@ def neologisme(stree: SynTree) -> List[SynTree]: n]" or the special form "@n" applies. It uses the function *mdbasedquery* to achieve this. - .. autofunction:: ASTApostfunctions::mdbasedquery + .. autofunction:: sastadev.ASTApostfunctions::mdbasedquery ''' results1 = mdbasedquery(stree, errormarking, "['n']") @@ -93,7 +93,7 @@ def sempar(stree: SynTree) -> List[SynTree]: The function *sempar* identifies the nodes for which the CHAT error marking "[* s]" applies. It uses the function *mdbasedquery* to achieve this. - .. autofunction:: ASTApostfunctions::mdbasedquery + .. autofunction:: sastadev.ASTApostfunctions::mdbasedquery ''' @@ -106,7 +106,7 @@ def phonpar(stree: SynTree) -> List[SynTree]: The function *phonpar* identifies the nodes for which the CHAT error marking "[* p]" applies. It uses the function *mdbasedquery* to achieve this. - .. autofunction:: ASTApostfunctions::mdbasedquery + .. autofunction:: sastadev.ASTApostfunctions::mdbasedquery ''' results = mdbasedquery(stree, errormarking, "['p']") @@ -280,7 +280,7 @@ def getnounlemmas(allresults, _): *allresults* and the query identifier for nouns to obtain the lemmas for nouns. - .. autofunction:: ASTApostfunctions::getposlemmas + .. 
autofunction:: sastadev.ASTApostfunctions::getposlemmas ''' result = getposlemmas(allresults, nounreskey) @@ -293,7 +293,7 @@ def getlexlemmas(allresults, _): *allresults* and the query identifier for lexical verbs to obtain the lemmas for lexical verbs. - .. autofunction:: ASTApostfunctions::getposlemmas + .. autofunction:: sastadev.ASTApostfunctions::getposlemmas ''' result = getposlemmas(allresults, lexreskey) return result diff --git a/src/sastadev/Sziplus.py b/src/sastadev/Sziplus.py index 2871ea9..4a1dc8c 100644 --- a/src/sastadev/Sziplus.py +++ b/src/sastadev/Sziplus.py @@ -3,14 +3,14 @@ * Vr5+: through the function *vr5plus*: - .. autofunction:: Sziplus::vr5plus + .. autofunction:: sastadev.Sziplus::vr5plus In the meantime a different implementation using macros has replaced this function, so it has become obsolete * 6+: through the function *sziplus6*: - .. autofunction:: Sziplus::sziplus6 + .. autofunction:: sastadev.Sziplus::sziplus6 ''' @@ -56,7 +56,7 @@ def isindexnode(node: SynTree) -> bool: The function *noposcatin* is defined as follows: - .. autofunction:: Sziplus::noposcatin + .. autofunction:: sastadev.Sziplus::noposcatin * **Remark** The function *noposcatin* is better replaced by a function that checks for the absence of the attributes *cat* and *word*. @@ -70,7 +70,7 @@ def isvcinforppart(node: SynTree) -> bool: The function *isvcinforppart* determines whether a node is a node for a nonfinite verbal complement. 
That is the case if - * its category is one of *inf*, *teinf*, or *ppart* + * its category is one of *inf*, *teinf*, or *ppart* * its relation has the value *vc* ''' @@ -85,7 +85,7 @@ def isvcinforppart(node: SynTree) -> bool: def isrealnode(node: SynTree) -> bool: ''' - The fucntion *isrealnode* determines whether a nide is a real node, which it is if: + The function *isrealnode* determines whether a node is a real node, which it is if: * it is not a node for an interpunction sign * it is not a nonfinite complement @@ -94,7 +94,7 @@ The function *isindexnode* is defined as follows: - .. autofunction:: Sziplus::isindexnode + .. autofunction:: sastadev.Sziplus::isindexnode ''' pt = getattval(node, 'pt') rel = getattval(node, 'rel') @@ -134,11 +134,11 @@ def getnodecount(clause: SynTree) -> int: The function *isrealnode* is defined as follows: - .. autofunction:: Sziplus::isrealnode + .. autofunction:: sastadev.Sziplus::isrealnode The function *isvcinforppart* is defined as follows: - .. autofunction:: Sziplus::isvcinforppart + .. autofunction:: sastadev.Sziplus::isvcinforppart ''' nodectr = 0 @@ -157,11 +157,11 @@ def sziplus(syntree: SynTree, i: int) -> List[SynTree]: The function *sziplus* takes a SynTree *syntree* and an integer *i* and uses the function *nodeiplus* by applying it to *syntree*, *i*, and *clausequery*: - .. autodata:: Sziplus::clausequery + .. autodata:: sastadev.Sziplus::clausequery The function *nodeiplus* is defined as follows: - .. autofunction:: Sziplus::nodeiplus + .. autofunction:: sastadev.Sziplus::nodeiplus ''' results = nodeiplus(syntree, i, clausequery) @@ -178,11 +178,12 @@ def vr5plus(syntree: SynTree) -> List[SynTree]: The Xpath *vrquery* is defined as follows: - .. autodata:: Sziplus::vrquery + .. autodata:: sastadev.Sziplus::vrquery The function *nodeiplus* is defined as follows: - .. autofunction:: Sziplus::nodeiplus + .. 
autofunction:: sastadev.Sziplus::nodeiplus + ''' results = nodeiplus(syntree, 5, vrquery) return results @@ -198,7 +199,8 @@ def nodeiplus(syntree: SynTree, It makes use of the function *getnodecount*, which is defined as follows: - .. autofunction:: Sziplus::getnodecount + .. autofunction:: sastadev.Sziplus::getnodecount + ''' clauses = syntree.xpath(query) results = [] @@ -211,14 +213,12 @@ def nodeiplus(syntree: SynTree, def sziplus6(syntree: SynTree) -> List[SynTree]: ''' - The function *sziplus6* implements the TARSP language measure *6+*. It makes use of - the function *sziplus*, which is applied to the *syntree* in combination with the - integer *6*. + The function *sziplus6* implements the TARSP language measure *6+*. It makes use of the function *sziplus*, which is applied to the *syntree* in combination with the integer *6*. + + .. autofunction:: sastadev.Sziplus::sziplus - .. autofunction:: Sziplus::sziplus + * **Remark** The function *sziplus* was written in an early stage, but probably can now be better rewritten as a composed language measure. - * **Remark** The function *sziplus* was written in an early stage, but probably can now be better - rewritten as a composed language measure. ''' results = sziplus(syntree, 6) diff --git a/src/sastadev/TARSPpostfunctions.py b/src/sastadev/TARSPpostfunctions.py index f5baaee..ccc7ddc 100644 --- a/src/sastadev/TARSPpostfunctions.py +++ b/src/sastadev/TARSPpostfunctions.py @@ -37,11 +37,11 @@ def getqueriesbystage(queries: QueryDict) -> Dict[Stage, List[QId]]: It selects those QIds for which the query's (lower cased) subcategory is contained in the constant *tarsp_clausetypes*: - .. autodata:: TARSPpostfunctions::tarsp_clausetypes + .. autodata:: sastadev.TARSPpostfunctions::tarsp_clausetypes and which is not included in the list of *excludedqids*: - .. autodata:: TARSPpostfunctions::excludedqids + .. 
autodata:: sastadev.TARSPpostfunctions::excludedqids if these conditions are met, the QId is appended to the dictionary item with key equal to the stage of the query associated with QId. @@ -110,7 +110,7 @@ def getuttcountsbystage(queriesbystage: Dict[Stage, List[QId]], allresults: AllR number of utterances marked in *allresults.coreresults*. For this it uses the function *countutts*: - .. autofunction:: TARSPpostfunctions::countutts + .. autofunction:: sastadev.TARSPpostfunctions::countutts ''' uttcounts = {} @@ -130,7 +130,7 @@ def getstage(uttcounts: Dict[Stage, int], allresults: AllResults) -> Stage: The stage is taken into consideration if its number of scores divided by *gtotaal* is greater or equal to the value of *gofase_minthreshold*: - .. autodata:: TARSPpostfunctions::gofase_minthreshold + .. autodata:: sastadev.TARSPpostfunctions::gofase_minthreshold From the remaining candidates the highest stage value is selected. ''' @@ -157,17 +157,17 @@ def gofase(allresults: AllResults, thequeries: QueryDict) -> Stage: It first obtains *queriesbystage*, a dictionary of Stage, List[QId] items, via the function *getqueriesbystage* applied to *thequeries*: - .. autofunction:: TARSPpostfunctions::getqueriesbystage + .. autofunction:: sastadev.TARSPpostfunctions::getqueriesbystage Next, it obtains *uttcounts*, a dictionary of Stage, int items by applying the function *getuttcountsbystage* to *queriesbystage* and *allresults*: - .. autofunction:: TARSPpostfunctions::getuttcountsbystage + .. autofunction:: sastadev.TARSPpostfunctions::getuttcountsbystage Finally, it obtains the stage by applying the function *getstage* to *uttcounts* and *allresults*: - .. autofunction:: TARSPpostfunctions::getstage + .. autofunction:: sastadev.TARSPpostfunctions::getstage and then it returns the obtained *stage*. 
''' @@ -256,7 +256,7 @@ def pf(allresults: AllResults, allqueries: QueryDict) -> int: The *'Profielscore's* per stage are computed by *pf2* through *pf7*, each of which uses the function *genpfi*: - .. autofunction:: TARSPpostfunctions::genpfi + .. autofunction:: sastadev.TARSPpostfunctions::genpfi ''' postresults = allresults.postresults diff --git a/src/sastadev/alpino.py b/src/sastadev/alpino.py index 23fa61c..ff09278 100644 --- a/src/sastadev/alpino.py +++ b/src/sastadev/alpino.py @@ -4,7 +4,7 @@ * obtaining grammatical information from nouns in an Alpino parse - .. autofunction:: alpino::getdehetwordinfo + .. autofunction:: sastadev.alpino::getdehetwordinfo ''' #from __future__ import annotations @@ -55,7 +55,7 @@ def getdehetwordinfo(wrd: str) -> Tuple[List[WordInfo], str]: It returns a tuple consisting of a list of WordInfo objects and a string indicating the source where the properties have been found. - .. autofunction:: alpino::getalpinowordinfo + .. autofunction:: sastadev.alpino::getalpinowordinfo ''' wordinfos = lexicon.getwordinfo(wrd) diff --git a/src/sastadev/alpinoparsing.py b/src/sastadev/alpinoparsing.py index 17c1dc2..ddbce0f 100644 --- a/src/sastadev/alpinoparsing.py +++ b/src/sastadev/alpinoparsing.py @@ -3,11 +3,11 @@ * parsing: -.. autofunction:: alpinoparsing::parse +.. autofunction:: sastadev.alpinoparsing::parse * previewing a parse tree: -.. autofunction:: alpinoparsing::previewurl +.. autofunction:: sastadev.alpinoparsing::previewurl ''' diff --git a/src/sastadev/asta_queries.py b/src/sastadev/asta_queries.py index 78fd97a..7168aac 100644 --- a/src/sastadev/asta_queries.py +++ b/src/sastadev/asta_queries.py @@ -89,7 +89,7 @@ def asta_noun( The *expanded_noun_path* is the expansion of the macro **noun_path**. - .. autofunction:: treebankfunctions::asta_recognised_nounnode + .. 
autofunction:: sastadev.treebankfunctions::asta_recognised_nounnode ''' results = asta_x(stree, expanded_noun_xpath, asta_recognised_nounnode) @@ -102,7 +102,7 @@ def asta_lex(stree: SynTree) -> List[SynTree]: The *expanded_lex_path* is the expansion of the macro **lex_path**. - .. autofunction:: treebankfunctions::asta_recognised_lexnode + .. autofunction:: sastadev.treebankfunctions::asta_recognised_lexnode :noindex: ''' @@ -121,7 +121,7 @@ def asta_lemma(stree: SynTree) -> List[SynTree]: *lemma_path* and the function *asta_recognised_lexicalnode*. - .. autofunction:: treebankfunctions::asta_recognised_lexicalnode + .. autofunction:: sastadev.treebankfunctions::asta_recognised_lexicalnode :noindex: ''' @@ -322,7 +322,7 @@ def asta_bijzin(stree: SynTree) -> List[SynTree]: results, so the repetitions are removed from the results by means of the function *removerepetitions*: - .. autofunction:: asta_queries::removerepetitions + .. autofunction:: sastadev.asta_queries::removerepetitions ''' theyield = getyield(stree) clausenodes = stree.xpath(astabijzinquery) diff --git a/src/sastadev/basicreplacements.py b/src/sastadev/basicreplacements.py index c50d167..e0121a3 100644 --- a/src/sastadev/basicreplacements.py +++ b/src/sastadev/basicreplacements.py @@ -75,7 +75,7 @@ #: for variants of *er* (*der*, *d'r*) plus an adposition. The list is generated by using #: the constant *Rvzlist* #: -#: .. autodata:: basicreplacements::Rvzlist +#: .. autodata:: sastadev.basicreplacements::Rvzlist #: :no-value: #: ervzvariants: List[BasicReplacement] = \ @@ -120,16 +120,16 @@ #: instead of *ervan* for all adpositions that allow an R-pronoun. It derives these by #: using the constant *ervzvariants* #: -#: .. autodata:: basicreplacements::ervzvariants +#: .. autodata:: sastadev.basicreplacements::ervzvariants #: :no-value: #: #: It obtains replacements for short prepositions with 'e' or 'u' attached (e.g. 
*inne*, *innu* for *in*) from the constants #: *innereplacements* and *innureplacements*. #: -#: .. autodata:: basicreplacements::innereplacements +#: .. autodata:: sastadev.basicreplacements::innereplacements #: :no-value: #: -#: .. autodata:: basicreplacements::innureplacements +#: .. autodata:: sastadev.basicreplacements::innureplacements #: :no-value: #: basicreplacementlist: List[BasicReplacement] = [('as', 'als', pron, infpron, codared, dp), @@ -243,7 +243,7 @@ #: 5-tuples containing the correct orthography, 3 strings to encode metadata, and an integer for the associated penalty. The #: dictionary is derived from the constant *basicreplacementlist* #: -#: .. autodata:: basicreplacements::basicreplacementlist +#: .. autodata:: sastadev.basicreplacements::basicreplacementlist #: :no-value: #: basicreplacements: Dict[str, List[Tuple[List[str], str, str, str, int]]] = defaultdict(list) @@ -289,10 +289,10 @@ #: * penalty for this modification #: #: * the constant *closesyllshortprepexpansions*: -#: * .. autodata:: basicreplacements::closesyllshortprepexpansions +#: * .. autodata:: sastadev.basicreplacements::closesyllshortprepexpansions #: #: * the constant *innuclosesyllshortprepexpansions*: -#: * .. autodata:: basicreplacements::innuclosesyllshortprepexpansions +#: * .. autodata:: sastadev.basicreplacements::innuclosesyllshortprepexpansions #: basicexpansionlist: List[BasicExpansion] = \ [('dis', ['dit', 'is'], pron, infpron, contract, dp), @@ -328,7 +328,7 @@ #: containing its expansion as a list of words, plus 3 strings to encode metadata. The #: dictionary is derived from the constant *basicexpansionlist*: #: -#: .. autodata:: basicreplacements::basicexpansionlist +#: .. 
autodata:: sastadev.basicreplacements::basicexpansionlist #: :no-value: #: basicexpansions: Dict[str, List[Tuple[List[str], str, str, str, int]]] = defaultdict(list) @@ -419,7 +419,7 @@ def getdisambiguationdict() -> Dict[str, Tuple[TokenTreePredicate, str]]: The function *getdisambiguationdict* creates a dictionary with word:(cond, replacement) items. It selects its content from the constant *disambiguation_replacements*: - .. autodata:: basicreplacements::disambiguation_replacements + .. autodata:: sastadev.basicreplacements::disambiguation_replacements :no-value: ''' disambiguationdict = {} diff --git a/src/sastadev/cleanCHILDEStokens.py b/src/sastadev/cleanCHILDEStokens.py index 2ff751f..a8696ae 100644 --- a/src/sastadev/cleanCHILDEStokens.py +++ b/src/sastadev/cleanCHILDEStokens.py @@ -2,7 +2,7 @@ The module cleanCHILDEStokens provides the function cleantext to tokenize and clean utterances, yielding a cleaned text and associated metadata. -* ..autofunction:: cleantext +* ..autofunction:: sastadev.cleantext ''' import re diff --git a/src/sastadev/corrector.py b/src/sastadev/corrector.py index 63da09a..96b52b9 100644 --- a/src/sastadev/corrector.py +++ b/src/sastadev/corrector.py @@ -69,7 +69,7 @@ #: different word to avoid unwanted readings of the original word. It is filled by a #: call to the function *getdisambiguationdict* from the module *basicreplacements*. #: -#: .. autofunction:: basicreplacements::getdisambiguationdict +#: .. autofunction:: sastadev.basicreplacements::getdisambiguationdict #: disambiguationdict = getdisambiguationdict() @@ -729,7 +729,7 @@ def OLDgetexpansions(uttmd: TokenListMD) -> List[TokenListMD]: It checks whether a word is a contraction by checking whether it occurs in the dictionary *basicexpansions* from the module *basicreplacements* - .. autodata:: basicreplacements::basicexpansions + .. 
autodata:: sastadev.basicreplacements::basicexpansions :no-value: ''' @@ -841,14 +841,14 @@ def getexpansions2(tokenlist: List[Token], intokenposlist: List[int]) -> List[Tu It applies the function *getsingleitemexpansions* to the head (first) element of the *tokenlist* and recursively applies itself to the tail of *intokenlist*, after which it combines the results by the *combine* function. - .. autofunction:: corrector::getsingleitemexpansions + .. autofunction:: sastadev.corrector::getsingleitemexpansions - .. autofunction:: corrector::combine + .. autofunction:: sastadev.corrector::combine It checks whether a word is a contraction by checking whether it occurs in the dictionary *basicexpansions* from the module *basicreplacements* - .. autodata:: basicreplacements::basicexpansions + .. autodata:: sastadev.basicreplacements::basicexpansions :no-value: ''' @@ -884,7 +884,7 @@ def getexpansions(uttmd: TokenListMD) -> List[TokenListMD]: It does so by a call to the function *getexpansions2*, which recursively generates all alternatives with expansions: - .. autofunction:: corrector::getexpansions2 + .. autofunction:: sastadev.corrector::getexpansions2 ''' newtokenmds = [] @@ -1014,7 +1014,7 @@ def initdevoicing(token: Token, voiceless: str, voiced: str, newtokenmds: List[T A known word is *special* if it is contained in the variable *specialdevoicingwords*. - .. autodata:: corrector::specialdevoicingwords + .. autodata:: sastadev.corrector::specialdevoicingwords ''' # initial s -> z, f -> v @@ -1450,7 +1450,7 @@ def gaatie(word: str) -> List[str]: *gaatie*) by a sequence of two words where the first word equals word[:-2] ( *gaat*) and is a known word and the second word equals word[-2:] (*ie*). - .. autodata:: corrector::gaatiepattern + .. 
autodata:: sastadev.corrector::gaatiepattern ''' results = [] # kan-ie, moet-ie, gaat-ie, wil-ie @@ -1475,7 +1475,7 @@ def oldgaatie(word: str) -> List[str]: *gaatie*) by a sequence of two words where the first word equals word[:-2] ( *gaat*) and is a known word and the second word equals word[-2:] (*ie*). - .. autodata:: corrector::gaatiepattern + .. autodata:: sastadev.corrector::gaatiepattern ''' results = [] if gaatiere.match(word): @@ -1502,12 +1502,12 @@ def old_getwrongdetalternatives(tokensmd: TokenListMD, tree: SynTree, uttid: Utt belongs to words that would lead to wrong corrections, as specified in the constant *wrongdet_excluded_words*: - .. autodata:: corrector::wrongdet_excluded_words + .. autodata:: sastadev.corrector::wrongdet_excluded_words The properties of the token following are determined by the function *getdehetwordinfo* from the module *alpino*: - .. autofunction:: alpino::getdehetwordinfo + .. autofunction:: sastadev.alpino::getdehetwordinfo ''' correctiondone = False tokens = tokensmd.tokens @@ -1570,12 +1570,12 @@ def getwrongdetalternatives(tokensmd: TokenListMD, tree: SynTree, uttid: UttId) belongs to words that would lead to wrong corrections, as specified in the constant *wrongdet_excluded_words*: - .. autodata:: corrector::wrongdet_excluded_words + .. autodata:: sastadev.corrector::wrongdet_excluded_words The properties of the token following are determined by the function *getdehetwordinfo* from the module *alpino*: - .. autofunction:: alpino::getdehetwordinfo + .. autofunction:: sastadev.alpino::getdehetwordinfo ''' correctiondone = False tokens = tokensmd.tokens diff --git a/src/sastadev/dedup.py b/src/sastadev/dedup.py index a43e3ae..8a8d1c3 100644 --- a/src/sastadev/dedup.py +++ b/src/sastadev/dedup.py @@ -187,7 +187,7 @@ def incompletetreeleaves(stree: SynTree) -> List[SynTree]: The function *incompletetreeleaves returns a list of all nodes for words that are part of an incomplete sentence. 
A sentence is incomplete if it matches a query from the list of queries in the variable *incompletexpaths*. - .. autodata:: dedup::incompletexpaths + .. autodata:: sastadev.dedup::incompletexpaths ''' results = [] @@ -221,7 +221,7 @@ def isfilledpausenort(nort: Nort) -> bool: ''' The function *isfilledpausenort* returns the result of the function *isfilledpause* applied to the *word* of *nort*. - * .. autofunction:; dedup::isfilledpause + * .. autofunction:: sastadev.dedup::isfilledpause ''' theword = getword(nort) @@ -234,7 +234,7 @@ def getfilledpauses(nortlist: List[Nort]) -> List[Nort]: The function *getfilledpauses returns Norts that are in nortlist for which the function *isfilledpausenort* yields True. - * .. autofunction:: dedup::isfilledpausenort + * .. autofunction:: sastadev.dedup::isfilledpausenort ''' resultlist = [tok for tok in nortlist if isfilledpausenort(tok)] @@ -440,7 +440,7 @@ def normalisestring(str1: str) -> str: The function *normalisestring* carries out normalisation by means of the function *phoneticise* from the *phonetics* module: - .. autofunction:: phonetics::phoneticise + .. autofunction:: sastadev.phonetics::phoneticise ''' result = phoneticise(str1) @@ -456,7 +456,7 @@ def isnortduplicate(tlist1: List[Nort], tlist2: List[Nort]) -> bool: Normalisation is carried out to be robust against certain spelling variations and is taken care of by the function *normalisestring*: - * .. autofunction:: dedup::normalisestring + * .. autofunction:: sastadev.dedup::normalisestring ''' result = True @@ -558,7 +558,7 @@ def mlux(stree: SynTree) -> List[SynTree]: *mlux2* to *stree*. The latter function returns a node list and metadata on the excluded word nodes. - .. autofunction:: dedup::mlux2 + .. 
autofunction:: sastadev.dedup::mlux2 ''' result, _ = mlux2(stree) @@ -587,25 +587,25 @@ def mlux2(stree: SynTree) -> Tuple[List[SynTree], DupInfo]: * it updates these variables for nodes for filledpauses as found in the :ref:`filledpauseslexicon` * it updates these variables for nodes for words and word sequences that are duplicated, using the function *find_simpleduplicates2*: - * .. autofunction:: dedup::find_simpleduplicates2 + * .. autofunction:: sastadev.dedup::find_simpleduplicates2 * it updates these variables for nodes for word sequences that are duplicated, using the function *find_duplicates2*: - * .. autofunction:: dedup::find_duplicates2 + * .. autofunction:: sastadev.dedup::find_duplicates2 * it updates these variables for nodes for words the prefix of which is a repetition of its successor, where the prefix is larger than 50% of the length of its successor (long duplications). It does so by means of the function *getprefixwords2*: - * .. autofunction:: dedup::getprefixwords2 + * .. autofunction:: sastadev.dedup::getprefixwords2 * it updates these variables for nodes for unknown words that are a substring of their successor. It does so using the function *find_substringduplicates2*: - * .. autofunction:: dedup::find_substringduplicates2 + * .. autofunction:: sastadev.dedup::find_substringduplicates2 * it updates these variables for nodes for words that consist of consonants only * it updates these variables for nodes for words in incomplete sentences. Determining the incompleteness of a sentence is very difficult and is so far only done for a limited number of sentence types. It uses the function *incompletetreeleaves* for this purpose: - * .. autofunction:: dedup::incompletetreeleaves + * .. autofunction:: sastadev.dedup::incompletetreeleaves The function does not (yet) deal with: * false starts, e.g. 
word + *nee* / *eh* word; w of pos1 w of pos1 @@ -870,7 +870,7 @@ def getunwantedtokens(nortlist: List[Nort]) -> List[Nort]: The function *getunwantedtokens* returns nodes for tokens that are to be discarded for sample size as defined in the constant *unwantedtokenlist* - * .. autodata:: dedup::unwantedtokenlist + * .. autodata:: sastadev.dedup::unwantedtokenlist ''' results = [] @@ -886,7 +886,7 @@ def samplesize(stree: SynTree) -> List[SynTree]: The function *samplesize* yields the tokens to be excluded from the samplesize. It does so by applying the function *samplesize2* and ignoring the DupInfo object that is returned in the tuple. - .. autofunction:: dedup::samplesize2 + .. autofunction:: sastadev.dedup::samplesize2 ''' result, _ = samplesize2(stree) @@ -912,17 +912,17 @@ def samplesize2(stree: SynTree) -> Tuple[List[SynTree], DupInfo]: * The function first adds nodes to the *resultlist* that have been found by the *reduce* function in the correction module and that are represented in the metadata. * It next adds nodes for symbols that should be discarded. It obtains these nodes via the function *getunwantedtokens* - * .. autofunction:: dedup::getunwantedtokens + * .. autofunction:: sastadev.dedup::getunwantedtokens * It adds nodes for interjections and filledpauses to the resultlist via the function *getfilledpauses* - * .. autofunction:: dedup::getfilledpauses + * .. autofunction:: sastadev.dedup::getfilledpauses * It adds duplicates of the words *ja*, *nee*, *nou* * It adds short repetitions in the tokenlist with *ja*, *nee*, *nou* removed by applying the function *getprefixwords2* - * .. autofunction:: dedup::getprefixwords2 + * .. 
autofunction:: sastadev.dedup::getprefixwords2 ''' diff --git a/src/sastadev/deregularise.py b/src/sastadev/deregularise.py index 08509aa..fbed9bb 100644 --- a/src/sastadev/deregularise.py +++ b/src/sastadev/deregularise.py @@ -8,19 +8,19 @@ * functions for generating a list of overgeneralised forms, with their corrections and a characterisation of the error made. The main function for this is *makeparadigm*. Generating these forms and string them in a file is done by the module *update_inflectioncorrection* - .. autofunction:: deregularise::makeparadigm + .. autofunction:: sastadev.deregularise::makeparadigm * functions for finding the correct form for a wrongly inflected verb. The main function for this is *correctinflection*: - .. autofunction:: deregularise::correctinflection + .. autofunction:: sastadev.deregularise::correctinflection The module initialises the dictionary *correction* by reading in the file with the name contained in the constant *correctionfullname* - .. autodata:: deregularise::correctionfullname + .. autodata:: sastadev.deregularise::correctionfullname which uses the constsnt *correctionfilename* - .. autodata:: deregularise::correctionfilename + .. autodata:: sastadev.deregularise::correctionfilename **Remark** The function does not work perfectly yet for certain past participles ( @@ -105,10 +105,10 @@ def correctinflection(word: str) -> List[Tuple[str, str]]: metadata) for (wrongly inflected) *word*. It does so by calling the function *getcorrections* applied to *word* and the dictionary *correction*. - .. autodata:: deregularise::correction + .. autodata:: sastadev.deregularise::correction :annotation: - .. autofunction:: deregularise::getcorrections + .. autofunction:: sastadev.deregularise::getcorrections ''' @@ -430,7 +430,7 @@ def getcorrections(thestr: str, correction: Dict[str, Tuple[str, str]]) -> List[ * Otherwise, it splits the word up into a separable prefix, a nonseparable prefix, and a base by means of the function *desep*. 
* If the function *desep* returns an empty list, *getcorrections* also returns an empty list. - .. autofunction:: deregularise::desep + .. autofunction:: sastadev.deregularise::desep **Remark** The prefix *ge* is incorrectly added to verbs with inseparable prefixes, e.g. *vervald* is incorrectly mapped on *vergevallen* (instead of on diff --git a/docs/Tarsp.rst b/src/sastadev/docs/Tarsp.rst similarity index 99% rename from docs/Tarsp.rst rename to src/sastadev/docs/Tarsp.rst index 23da796..9aaa409 100644 --- a/docs/Tarsp.rst +++ b/src/sastadev/docs/Tarsp.rst @@ -372,7 +372,7 @@ T003: 6+ sziplus6 -.. autofunction:: Sziplus::sziplus6 +.. autofunction:: sastadev.Sziplus::sziplus6 * **Schlichting**: "6 Zinsdelen of meer in een zin" @@ -3493,7 +3493,7 @@ T106: Vo/bij voslashbij -.. autofunction:: queryfunctions::voslashbij +.. autofunction:: sastadev.queryfunctions::voslashbij * **Schlichting**: "Voornaamwoordelijk bijwoord, gesplitst. Het gesplitste voornaamwoordelijk bijwoord behoeft niet gescoord te worden bij Vobij, kolom Voornaamwoorden in Fase IV, alleen hier bij de Woordgroepen in Fase V." @@ -3518,7 +3518,7 @@ T107: Vobij vobij -.. autofunction:: queryfunctions::vobij +.. autofunction:: sastadev.queryfunctions::vobij @@ -3932,7 +3932,7 @@ T121: W(X) It has been implemented by means of a Python function **wx**. -.. autofunction:: imperatives::wx +.. autofunction:: sastadev.imperatives::wx In a later stage a query with macros was defined for it, but the Python function has not been replaced yet. @@ -4374,7 +4374,7 @@ T138: X en X (en X) xenx -.. autofunction:: xenx::xenx +.. autofunction:: sastadev.xenx::xenx * **Schlichting**: "Dit zijn twee woorden van dezelfde woordsoort, verbonden door 'en', die samen één Zinsdeel vormen, soms worden drie elementen verbonden. Ook woordgroepen kunnen op een dergelijke wijze verbonden worden. Behalve door 'en' kunnen de woorden of woordgroepen ook verbonden worden door 'of'." @@ -4669,7 +4669,7 @@ T151: V.U. Totaal vutotaal -.. 
autofunction:: TARSPpostfunctions::vutotaal +.. autofunction:: sastadev.TARSPpostfunctions::vutotaal * **Schlichting**: "Hier noteren we het totaal aantal Sociale en Stereotype Uitdrukkingen" @@ -4692,7 +4692,7 @@ T152: G Totaal gtotaal -.. autofunction:: TARSPpostfunctions::gtotaal +.. autofunction:: sastadev.TARSPpostfunctions::gtotaal @@ -4721,7 +4721,7 @@ T153: G.O Fase gofase -.. autofunction:: TARSPpostfunctions::gofase +.. autofunction:: sastadev.TARSPpostfunctions::gofase * **Schlichting**: Eerst wordt *G totaal* berekend. Zie hiervoor :ref:`G_Totaal`. "We berekenen hoeveel Zinsconstructies per Fase zijn gescoord. We letten hiervoor uitsluitend op de vakken van de Zinsconstructies: Mededelende zin, Vraag, en Gebiedende wijs. Per Fase noteren we het aantal Zinsconstructies in de eerste kolom bóven de Fase-aanduiding (niet meegeteld worden Intonatie en Koppelwerkwoord in Fase II, en 'hè' en Inversie in Fase III). Bij een bepaald percentage Zinsconstructies kunnen we zeggen dat het kind zich in die Fase aan het ontwikkelen is. De regel is dat 5% van de Analyse-eenheden van het taalsample in een Fase bij de Zinsconstructies gescoord moet zijn om het kind in die Fase te kunnen plaatsen. @@ -4746,7 +4746,7 @@ T154: PFII pf2 -.. autofunction:: TARSPpostfunctions::pf2 +.. autofunction:: sastadev.TARSPpostfunctions::pf2 @@ -4771,7 +4771,7 @@ T155: PFIII pf3 -.. autofunction:: TARSPpostfunctions::pf3 +.. autofunction:: sastadev.TARSPpostfunctions::pf3 * **Schlichting**: "Profielscore voor Fase III" @@ -4840,7 +4840,7 @@ T158: PFIV pf4 -.. autofunction:: TARSPpostfunctions::pf4 +.. autofunction:: sastadev.TARSPpostfunctions::pf4 * **Schlichting**: "Profielscore voor Fase IV" @@ -4864,7 +4864,7 @@ T159: PFV pf5 -.. autofunction:: TARSPpostfunctions::pf5 +.. autofunction:: sastadev.TARSPpostfunctions::pf5 @@ -4890,7 +4890,7 @@ T160: PFVI pf6 -.. autofunction:: TARSPpostfunctions::pf6 +.. 
autofunction:: sastadev.TARSPpostfunctions::pf6 * **Schlichting**: "Profielscore voor Fase VI" @@ -4914,7 +4914,7 @@ T161: PFVII pf7 -.. autofunction:: TARSPpostfunctions::pf7 +.. autofunction:: sastadev.TARSPpostfunctions::pf7 * **Schlichting**: "Profielscore voor Fase VII" @@ -4939,7 +4939,7 @@ T162: PF pf -.. autofunction:: TARSPpostfunctions::pf +.. autofunction:: sastadev.TARSPpostfunctions::pf * **Schlichting**: " Profielscore. Een tweede mogelijkheid om twee samples meer in detail te vergelijken is de Profielscore (PF). De Profielscore is het totaal aantal constructies waarbij een of meermalen gescoord is. Bij het vergelijken van twee taalsamples door middel van de Profielscore moeten de samples eenzelfde aantal Analyse-eenheden bevatten. De Fase I constructies worden voor de Profielscore niet meegeteld". Ook de constructies met twee ** tellen niet mee (Bijzin zonder verbindingswoord en de Vraagwoordconstructie zonder vraagwoord: ((Vr)WOnd+). *OndVC* telt men, indien deze niet in het taalsample voorkomt, wel mee als tenminste bij *OndWVC* of *OndWBVC* gescoord is. *XNeg* mag meegeteld worden indien een langere uiting met 'niet' gescoord is. Hetzelfde principe geldt voor *OndB*, *VCW* en *BX*. De vraagintonatie 'Into' mag meegeteld worden indien ergens anders in de vraagkolom gescoord is. In het meest linkse vak op de profielkaart staat het aantal constructies vermeld dat in een Fase ontwikkeld wordt. Hier kan men het gescoorde aantal constructies per Fase noteren. De Profielscore wordt genoteerd naast de G.O. Fase, bij PF. De maximale Profielscore is 100. 
diff --git a/docs/alpinoparser.rst b/src/sastadev/docs/alpinoparser.rst similarity index 100% rename from docs/alpinoparser.rst rename to src/sastadev/docs/alpinoparser.rst diff --git a/docs/asta.rst b/src/sastadev/docs/asta.rst similarity index 100% rename from docs/asta.rst rename to src/sastadev/docs/asta.rst diff --git a/docs/auxiliarymodules.rst b/src/sastadev/docs/auxiliarymodules.rst similarity index 100% rename from docs/auxiliarymodules.rst rename to src/sastadev/docs/auxiliarymodules.rst diff --git a/docs/chatannotations.rst b/src/sastadev/docs/chatannotations.rst similarity index 100% rename from docs/chatannotations.rst rename to src/sastadev/docs/chatannotations.rst diff --git a/docs/deviantlanguage.rst b/src/sastadev/docs/deviantlanguage.rst similarity index 94% rename from docs/deviantlanguage.rst rename to src/sastadev/docs/deviantlanguage.rst index eb04736..6e4cbb6 100644 --- a/docs/deviantlanguage.rst +++ b/src/sastadev/docs/deviantlanguage.rst @@ -28,15 +28,15 @@ We first give a global overview of the strategy followed, and illustrate it with A corrected treebank is generated by a call in *sastadev* to the function *correcttreebank* in the module *correcttreebank*: -.. autofunction:: correcttreebank::correcttreebank +.. autofunction:: sastadev.correcttreebank::correcttreebank Each syntactic structure in the input treebank is corrected by the function *correct_stree* in the module *correcttreebank*. -.. autofunction:: correcttreebank::correct_stree +.. autofunction:: sastadev.correcttreebank::correct_stree Since in the corrected utterance multiple words may have to be inserted or removed, and since the linear order in Alpino syntactic structures is indicated by means of *begin* and *end* attributes (see :ref:`alpinoparser`), SASTA uses "inflated" syntactic structures, i.e syntactic structures in which the *begin* attribute of the first original word has the value *'10'*, and the *begin* attribute of the next word has the value *'20'*. 
The values of the *end* attribute are equal to str(int(begin) + 1 ), as usual. This enables one to delete and insert nodes for words without having to adapt the *begin* and *end* values of each node. Inflating a syntactic structure in this way is done by the function *treeinflate* in the module treebankfunctions. -.. autofunction:: treebankfunctions::treeinflate +.. autofunction:: sastadev.treebankfunctions::treeinflate Various Types of Deviant Language --------------------------------- @@ -72,14 +72,14 @@ Basic Replacements For an utterance that contains a word that occurs with a deviant spelling (e.g. *as* instead of *als*) an alternative variant is generated containing the correct spelling ("als") and with metadata about this replacement. This is done in function *getalternativetokenmds* in the module *corrector* by checking whether the word occurs in the dictionary *basicreplacements*: -.. autodata:: basicreplacements::basicreplacements +.. autodata:: sastadev.basicreplacements::basicreplacements :no-value: Basic Expansions """""""""""""""" For an utterance that contains a contracted word (e.g. *das* instead of *dat is*) an alternative variant is generated containing the expansion ("dat is") and with metadata about this replacement. This is done in function *getalternatives* in the module *corrector* by applying the function *getexpansions*: -.. autofunction:: corrector::getexpansions +.. autofunction:: sastadev.corrector::getexpansions Verb form + *ie* written as one word @@ -87,13 +87,13 @@ Verb form + *ie* written as one word Example: *gaatie* / *gaat ie* (goes he): probably written as one word because it is pronounced as one word. This is dealt with by the *gaatie* function in the corrector module: - * .. autofunction:: corrector::gaatie + * .. autofunction:: sastadev.corrector::gaatie Dehyphenation """"""""""""" * *zie-ken-huis* / *ziekenhuis* (hospital): syllables pronounced separately (used mostly in ASTA). 
This is dealt with in the function *getalternativetokenmds* in the module corrector by a call to the function *fullworddehyphenate*: - * .. autofunction:: stringfunctions::fullworddehyphenate + * .. autofunction:: sastadev.stringfunctions::fullworddehyphenate (Regional) informal spoken language @@ -104,7 +104,7 @@ Dehyphenation *ie*-diminutives """""""""""""""" -.. automodule:: iedims +.. automodule:: sastadev.iedims @@ -125,7 +125,7 @@ Initial Devoicing of fricatives The voiced fricative consonants /v/ and /z/) are often pronounced voiceless, and this is often reflected in the transcript. Examples are *sit* instead of *zit*, *fan* instead of *van*, etc. An alternative with a voiced initial fricative is generated by the function *initdevoicing* from the module *corrector*. This function is called both for /f/-/v/ and for /s/-/z/: -.. autofunction:: corrector::initdevoicing +.. autofunction:: sastadev.corrector::initdevoicing Wrong pronunciation of content words in the transcript """""""""""""""""""""""""""""""""""""""""""""""""""""" @@ -147,7 +147,7 @@ Determiner agreement errors Determiner Agreement errors (e.g. *de huis* instead of *het huis*) are dealt with by the function *getwrongdetalternatives* from the module *corrector*. -.. autofunction:: corrector::getwrongdetalternatives +.. autofunction:: sastadev.corrector::getwrongdetalternatives @@ -168,7 +168,7 @@ SASTA currently only covers overgeneralisations of verbs. The matter is not comp This is dealt with in the function *getalternativetokenmds* of the module *corrector* by a call to the function *correctinflection* of the module *deregularise* applied to a word that is unknown. -.. autofunction:: deregularise::correctinflection +.. autofunction:: sastadev.deregularise::correctinflection For more details, see the module :ref:`deregularise`. 
@@ -186,7 +186,7 @@ In order to prevent an analysis with the less plausible reading by Alpino we rep The replacement is carried out in the function *getalternativetokenmds* of module *corrector* by checking whether the word occurs in the *disambiguationdict* and replacing it by the replacement specified there. -.. autodata:: corrector::disambiguationdict +.. autodata:: sastadev.corrector::disambiguationdict :no-value: @@ -200,7 +200,7 @@ Limitations of Alpino Small clauses """"""""""""" -.. automodule:: smallclauses +.. automodule:: sastadev.smallclauses .. vz+defdet: @@ -224,4 +224,4 @@ Examples: These replacements are done in the function *getalternatives* from the module *corrector* by a call to the function *correctPdit*. -.. autofunction:: corrector::correctPdit \ No newline at end of file +.. autofunction:: sastadev.corrector::correctPdit \ No newline at end of file diff --git a/docs/index.rst b/src/sastadev/docs/index.rst similarity index 100% rename from docs/index.rst rename to src/sastadev/docs/index.rst diff --git a/docs/introduction.rst b/src/sastadev/docs/introduction.rst similarity index 100% rename from docs/introduction.rst rename to src/sastadev/docs/introduction.rst diff --git a/docs/lexicons.rst b/src/sastadev/docs/lexicons.rst similarity index 92% rename from docs/lexicons.rst rename to src/sastadev/docs/lexicons.rst index 60ca802..69d3409 100644 --- a/docs/lexicons.rst +++ b/src/sastadev/docs/lexicons.rst @@ -37,18 +37,18 @@ CELEX ----- The lexicon that we use most is CELEX. There is a module lexicon.py which provides the interface to the lexion actually used: -.. automodule:: lexicon +.. automodule:: sastadev.lexicon But in it the actual lexicon used is the CELEX lexicon, taken care of by the celexlexicon module: -.. automodule:: celexlexicon +.. automodule:: sastadev.celexlexicon .. _top3000: Top3000 ------- -.. automodule:: top3000 +.. automodule:: sastadev.top3000 .. 
_namelexicons: @@ -59,12 +59,12 @@ Names very often consist of multiple words. For individual words it is therefore important to check whether they can be a part of a (possibly multiword) name. The relevant module is the namepartlexicon module. -.. automodule:: namepartlexicon +.. automodule:: sastadev.namepartlexicon The dictionary with nameparts has been derived by the SASTA script getnamepartslexicon: -.. automodule:: getnamepartslexicon -.. automodule:: namelexicons +.. automodule:: sastadev.getnamepartslexicon +.. automodule:: sastadev.namelexicons .. _filledpauseslexicon: @@ -82,7 +82,7 @@ This file has been created by searching for strings marked with & in the Dutch C Compounds --------- -.. automodule:: compounds +.. automodule:: sastadev.compounds .. _exceptionslists: diff --git a/docs/methodimplementation.rst b/src/sastadev/docs/methodimplementation.rst similarity index 98% rename from docs/methodimplementation.rst rename to src/sastadev/docs/methodimplementation.rst index e4c09a5..84d71b4 100644 --- a/docs/methodimplementation.rst +++ b/src/sastadev/docs/methodimplementation.rst @@ -35,7 +35,7 @@ The language measures with their properties are internally stored in a QueryDic A special class *Method* has been defined in the module methods.py. -.. autoclass:: methods::Method +.. autoclass:: sastadev.methods::Method @@ -104,11 +104,11 @@ where the definition only contains pieces of Xpath-code. Inside SASTA an Xpath query is first expanded so that all macros have been replaced by pieces of Xpath code. Only then is the query launched. The module that takes care of macros is the module macros.py: -.. automodule:: macros +.. automodule:: sastadev.macros The expansion function in this module is called *expandmacros*. It takes as input a string and outputs a string with the macros expanded: -.. autofunction:: macros::expandmacros +.. 
autofunction:: sastadev.macros::expandmacros Expanding the following Xpath expression:: @@ -144,7 +144,7 @@ Generation of macros Macros make writing queries much simpler and makes it easier to maintain them. But in some cases macros are not enough, e.g. if a macro expansion is very large but built up in a regular way. One such case is dealt with by the module generatemacros.py: -.. automodule:: generatemacros +.. automodule:: sastadev.generatemacros Python functions ---------------- @@ -154,7 +154,7 @@ In some cases formulating the query in Xpath is too cumbersome (even with macros In the definition of the query one includes the name of the Python function, for example (from TARSP): *sziplus6*. This function must be defined somewhere, of course. This can be done in any python module. -.. automodule:: external_functions +.. automodule:: sastadev.external_functions diff --git a/docs/methods.rst b/src/sastadev/docs/methods.rst similarity index 100% rename from docs/methods.rst rename to src/sastadev/docs/methods.rst diff --git a/docs/references.rst b/src/sastadev/docs/references.rst similarity index 100% rename from docs/references.rst rename to src/sastadev/docs/references.rst diff --git a/docs/sastaanddev.rst b/src/sastadev/docs/sastaanddev.rst similarity index 95% rename from docs/sastaanddev.rst rename to src/sastadev/docs/sastaanddev.rst index 6741470..d855769 100644 --- a/docs/sastaanddev.rst +++ b/src/sastadev/docs/sastaanddev.rst @@ -14,5 +14,5 @@ It takes as input a word document or a CHAT file, or a (corrected) annotation fi SASTAdev -------- -.. automodule:: sastadev +.. 
automodule:: sastadev.__main__ diff --git a/docs/stap.rst b/src/sastadev/docs/stap.rst similarity index 100% rename from docs/stap.rst rename to src/sastadev/docs/stap.rst diff --git a/src/sastadev/iedims.py b/src/sastadev/iedims.py index 8345f93..059217d 100644 --- a/src/sastadev/iedims.py +++ b/src/sastadev/iedims.py @@ -20,7 +20,7 @@ The module has been tested against a gold reference of all words ending in ie(s) from OpenSonaR. The module to use for a renewed test is tests.iedims_test.py, the file is tests.iediminutives.iedimsgold2.csv -.. autofunction:: iedims::getjeforms +.. autofunction:: sastadev.iedims::getjeforms ''' @@ -365,8 +365,8 @@ def getjeforms(ieform: str) -> List[str]: It crucially makes use of the functions *getjeformsnolex* and *getbase* - .. autofunction:: iedims::getjeformsnolex - .. autofunction:: iedims::getbase + .. autofunction:: sastadev.iedims::getjeformsnolex + .. autofunction:: sastadev.iedims::getbase ''' results1 = getjeformsnolex(ieform) diff --git a/src/sastadev/namepartlexicon.py b/src/sastadev/namepartlexicon.py index 2559d56..9258e75 100644 --- a/src/sastadev/namepartlexicon.py +++ b/src/sastadev/namepartlexicon.py @@ -9,8 +9,8 @@ -.. autofunction:: namepart_isa_namepart -.. autofunction:: namepart_isa_namepart_uc +.. autofunction:: sastadev.namepartlexicon::namepart_isa_namepart +.. autofunction:: sastadev.namepartlexicon::namepart_isa_namepart_uc ''' diff --git a/src/sastadev/phonetics.py b/src/sastadev/phonetics.py index 09f0e4b..5a3d654 100644 --- a/src/sastadev/phonetics.py +++ b/src/sastadev/phonetics.py @@ -22,7 +22,7 @@ def phoneticise(instr: str) -> str: The function *phoneticise* carries out substitutions for string patterns as given in the variable *replacements* derived from the variable *replacementpatterns*. - .. autodata:: phonetics::replacementpatterns + .. 
autodata:: sastadev.phonetics::replacementpatterns ''' result = instr diff --git a/src/sastadev/queryfunctions.py b/src/sastadev/queryfunctions.py index 487140f..0cbc91f 100644 --- a/src/sastadev/queryfunctions.py +++ b/src/sastadev/queryfunctions.py @@ -98,7 +98,7 @@ def auxvobij(stree: SynTree, pred: Callable[[SynTree, SynTree, SynTree], bool]) (which should be analysed as TARSP *Vobij*) from those that are not adjacent (which should be analysed as TARSP Vo/Bij). - .. autodata:: queryfunctions::voslashbijxpath + .. autodata:: sastadev.queryfunctions::voslashbijxpath ''' RPnodes = stree.xpath(voslashbijxpath) @@ -126,11 +126,11 @@ def vobij(stree: SynTree) -> List[SynTree]: * The *vobijxpath* expression matches with so-called adverbial pronouns: - .. autodata:: queryfunctions::vobijxpath + .. autodata:: sastadev.queryfunctions::vobijxpath * The function *auxvobij* finds adjacent R-pronoun + adposition cases: - .. autofunction:: queryfunctions::auxvobij + .. autofunction:: sastadev.queryfunctions::auxvobij ''' results1 = stree.xpath(vobijxpath) @@ -147,7 +147,7 @@ def voslashbij(stree: SynTree) -> List[SynTree]: The function *voslashbij* uses the function *auxvobij* to find non-adjacent R-pronoun + adposition cases: - .. autofunction:: queryfunctions::auxvobij + .. autofunction:: sastadev.queryfunctions::auxvobij :noindex: @@ -197,6 +197,7 @@ def hequery(syntree: SynTree) -> List[SynTree]: """ def vudivers(syntree: SynTree) -> List[SynTree]: + expandedvudiversxpath = expandmacros(vudiversxpath) expandedvudiversxpath = expandmacros(vudiversxpath) rawresults = syntree.xpath(expandedvudiversxpath) heresults = hequery(syntree) diff --git a/src/sastadev/smallclauses.py b/src/sastadev/smallclauses.py index 2305075..e32e8c4 100644 --- a/src/sastadev/smallclauses.py +++ b/src/sastadev/smallclauses.py @@ -31,7 +31,7 @@ The different subcases are dealt with by the function *smallclauses*: -.. autofunction:: smallclauses::smallclauses +.. 
autofunction:: sastadev.smallclauses::smallclauses ''' diff --git a/src/sastadev/stringfunctions.py b/src/sastadev/stringfunctions.py index ea73e58..fc8eb1a 100644 --- a/src/sastadev/stringfunctions.py +++ b/src/sastadev/stringfunctions.py @@ -172,8 +172,8 @@ def fullworddehyphenate(word: str, inlexicon: Callable[[str], bool]) -> List[str The functions *dehyphenate* and *delhyphenprefix* are described here: - * .. autofunction:: stringfunctions::dehyphenate - * .. autofunction:: stringfunctions::delhyphenprefix + * .. autofunction:: sastadev.stringfunctions::dehyphenate + * .. autofunction:: sastadev.stringfunctions::delhyphenprefix ''' diff --git a/src/sastadev/tblex.py b/src/sastadev/tblex.py index 07076b2..bcbd7b9 100644 --- a/src/sastadev/tblex.py +++ b/src/sastadev/tblex.py @@ -26,12 +26,12 @@ def recognised_wordnodepos(node: SynTree, pos: str) -> bool: * the node is a node for a compound, as determined by the function *iscompound*: - .. autofunction:: treebankfunctions::iscompound + .. autofunction:: sastadev.treebankfunctions::iscompound :noindex: * the node is a node for a diminutive, as determined by the function *isdiminutive*: - .. autofunction:: treebankfunctions::isdiminutive + .. autofunction:: sastadev.treebankfunctions::isdiminutive :noindex: * the node is a node for a name part, as determined by the function *lex.isa_namepart* @@ -59,11 +59,11 @@ def recognised_wordnode(node: SynTree) -> bool: * the node is a node for a compound, as determined by the function *iscompound*: - .. autofunction:: treebankfunctions::iscompound + .. autofunction:: sastadev.treebankfunctions::iscompound * the node is node for a diminutive, as determined by the function *isdiminutive*: - .. autofunction:: treebankfunctions::isdiminutive + .. 
autofunction:: sastadev.treebankfunctions::isdiminutive * the node is a node for a name part, as determined by the function *lex.isa_namepart* @@ -110,7 +110,7 @@ def asta_recognised_lexnode(node: SynTree) -> bool: This is the case if *pt* equals *ww* and the node is not a substantivised verb as determined by the function *issubstantivised_verb*: - .. autofunction:: treebankfunctions::issubstantivised_verb + .. autofunction:: sastadev.treebankfunctions::issubstantivised_verb ''' if issubstantivised_verb(node): @@ -129,37 +129,37 @@ def asta_recognised_nounnode(node: SynTree) -> bool: * either the node meets the conditions of *sasta_pseudonym* - .. autofunction:: treebankfunctions::sasta_pseudonym + .. autofunction:: sastadev.treebankfunctions::sasta_pseudonym * or the node meets the conditions of *spec_noun* - .. autofunction:: treebankfunctions::spec_noun + .. autofunction:: sastadev.treebankfunctions::spec_noun * or the node meets the conditions of *is_duplicate_spec_noun* - .. autofunction:: treebankfunctions::is_duplicate_spec_noun + .. autofunction:: sastadev.treebankfunctions::is_duplicate_spec_noun * or the node meets the conditions of *sasta_long* - .. autofunction:: treebankfunctions::sasta_long + .. autofunction:: sastadev.treebankfunctions::sasta_long * or the node meets the conditions of *recognised_wordnodepos* - .. autofunction:: treebankfunctions::recognised_wordnodepos + .. autofunction:: sastadev.treebankfunctions::recognised_wordnodepos * or the node meets the conditions of *recognised_lemmanodepos(node, pos)* - .. autofunction:: treebankfunctions::recognised_lemmanodepos(node, pos) + .. autofunction:: sastadev.treebankfunctions::recognised_lemmanodepos(node, pos) However, the node should: * neither consist of lower case consonants only, as determined by *all_lower_consonantsnode*: - .. autofunction:: treebankfunctions::all_lower_consonantsnode + .. 
autofunction:: sastadev.treebankfunctions::all_lower_consonantsnode * nor satisfy the conditions of *short_nucl_n*: - .. autofunction:: treebankfunctions::short_nucl_n + .. autofunction:: sastadev.treebankfunctions::short_nucl_n ''' diff --git a/src/sastadev/treebankfunctions.py b/src/sastadev/treebankfunctions.py index 8616228..0b47c30 100644 --- a/src/sastadev/treebankfunctions.py +++ b/src/sastadev/treebankfunctions.py @@ -85,6 +85,10 @@ def md2XMLElement(self): complrels = ['su', 'obj1', 'pobj1', 'obj2', 'se', 'pc', 'vc', 'svp', 'predc', 'ld'] +headrels = ['hd', 'crd'] + +extendedheadrels = ['hdf'] + mainclausecats = ['smain', 'whq', 'sv1'] ptsubclasspairs = [('n', 'ntype'), ('tw', 'numtype'), ('vnw', 'vwtype'), ('lw', 'lwtype'), ('vz', 'vztype'), @@ -787,7 +791,7 @@ def iscompound(node: SynTree) -> bool: This is the case if the *lemma* attribute contains the compound separator *compoundsep* - .. autodata:: treebankfunctions::compoundsep + .. autodata:: sastadev.treebankfunctions::compoundsep """ lemma = getattval(node, 'lemma') result = compoundsep in lemma @@ -860,7 +864,7 @@ def sasta_long(node: SynTree) -> bool: The function sasta_long checks whether the length of the *word* attribute of the node is greater or equal to *min_sasta_length*: - .. autodata:: treebankfunctions::min_sasta_length + .. autodata:: sastadev.treebankfunctions::min_sasta_length """ word = getattval(node, 'word') @@ -914,7 +918,7 @@ def onbvnwdet(node: SynTree) -> bool: # This is the case if *pt* equals *ww* and the node is not a substantivised verb as # determined by the function *issubstantivised_verb*: # -# .. autofunction:: treebankfunctions::issubstantivised_verb +# .. autofunction:: sastadev.treebankfunctions::issubstantivised_verb # # """ # if issubstantivised_verb(node): @@ -947,45 +951,45 @@ def ismonthname(node: SynTree) -> bool: # # * either the node meets the conditions of *sasta_pseudonym* # -# .. autofunction:: treebankfunctions::sasta_pseudonym +# .. 
autofunction:: sastadev.treebankfunctions::sasta_pseudonym # # * or the node is part of name (pt = *spec*, spectype= *deeleigen*) # -# .. autofunction:: treebankfunctions::isspecdeeleigen +# .. autofunction:: sastadev.treebankfunctions::isspecdeeleigen # # * or the node is a month name (these are not always nouns in Alpino) # -# .. autofunction:: treebankfunctions::ismonthname +# .. autofunction:: sastadev.treebankfunctions::ismonthname # # * or the node meets the conditions of *spec_noun* # -# .. autofunction:: treebankfunctions::spec_noun +# .. autofunction:: sastadev.treebankfunctions::spec_noun # # * or the node meets the conditions of *is_duplicate_spec_noun* # -# .. autofunction:: treebankfunctions::is_duplicate_spec_noun +# .. autofunction:: sastadev.treebankfunctions::is_duplicate_spec_noun # # * or the node meets the conditions of *sasta_long* # -# .. autofunction:: treebankfunctions::sasta_long +# .. autofunction:: sastadev.treebankfunctions::sasta_long # # * or the node meets the conditions of *recognised_wordnodepos* # -# .. autofunction:: treebankfunctions::recognised_wordnodepos +# .. autofunction:: sastadev.treebankfunctions::recognised_wordnodepos # # * or the node meets the conditions of *recognised_lemmanodepos(node, pos)* # -# .. autofunction:: treebankfunctions::recognised_lemmanodepos(node, pos) +# .. autofunction:: sastadev.treebankfunctions::recognised_lemmanodepos(node, pos) # # However, the node should: # # * neither consist of lower case consonants only, as determined by *all_lower_consonantsnode*: # -# .. autofunction:: treebankfunctions::all_lower_consonantsnode +# .. autofunction:: sastadev.treebankfunctions::all_lower_consonantsnode # # * nor satisfy the conditions of *short_nucl_n*: # -# .. autofunction:: treebankfunctions::short_nucl_n +# .. 
autofunction:: sastadev.treebankfunctions::short_nucl_n # # """ # @@ -1031,7 +1035,7 @@ def sasta_short(inval: str) -> bool: The function *sasta_short* determines whether the string *inval* is short, i.e, with a length smaller or equal than *sasta_short_length*: - .. autodata:: treebankfunctions::sasta_short_length + .. autodata:: sastadev.treebankfunctions::sasta_short_length """ result = len(inval) <= sasta_short_length @@ -1044,7 +1048,7 @@ def short_nucl_n(node: SynTree) -> bool: *pt* equal to *n*, relation *nucl*, and whose *word* attribute is short (as determined by the function *sasta_short*) - .. autofunction:: treebankfunctions::sasta_short + .. autofunction:: sastadev.treebankfunctions::sasta_short """ pt = getattval(node, 'pt') rel = getattval(node, 'rel') @@ -1062,10 +1066,10 @@ def sasta_pseudonym(node: SynTree) -> bool: pseudonym regular expressions have been created using the constant sasta_pseudonyms: - .. autodata:: treebankfunctions::sasta_pseudonyms + .. autodata:: sastadev.treebankfunctions::sasta_pseudonyms :noindex: - .. autodata:: treebankfunctions::pseudonym_patternlist + .. autodata:: sastadev.treebankfunctions::pseudonym_patternlist """ word = getattval(node, 'word') @@ -1171,7 +1175,7 @@ def getindexednodesmap(basicdict: Dict[str, SynTree]) -> Dict[str, SynTree]: The function *getindexednodesmap* creates a new dictionary for each item in *basicdict* in which the bare index nodes have been replaced by their antecedents by applying the function *expandtree*: - .. autofunction:: treebankfunctions::expandtree + .. autofunction:: sastadev.treebankfunctions::expandtree """ newdict = {} @@ -1296,17 +1300,17 @@ def indextransform(stree: SynTree) -> SynTree: It first gathers the antecedents of bare index nodes in a dictionary (*basicindexednodesmap*) of index-SynTree items by means of the function *getbasicindexednodesmap*. - .. autofunction:: treebankfunctions::getbasicindexednodesmap + .. 
autofunction:: sastadev.treebankfunctions::getbasicindexednodesmap The antecedents can contain bare index nodes themselves. So, in a second step, each antecedent is expanded so that bare index nodes are replaced by their antecedents. This is done by the function *getindexednodesmap*, which creates a new dictionary of index-SynTree items called *indexnodesmap* - .. autofunction:: treebankfunctions::getindexednodesmap + .. autofunction:: sastadev.treebankfunctions::getindexednodesmap Finally, the input tree is transformed by the function *indextransform2*, which uses *indexnodesmap*: - .. autofunction:: treebankfunctions::indextransform2 + .. autofunction:: sastadev.treebankfunctions::indextransform2 """