Skip to content

Commit

Permalink
Maintenance message, add source (PM or PMC) to mapping file.
Browse files Browse the repository at this point in the history
  • Loading branch information
khituras committed Sep 5, 2024
1 parent 9e7ab40 commit 68e2aab
Show file tree
Hide file tree
Showing 22 changed files with 223 additions and 121 deletions.
2 changes: 1 addition & 1 deletion gepi/gepi-indexing/gepi-indexing-base/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
<dependency>
<groupId>de.julielab</groupId>
<artifactId>jcore-descriptor-creator</artifactId>
<version>2.6.0-SNAPSHOT</version>
<version>2.6.1</version>
<scope>provided</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ else if (j > i)
String arg2EntryIdPath = "/ref/resourceEntryList[" + l + "]/entryId";
document.addField("argument1", createRawFieldValueForParallelAnnotations(new FeatureStructure[]{argPair[0], argPair[0], argPair[0], argPair[0], argPair[0]}, new String[]{arg1EntryIdPath, arg1EntryIdPath, arg1EntryIdPath, arg1EntryIdPath, arg1EntryIdPath}, new Filter[]{geneFb.orgid2tid2atidAddonFilter, geneFb.eg2famplexFilter, geneFb.eg2hgncFilter, geneFb.eg2gohypertidFilter, geneFb.orgid2equalnameatidReplaceFilter}, new UniqueFilter()));
// document.addField("argument1geneid", createRawFieldValueForAnnotation(argPair[0], arg1EntryIdPath, null));
// document.addField("argument1taxid", createRawFieldValueForAnnotation(argPair[0], arg1EntryIdPath, geneFb.egid2taxidReplaceFilter));
document.addField("argument1taxid", createRawFieldValueForAnnotation(argPair[0], arg1EntryIdPath, geneFb.egid2taxidReplaceFilter));
// document.addField("argument1conceptid", createRawFieldValueForAnnotation(argPair[0], arg1EntryIdPath, geneFb.eg2tidReplaceFilter));
// document.addField("argument1tophomoid", createRawFieldValueForAnnotation(argPair[0], arg1EntryIdPath, geneFb.eg2tophomoFilter));
// document.addField("argument1famplexid", createRawFieldValueForAnnotation(argPair[0], arg1EntryIdPath, geneFb.eg2famplexFilter));
Expand All @@ -202,7 +202,7 @@ else if (j > i)
// document.addField("argument1genemappingsource", createRawFieldValueForAnnotation(argPair[0], "/ref/resourceEntryList[" + k + "]/componentId", geneComponentIdProcessingfilter));
document.addField("argument2", createRawFieldValueForParallelAnnotations(new FeatureStructure[]{argPair[1], argPair[1], argPair[1], argPair[1], argPair[1]}, new String[]{arg2EntryIdPath, arg2EntryIdPath, arg2EntryIdPath, arg2EntryIdPath, arg2EntryIdPath}, new Filter[]{geneFb.orgid2tid2atidAddonFilter, geneFb.eg2famplexFilter, geneFb.eg2hgncFilter, geneFb.eg2gohypertidFilter, geneFb.orgid2equalnameatidReplaceFilter}, new UniqueFilter()));
// document.addField("argument2geneid", createRawFieldValueForAnnotation(argPair[1], arg2EntryIdPath, null));
// document.addField("argument2taxid", createRawFieldValueForAnnotation(argPair[1], arg2EntryIdPath, geneFb.egid2taxidReplaceFilter));
document.addField("argument2taxid", createRawFieldValueForAnnotation(argPair[1], arg2EntryIdPath, geneFb.egid2taxidReplaceFilter));
// document.addField("argument2conceptid", createRawFieldValueForAnnotation(argPair[1], arg2EntryIdPath, geneFb.eg2tidReplaceFilter));
// document.addField("argument2tophomoid", createRawFieldValueForAnnotation(argPair[1], arg2EntryIdPath, geneFb.eg2tophomoFilter));
// document.addField("argument2famplexid", createRawFieldValueForAnnotation(argPair[1], arg2EntryIdPath, geneFb.eg2famplexFilter));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
"enabled": false
},
"properties": {
"source": {
"type": "text",
"store": true
},
"pmid": {
"type": "keyword",
"store": true
Expand Down Expand Up @@ -103,6 +107,9 @@
"type": "keyword",
"store": true
},
"argument1taxid": {
"type": "keyword"
},
"argument1coveredtext": {
"type": "text",
"store": true,
Expand Down Expand Up @@ -160,6 +167,9 @@
"type": "keyword",
"store": true
},
"argument2taxid": {
"type": "keyword"
},
"argument2coveredtext": {
"type": "text",
"store": true,
Expand Down Expand Up @@ -314,16 +324,8 @@
"headings": {
"type": "text",
"norms": false,
"store": true,
"copy_to": ["paragraph.headingscompletion", "paragraph.headingskw"]
},
"headingskw": {
"type": "keyword",
"store": true
},
"headingscompletion": {
"type": "completion"
},
"textscope": {
"type": "keyword",
"store": true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,34 @@

</configurationParameter>

<configurationParameter>

<name>docIdTransformBeforeDeletionRegex</name>

<description>Optional regular expression to match and then replace (parts of) document IDs before deletion using the docIdTransformBeforeDeletionReplacement parameter value. Can be used when deleteDocumentsBeforeIndexing is set to true. Then, a Matcher with the given regular expression is created and document IDs are transformed using Matcher.replaceAll(&lt;replacement&gt;) for the deletion step.</description>

<type>String</type>

<multiValued>false</multiValued>

<mandatory>false</mandatory>

</configurationParameter>

<configurationParameter>

<name>docIdTransformBeforeDeletionReplacement</name>

<description>Optional replacement String to be used to transform document IDs before deletion. Requires docIdTransformBeforeDeletionRegex to be specified. Then, document IDs used for deletion are matched with that regular expression and matches are replaced with this replacement String using Matcher.replaceAll(&lt;replacement&gt;) where the Matcher was created with the regular expression.</description>

<type>String</type>

<multiValued>false</multiValued>

<mandatory>false</mandatory>

</configurationParameter>

<configurationParameter>

<name>documentIdField</name>
Expand Down Expand Up @@ -195,7 +223,7 @@

<value>

<string>gepi_1.0_3</string>
<string>gepi_1.0_2</string>

</value>

Expand Down Expand Up @@ -253,7 +281,31 @@

<value>

<boolean>false</boolean>
<boolean>true</boolean>

</value>

</nameValuePair>

<nameValuePair>

<name>docIdTransformBeforeDeletionRegex</name>

<value>

<string>^([^_]+)_.*$</string>

</value>

</nameValuePair>

<nameValuePair>

<name>docIdTransformBeforeDeletionReplacement</name>

<value>

<string>$1</string>

</value>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@

<value>

<string>gepi_1.0_3</string>
<string>gepi_1.0_2</string>

</value>

Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"uri":null,"location":"de.julielab.jcore.ae.jsbd.desc.jcore-jsbd-ae-biomedical-english","category":"ae","uimaDescPath":null,"metaDescription":{"description":"UIMA Wrapper for the JCoRe Sentence Boundary Detector (jcore-jsbd-ae) with a model trained on data from both the GENIA and PennBioIE corpus as well as additional material from MedLine abstracts.","group":"general","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.4","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"pear":false,"name":"JCoRe Sentence Annotator, Biomedical English","base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-jsbd-ae-biomedical-english","version":"2.6.1","classifier":null,"file":"/Users/faessler/.m2/repository/de/julielab/jcore-jsbd-ae-biomedical-english/2.6.1/jcore-jsbd-ae-biomedical-english-2.6.1.jar","packaging":"jar"}},"name":"JCoRe Sentence Annotator","active":true},{"uri":null,"location":"de.julielab.jcore.ae.jtbd.desc.jcore-jtbd-ae-biomedical-english","category":"ae","uimaDescPath":null,"metaDescription":{"description":"UIMA Wrapper for the JCoRe Token Boundary Detector with a model trained on a special biomedical corpus which consists of data from (manually annotated) material which we took from MedLine abstracts and a modified version of PennBioIE's underlying tokenization.\n","group":"general","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.5","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"pear":false,"name":"JCoRe Token Annotator, Biomedical English","base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-jtbd-ae-biomedical-english","version":"2.6.0","classifier":null,"file":"/Users/faessler/.m2/repository/de/julielab/jcore-jtbd-ae-biomedical-english/2.6.0/jcore-jtbd-ae-biomedical-english-2.6.0.jar","packaging":"jar"}},"name":"JCoRe 
Token Annotator","active":true},{"uri":null,"location":"de.julielab.jcore.ae.opennlp.postag.desc.jcore-opennlp-postag-ae-biomedical-english","category":"ae","uimaDescPath":null,"metaDescription":{"description":"This project employs the OpenNLP wrapper (jcore-opennlp-postag-ae) with a model trained on the PennBioIE corpus.","group":"general","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.5","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"pear":false,"name":"JCoRe OpenNLP POS Tagger, Biomedical English","base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-opennlp-postag-ae-biomedical-english","version":"2.6.0","classifier":null,"file":"/Users/faessler/.m2/repository/de/julielab/jcore-opennlp-postag-ae-biomedical-english/2.6.0/jcore-opennlp-postag-ae-biomedical-english-2.6.0.jar","packaging":"jar"}},"name":"JCoRe OpenNLP POS Tagger","active":true},{"uri":null,"location":"de.julielab.jcore.ae.acronymtagger.desc.jcore-acronym-ae","category":"ae","uimaDescPath":null,"metaDescription":{"description":"Arconym Tagger based on the SCHWARTZ & HEARST Algorithm: Ariel S. Schwartz and Marti A. Hearst: A Simple Algorithm For Identifying Abbreviation Definitions in Biomedical Text. 
In: Pacific Symposium on Biocomputing, 2003.","group":"morpho syntactic","module":{"type":"GitHubRepository","name":"jcore-base","version":"v2.5","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"pear":false,"name":"JCoRe Acronym Tagger","base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-acronym-ae","version":"2.6.1","classifier":null,"file":"/Users/faessler/.m2/repository/de/julielab/jcore-acronym-ae/2.6.1/jcore-acronym-ae-2.6.1.jar","packaging":"jar"}},"name":"JCoRe AcronymAnnotator","active":true},{"uri":null,"location":"de.julielab.jcore.ae.biolemmatizer.desc.jcore-biolemmatizer-ae","category":"ae","uimaDescPath":null,"metaDescription":{"description":"Lemmatization tool for morphological analysis of biomedical literature downloaded from SourceForge","group":"semantic","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.4","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"pear":false,"name":"JCoRe BioLemmatizer","base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-biolemmatizer-ae","version":"2.6.1","classifier":null,"file":"/Users/faessler/.m2/repository/de/julielab/jcore-biolemmatizer-ae/2.6.1/jcore-biolemmatizer-ae-2.6.1.jar","packaging":"jar"}},"name":"BioLemmatizer","active":true},{"uri":null,"location":"de.julielab.jcore.ae.biosem.desc.jcore-biosem-ae-bionlp-st11","category":"ae","uimaDescPath":null,"metaDescription":{"description":"This project employs the JCoRe BioSem wrapper (jcore-biosem-ae) with a BioSem database trained on the\n BioNLP SharedTask 2011 train and development (that is, \"mixed\") data for biological event extraction.\n 
","group":"semantic","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.6","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"pear":false,"name":"JCoRe BioSem AE, BioNLP SharedTask 2011","base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-biosem-ae-bionlp-st11","version":"2.6.0","classifier":null,"file":"/Users/faessler/.m2/repository/de/julielab/jcore-biosem-ae-bionlp-st11/2.6.0/jcore-biosem-ae-bionlp-st11-2.6.0.jar","packaging":"jar"}},"name":"JCoRe BioSEM Event Annotator","active":true}]
[{"uri":null,"location":"de.julielab.jcore.ae.jsbd.desc.jcore-jsbd-ae-biomedical-english","category":"ae","uimaDescPath":null,"metaDescription":{"description":"UIMA Wrapper for the JCoRe Sentence Boundary Detector (jcore-jsbd-ae) with a model trained on data from both the GENIA and PennBioIE corpus as well as additional material from MedLine abstracts.","group":"general","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.4","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"name":"JCoRe Sentence Annotator, Biomedical English","pear":false,"base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-jsbd-ae-biomedical-english","version":"2.6.1","classifier":null,"file":"/home/faessler/.m2/repository/de/julielab/jcore-jsbd-ae-biomedical-english/2.6.1/jcore-jsbd-ae-biomedical-english-2.6.1.jar","packaging":"jar"}},"name":"JCoRe Sentence Annotator","active":true},{"uri":null,"location":"de.julielab.jcore.ae.jtbd.desc.jcore-jtbd-ae-biomedical-english","category":"ae","uimaDescPath":null,"metaDescription":{"description":"UIMA Wrapper for the JCoRe Token Boundary Detector with a model trained on a special biomedical corpus which consists of data from (manually annotated) material which we took from MedLine abstracts and a modified version of PennBioIE's underlying tokenization.\n","group":"general","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.5","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"name":"JCoRe Token Annotator, Biomedical English","pear":false,"base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-jtbd-ae-biomedical-english","version":"2.6.0","classifier":null,"file":"/home/faessler/.m2/repository/de/julielab/jcore-jtbd-ae-biomedical-english/2.6.0/jcore-jtbd-ae-biomedical-english-2.6.0.jar","packaging":"jar"}},"name":"JCoRe 
Token Annotator","active":true},{"uri":null,"location":"de.julielab.jcore.ae.opennlp.postag.desc.jcore-opennlp-postag-ae-biomedical-english","category":"ae","uimaDescPath":null,"metaDescription":{"description":"This project employs the OpenNLP wrapper (jcore-opennlp-postag-ae) with a model trained on the PennBioIE corpus.","group":"general","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.5","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"name":"JCoRe OpenNLP POS Tagger, Biomedical English","pear":false,"base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-opennlp-postag-ae-biomedical-english","version":"2.6.0","classifier":null,"file":"/home/faessler/.m2/repository/de/julielab/jcore-opennlp-postag-ae-biomedical-english/2.6.0/jcore-opennlp-postag-ae-biomedical-english-2.6.0.jar","packaging":"jar"}},"name":"JCoRe OpenNLP POS Tagger","active":true},{"uri":null,"location":"de.julielab.jcore.ae.acronymtagger.desc.jcore-acronym-ae","category":"ae","uimaDescPath":null,"metaDescription":{"description":"Arconym Tagger based on the SCHWARTZ & HEARST Algorithm: Ariel S. Schwartz and Marti A. Hearst: A Simple Algorithm For Identifying Abbreviation Definitions in Biomedical Text. 
In: Pacific Symposium on Biocomputing, 2003.","group":"morpho syntactic","module":{"type":"GitHubRepository","name":"jcore-base","version":"v2.5","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"name":"JCoRe Acronym Tagger","pear":false,"base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-acronym-ae","version":"2.6.1","classifier":null,"file":"/home/faessler/.m2/repository/de/julielab/jcore-acronym-ae/2.6.1/jcore-acronym-ae-2.6.1.jar","packaging":"jar"}},"name":"JCoRe AcronymAnnotator","active":true},{"uri":null,"location":"de.julielab.jcore.ae.biolemmatizer.desc.jcore-biolemmatizer-ae","category":"ae","uimaDescPath":null,"metaDescription":{"description":"Lemmatization tool for morphological analysis of biomedical literature downloaded from SourceForge","group":"semantic","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.4","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"name":"JCoRe BioLemmatizer","pear":false,"base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-biolemmatizer-ae","version":"2.6.1","classifier":null,"file":"/home/faessler/.m2/repository/de/julielab/jcore-biolemmatizer-ae/2.6.1/jcore-biolemmatizer-ae-2.6.1.jar","packaging":"jar"}},"name":"BioLemmatizer","active":true},{"uri":null,"location":"de.julielab.jcore.ae.biosem.desc.jcore-biosem-ae-bionlp-st11","category":"ae","uimaDescPath":null,"metaDescription":{"description":"This project employs the JCoRe BioSem wrapper (jcore-biosem-ae) with a BioSem database trained on the\n BioNLP SharedTask 2011 train and development (that is, \"mixed\") data for biological event extraction.\n 
","group":"semantic","module":{"type":"GitHubRepository","name":"jcore-projects","version":"v2.6","updateable":true,"type":"GitHubRepository","gitHubName":"JULIELab"},"base":null,"exposable":true,"categories":["ae"],"name":"JCoRe BioSem AE, BioNLP SharedTask 2011","pear":false,"base-project":null,"maven-artifact":{"groupId":"de.julielab","artifactId":"jcore-biosem-ae-bionlp-st11","version":"2.6.0","classifier":null,"file":"/home/faessler/.m2/repository/de/julielab/jcore-biosem-ae-bionlp-st11/2.6.0/jcore-biosem-ae-bionlp-st11-2.6.0.jar","packaging":"jar"}},"name":"JCoRe BioSEM Event Annotator","active":true}]
Loading

0 comments on commit 68e2aab

Please sign in to comment.