From d6e59f80f97db9f2e5c21ada11a45a8c95f99b43 Mon Sep 17 00:00:00 2001 From: William Mattingly Date: Mon, 8 Apr 2024 06:24:17 -0400 Subject: [PATCH 1/3] Added Date spaCy to file --- website/meta/universe.json | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index 6278dd4899b..23e04f06527 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -4517,7 +4517,42 @@ "website": "https://redfield.ai" }, "category": ["standalone"] - } + }, + { + "id": "date-spacy", + "title": "Date spaCy", + "slogan": "Effortless Date Recognition in Text with spaCy", + "description": "Date spaCy is a spaCy pipeline component designed to identify and parse date entities in text effortlessly. It uses Regular Expressions (RegEx) to detect a wide range of date formats and leverages the 'dateparser' library for accurate conversion into structured datetime objects. Particularly useful in NLP tasks involving date information extraction, this component seamlessly integrates into existing or new spaCy pipelines. The tool assumes the current year for dates without a specified year, ensuring sensible defaults while maintaining flexibility. The parsed dates are stored in a custom entity extension, providing easy access and manipulation within spaCy's ecosystem. This makes Date spaCy a go-to solution for developers and data scientists dealing with temporal data in natural language.", + "github": "wjbmattingly/date-spacy", + "pip": "date-spacy", + "code_example": [ + "import spacy", + "from date_spacy import find_dates", + "", + "nlp = spacy.blank('en')", + "nlp.add_pipe('find_dates')", + "", + "doc = nlp(\"\"\"The event is scheduled for 25th August 2023.", + " We also have a meeting on 10 September and another one on the twelfth of October and a", + " final one on January fourth.\"\"\")", + "", + "for ent in doc.ents:", + " if ent.label_ == 'DATE':", + " print(f'Text: {ent.text} -> Parsed Date: {ent._.date}')" + ], + "code_language": "python", + "url": "https://github.com/wjbmattingly/date-spacy", + "thumb": "https://github.com/wjbmattingly/date-spacy/raw/main/images/date-spacy-logo.png?raw=true", + "image": "https://github.com/wjbmattingly/date-spacy/raw/main/images/date-spacy-logo.png?raw=true", + "author": "W.J.B. Mattingly", + "author_links": { + "twitter": "wjb_mattingly", + "github": "wjbmattingly", + "website": "https://wjbmattingly.com" + }, + "category": ["pipeline"], + "tags": ["dates", "ner", "nlp", "spacy"] + } ], "categories": [ From c9ac88ad08fa7d87e86438cd3ce3b68437c1992a Mon Sep 17 00:00:00 2001 From: William Mattingly Date: Mon, 8 Apr 2024 09:56:45 -0400 Subject: [PATCH 2/3] spaCy Annoy added --- website/meta/universe.json | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index 23e04f06527..9bb57efb6ac 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -4552,7 +4552,40 @@ }, "category": ["pipeline"], "tags": ["dates", "ner", "nlp", "spacy"] - } + }, + { + "id": "spacy-annoy", + "title": "Spacy Annoy", + "slogan": "Integrating Spacy NLP and Annoy for Semantic Text Search with spaCy linguistic tags.", + "description": "Spacy Annoy offers a combination of Spacy's natural language processing (NLP) capabilities and Annoy's efficient similarity search algorithms. This Python class is tailored for analyzing and querying large text corpora, delivering results based on semantic similarity. Key features include contextual window chunking and controlled overlap with preservation of original context at the Doc level, allowing access to all original Spacy properties.", + "github": "wjbmattingly/spacy-annoy", + "pip": "spacy-annoy", + "code_example": [ + "from SpacyAnnoy import SpacyAnnoy", + "", + "# Initialize with a Spacy model name", + "sa = SpacyAnnoy('en_core_web_lg')", + "", + "texts = ['This is a text about sports', 'This is a text about dogs']*20", + "sa.load_docs(texts)", + "", + "sa.build_index(n_trees=10, metric='euclidean')", + "", + "# Query the index", + "results = sa.query_index('Dogs and cats.', depth=5)", + "", + "# Pretty print results", + "sa.pretty_print(results)", + "", + "# Accessing the Spacy span of the first result", + "first_result_span = results[0][0]" + ], + "code_language": "python", + "url": "https://github.com/wjbmattingly/spacy-annoy", + "category": ["nlp", "search", "similarity"], + "tags": ["spacy", "annoy", "text analysis", "semantic search"] + } + ], "categories": [ From 44e1b81b4bd3a8eaa191f68d7678450c32d0c8a8 Mon Sep 17 00:00:00 2001 From: William Mattingly Date: Mon, 8 Apr 2024 10:00:00 -0400 Subject: [PATCH 3/3] Update universe.json --- website/meta/universe.json | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index 9bb57efb6ac..23e04f06527 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -4552,40 +4552,7 @@ }, "category": ["pipeline"], "tags": ["dates", "ner", "nlp", "spacy"] - }, - { - "id": "spacy-annoy", - "title": "Spacy Annoy", - "slogan": "Integrating Spacy NLP and Annoy for Semantic Text Search with spaCy linguistic tags.", - "description": "Spacy Annoy offers a combination of Spacy's natural language processing (NLP) capabilities and Annoy's efficient similarity search algorithms. This Python class is tailored for analyzing and querying large text corpora, delivering results based on semantic similarity. Key features include contextual window chunking and controlled overlap with preservation of original context at the Doc level, allowing access to all original Spacy properties.", - "github": "wjbmattingly/spacy-annoy", - "pip": "spacy-annoy", - "code_example": [ - "from SpacyAnnoy import SpacyAnnoy", - "", - "# Initialize with a Spacy model name", - "sa = SpacyAnnoy('en_core_web_lg')", - "", - "texts = ['This is a text about sports', 'This is a text about dogs']*20", - "sa.load_docs(texts)", - "", - "sa.build_index(n_trees=10, metric='euclidean')", - "", - "# Query the index", - "results = sa.query_index('Dogs and cats.', depth=5)", - "", - "# Pretty print results", - "sa.pretty_print(results)", - "", - "# Accessing the Spacy span of the first result", - "first_result_span = results[0][0]" - ], - "code_language": "python", - "url": "https://github.com/wjbmattingly/spacy-annoy", - "category": ["nlp", "search", "similarity"], - "tags": ["spacy", "annoy", "text analysis", "semantic search"] - } - + } ], "categories": [