Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Package absolute imports #37

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions Nepali_nlp/__init__.py

This file was deleted.

36 changes: 0 additions & 36 deletions Nepali_nlp/news_latest.py

This file was deleted.

86 changes: 0 additions & 86 deletions Nepali_nlp/unicode_nepali.py

This file was deleted.

24 changes: 16 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Synonym().raw_synonym(word = 'माया',word_vec=word_vec) #method: 1
Synonym().filter_synonym(word = 'साथी',word_vec=word_vec) #method: 2
#output -> 'भाइहरू','सहपाठी','प्रेमी','दाइ','प्रेमि','बहिनी'
```

<h3>Word-spell corrector</h3>

```python
Expand All @@ -34,32 +35,37 @@ Corrector().corrector(word='सुशल') #In a very raw stage for now.
Corrector().spell_correct("कस्त भको हेरौ है")
#output-> "कस्तो भयो हेर है"
```
<h3>Nepali text summerizer</h3>

<h3>Nepali text Summarizer</h3>

```python
from Nepali_nlp import Summerize
Summerize().show_summary(word_vec,text, length_sentence_predict=5)
from nepali_nlp import Summarize
Summarize().show_summary(word_vec,text, length_sentence_predict=5)
```

<h3>Nepali unicode to Devanagari Font</h3>

```python
from nepali_nlp import Unicode
text = 'ma ghara jaanchhu'
Unicode().unicode_word(text) #output-> 'म घर जान्छु'
```

<h3>Preeti-font character to Devanagari Font</h3>

```python
from nepali_nlp import preeti
unicode_word = 'g]kfnL'
print(preeti(unicode_word)) #output-> नेपाली
```

<h3>OCR (Optical Character Recognition)</h3>

```python
from nepali_nlp import OCR
text = OCR(image_location)
```

<h3>Nepali Tokenizer</h3>

```python
Expand Down Expand Up @@ -94,22 +100,24 @@ from Nepali_nlp import extract_news
news_link = 'https://www.onlinekhabar.com/2019/12/821094'
title, news = extract_news(news_link) #onlinekhabar and ekantipur are supported at the moment.
```

<h3>Show latest news summary</h3>

```python
from nepali_nlp import UpdateNews
title, links, summerized_news = UpdateNews().show_latest(word_vec=word_vec,portal='onlinekhabar',number_of_news=5) #ekantipur portal is also supported
title, links, summarized_news = UpdateNews().show_latest(word_vec=word_vec,portal='onlinekhabar',number_of_news=5) #ekantipur portal is also supported
```

TODOs:<br>
- [x] Nepali Embeddings
- [x] Tokenizers (sentence, word, character)

- [x] Nepali Embeddings
- [x] Tokenizers (sentence, word, character)
- [x] Stop Words
- [x] Nepali Words Collection
- [x] Nepali Words Collection
- [x] Nepali Word synonym
- [x] Roman Nepali to Nepali
- [x] Nepali OCR
- [x] Summerization
- [x] Summarization
- [x] Pos_tag
- [x] Nepali stemming
- [x] Sentence similarity score
Expand Down
12 changes: 12 additions & 0 deletions nepali_nlp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from nepali_nlp.embedding import Embeddings
from nepali_nlp.synonym import Synonym
from nepali_nlp.spellcheck import Corrector
from nepali_nlp.summarization import Summarize
from nepali_nlp.unicode_nepali import Unicode
from nepali_nlp.preeti_unicode import preeti
from nepali_nlp.ocr import OCR
from nepali_nlp.nepali_tokenizer import Tokenizer
from nepali_nlp.sentence_similar import Avg_vector_similar
from nepali_nlp.news_scrap import extract_news
from nepali_nlp.news_latest import UpdateNews
from nepali_nlp.stemmer import Stem
File renamed without changes.
20 changes: 11 additions & 9 deletions Nepali_nlp/Embedding.py → nepali_nlp/embedding.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from nepali_nlp.download_embedding import Download
from gensim.models.keyedvectors import KeyedVectors
import gensim
import os
import sys
sys.path.append('..')

import gensim
from gensim.models.keyedvectors import KeyedVectors
from .Download_embedding import Download

class Embeddings:
"""This class helps to load embedding in keyedvector format."""
Expand All @@ -19,23 +18,26 @@ def load_large_vector(self):
[keyedVectors] -- [Custom Nepali word Embedding]
"""
download = Download()
download.download_file_from_google_drive('1ik38vahOmzhiU2DBi78VOqDt7YFPsk5w', 'word_vector.sg')
word_vector = KeyedVectors.load_word2vec_format('word_vector.sg', binary=False)
download.download_file_from_google_drive(
'1ik38vahOmzhiU2DBi78VOqDt7YFPsk5w', 'word_vector.sg')
word_vector = KeyedVectors.load_word2vec_format(
'word_vector.sg', binary=False)
os.remove("word_vector.sg")

return word_vector

def load_vector(self):
"""Returns a large Nepali word embedding. Creator: https://github.com/rabindralamsal/Word2Vec-Embeddings-for-Nepali-Language

Returns:
[keyedVectors] -- [Custom Nepali word Embedding]
"""
download = Download()
download.download_file_from_google_drive('1KnAZ2Eeqwz3S9VrAuzTLWysAaRB6Ch7e', 'nepali_embeddings_word2vec.txt')
download.download_file_from_google_drive(
'1KnAZ2Eeqwz3S9VrAuzTLWysAaRB6Ch7e', 'nepali_embeddings_word2vec.txt')
word_vector = KeyedVectors.load('nepali_embeddings_word2vec.txt')
os.remove("nepali_embeddings_word2vec.txt")

return word_vector

def __str__(self):
Expand Down
File renamed without changes.
Empty file.
File renamed without changes.
7 changes: 3 additions & 4 deletions Nepali_nlp/n_gram.py → nepali_nlp/n_gram.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from nepali_nlp.nepali_tokenizer import Tokenizer
import sys
sys.path.append('..')

from .Nepali_tokenizer import Tokenizer

class NgramGenerator(Tokenizer):

Expand All @@ -10,11 +9,11 @@ def __init__(self, n_gram):

def generate_n_gram(self, token_text):
"""This function generate ngram token list

Arguments:
sentence {list} -- list of tokenized text
n_gram {int} -- value of n-gram

Returns:
list -- multi array list of n-gram tokenized words
"""
Expand Down
File renamed without changes.
36 changes: 36 additions & 0 deletions nepali_nlp/news_latest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from nepali_nlp.summarization import Summarize
from nepali_nlp.news_scrap import extract_news
from nepali_nlp.utils import top_news_link
import sys


class UpdateNews:
    """Collect the latest news from a supported portal and summarize each article."""

    def __init__(self):
        pass

    def show_latest(self, word_vec, portal='onlinekhabar', number_of_news=5,
                    length_sentence_predict=7):
        """Return the titles, links and summaries of the latest news on a portal.

        Arguments:
            word_vec -- word embedding forwarded unchanged to Summarize.show_summary

        Keyword Arguments:
            portal {str} -- [news portal site; either 'onlinekhabar' or 'ekantipur'] (default: {'onlinekhabar'})
            number_of_news {int} -- [number of links requested from top_news_link as top_n] (default: {5})
            length_sentence_predict {int} -- [value forwarded to Summarize.show_summary for every article] (default: {7})

        Returns:
            [tuple] -- [tuple of (titles, links, news_summaries); three lists in matching order]

        Raises:
            AssertionError -- if portal is not one of the supported portals
        """
        if portal not in ('onlinekhabar', 'ekantipur'):
            # Raised explicitly instead of via an `assert` statement so the
            # check is not stripped when Python runs with -O; the exception
            # type is kept so existing callers see the same error.
            raise AssertionError(
                "we currently support only ekantipur and onlinekhabar")

        summarizer = Summarize()
        titles = []
        links = []
        news_summaries = []
        for link in top_news_link(portal=portal, top_n=number_of_news):
            title, text = extract_news(link)
            summary = summarizer.show_summary(
                word_vec, text,
                length_sentence_predict=length_sentence_predict)
            titles.append(title)
            links.append(link)
            news_summaries.append(summary)

        return (titles, links, news_summaries)
Loading