-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
63 changed files
with
2,145 additions
and
1,245 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,100 @@ | ||
from transformers import pipeline | ||
from dateutil import parser | ||
from fastapi import FastAPI | ||
from pydantic import BaseModel | ||
import re | ||
|
||
from translated_events import event_list | ||
from school_events_extractor import SchoolEventsExtractor | ||
from translated_events import event_list_kr, event_list_en, event_list_km, event_list_th, event_list_zh, event_list_ja, event_list_vi | ||
|
||
|
||
class Request(BaseModel): | ||
language: str | ||
kr_text: str | ||
en_text: str | ||
translated_text: str | ||
|
||
|
||
app = FastAPI() | ||
qa_pipeline = pipeline( | ||
"question-answering", | ||
model="deepset/bert-large-uncased-whole-word-masking-squad2", | ||
tokenizer="deepset/bert-large-uncased-whole-word-masking-squad2" | ||
) | ||
events = [] | ||
NOT_FOUNDED = -1 | ||
|
||
|
||
@app.post("/event-dict") | ||
def make_event_dict_by_lang(lang):
    """Map English event names to their counterparts in ``lang``.

    The English event list supplies the keys and the list for the requested
    language supplies the values; the two are paired positionally with
    ``zip``. Recognised codes are 'en', 'kr', 'th', 'km', 'vi', 'ja' and
    'zh'. Any other value matches nothing and the function returns ``None``.
    """
    # Checked in order; the first code equal to ``lang`` wins.
    localized_lists = (
        ('en', event_list_en),
        ('kr', event_list_kr),
        ('th', event_list_th),
        ('km', event_list_km),
        ('vi', event_list_vi),
        ('ja', event_list_ja),
        ('zh', event_list_zh),
    )
    for code, localized_names in localized_lists:
        if lang == code:
            return dict(zip(event_list_en, localized_names))
    return None
|
||
|
||
def ask_model(context, question):
    """Ask the module-level QA pipeline a question and return its answer.

    ``context`` and ``question`` are forwarded under the 'context' and
    'question' keys; only the "answer" entry of the response is returned.
    """
    payload = {'context': context, 'question': question}
    return qa_pipeline(payload)["answer"]
|
||
|
||
def delete_weekday_from_datetext(date):
    """Strip the first parenthesised group, e.g. "(Mon)", from a date string.

    Only the text from the first "(" up to the next ")" is removed. The
    characters that follow the closing bracket are kept, so "5(Thu)-6"
    becomes "5-6". When the bracket was surrounded by spaces on both sides,
    one of them is dropped so the result does not contain a double space.

    Args:
        date: Date text that may contain a bracketed weekday.

    Returns:
        The text without the bracketed group, or ``date`` unchanged when it
        has no "(" followed by a ")".
    """
    # str.find returns -1 when the substring is absent.
    open_index = date.find("(")
    if open_index < 0:
        return date

    # Look for the closing bracket only after the opening one, so a stray
    # ")" earlier in the text cannot produce an inverted slice.
    close_index = date.find(")", open_index + 1)
    if close_index < 0:
        return date

    head = date[:open_index]
    tail = date[close_index + 1:]
    # "May 3 (Tue) 2022" would otherwise become "May 3  2022".
    if head.endswith(" ") and tail.startswith(" "):
        tail = tail[1:]
    return head + tail
|
||
|
||
def has_alpha(text):
    """Return True when ``text`` contains at least one ASCII letter.

    ``re.search`` is used so that a letter anywhere in the string counts;
    ``re.match`` would only inspect the first character and report
    "123abc" as having no letters.
    """
    return re.search(r'[a-zA-Z]', text) is not None
|
||
|
||
@app.post("/event") | ||
async def root(request: Request): | ||
events_extractor = SchoolEventsExtractor(request.kr_text, request.translated_text, event_list, request.language) | ||
events_extractor.find_all_events_starting_index() | ||
en_to_tr_event_dict = make_event_dict_by_lang(request.language) | ||
for event in event_list_en: | ||
if request.en_text.find(event) != NOT_FOUNDED: | ||
question = 'When is the {}?'.format(event) | ||
answer = ask_model(request.en_text, question) | ||
extracted_date = delete_weekday_from_datetext(answer) | ||
print(extracted_date) | ||
try: | ||
datetime = parser.parse(extracted_date) | ||
datetime = str(datetime.year) + "-" + str(datetime.month).zfill(2) + "-" + str(datetime.day).zfill(2) | ||
except: | ||
return {"status": 200, "message": "no date information"} | ||
|
||
translated_event = en_to_tr_event_dict[event] | ||
translated_event_start_index = request.translated_text.lower().find(translated_event.lower()) | ||
translated_event_end_index = translated_event_start_index + len(translated_event) | ||
|
||
if events_extractor.get_number_of_events_in_kr_text() < 1: | ||
return {"status": 200, "message": "no events"} | ||
if translated_event_start_index == NOT_FOUNDED: | ||
return {"status": 200, "message": "translation error"} | ||
|
||
events_extractor.find_all_dates_from_korean_text() | ||
events_extractor.match_dates_with_events_and_save() | ||
result = { | ||
"event": en_to_tr_event_dict[event], | ||
"s_index": translated_event_start_index, | ||
"e_index": translated_event_end_index, | ||
"date": datetime | ||
} | ||
|
||
return {'status': 200, 'body': events_extractor.results} | ||
events.append(result) | ||
notice_title = ask_model(request.en_text, "What is the main school event?") | ||
return {"status": 200, "body": {"title": notice_title, "events": events}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from fastapi import FastAPI | ||
from pydantic import BaseModel | ||
|
||
from translated_events import event_list | ||
from school_events_extractor import SchoolEventsExtractor | ||
|
||
|
||
class Request(BaseModel):
    # Request body accepted by the event endpoint in this module.

    # Language code; passed to SchoolEventsExtractor as its last argument.
    language: str
    # Source text; passed to SchoolEventsExtractor as its first argument.
    kr_text: str
    # Translated text; passed to SchoolEventsExtractor as its second argument.
    translated_text: str
|
||
|
||
app = FastAPI() | ||
|
||
|
||
@app.post("/event-dict")
async def root(request: Request):
    """Extract events from the posted notice and return the extractor results.

    A ``SchoolEventsExtractor`` is built from the request's ``kr_text``,
    ``translated_text`` and ``language`` together with ``event_list``. If it
    reports fewer than one event after locating event start indices, a
    "no events" message is returned. Otherwise dates are extracted and
    matched to events, and the extractor's ``results`` are returned as the
    body.
    """
    extractor = SchoolEventsExtractor(
        request.kr_text,
        request.translated_text,
        event_list,
        request.language,
    )
    extractor.find_all_events_starting_index()

    event_count = extractor.get_number_of_events_in_kr_text()
    if event_count < 1:
        return {"status": 200, "message": "no events"}

    # Dates must be collected before they can be matched to events.
    extractor.find_all_dates_from_korean_text()
    extractor.match_dates_with_events_and_save()

    return {'status': 200, 'body': extractor.results}
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,141 @@ | ||
anyio==3.5.0 | ||
asgiref==3.5.0 | ||
absl-py==1.0.0 | ||
aiohttp==3.8.1 | ||
aiosignal==1.2.0 | ||
altair==4.1.0 | ||
anyio==3.6.1 | ||
appnope==0.1.2 | ||
argon2-cffi==21.3.0 | ||
argon2-cffi-bindings==21.2.0 | ||
asgiref==3.5.2 | ||
astor==0.8.1 | ||
asttokens==2.0.5 | ||
async-timeout==4.0.2 | ||
attrs==21.4.0 | ||
backcall==0.2.0 | ||
backports.zoneinfo==0.2.1 | ||
base58==2.1.1 | ||
bleach==4.1.0 | ||
blinker==1.4 | ||
boto3==1.21.15 | ||
botocore==1.24.15 | ||
cachetools==5.0.0 | ||
certifi==2021.10.8 | ||
cffi==1.15.0 | ||
charset-normalizer==2.0.12 | ||
click==8.0.4 | ||
fastapi==0.75.0 | ||
debugpy==1.5.1 | ||
decorator==5.1.1 | ||
defusedxml==0.7.1 | ||
entrypoints==0.4 | ||
executing==0.8.3 | ||
fastapi==0.78.0 | ||
filelock==3.6.0 | ||
frozenlist==1.3.0 | ||
fsspec==2022.2.0 | ||
future==0.18.2 | ||
gitdb==4.0.9 | ||
GitPython==3.1.27 | ||
google-auth==2.6.0 | ||
google-auth-oauthlib==0.4.6 | ||
grpcio==1.44.0 | ||
gunicorn==20.1.0 | ||
h11==0.13.0 | ||
httptools==0.4.0 | ||
huggingface-hub==0.4.0 | ||
idna==3.3 | ||
pydantic==1.9.0 | ||
python-dotenv==0.19.2 | ||
importlib-metadata==4.11.2 | ||
importlib-resources==5.4.0 | ||
ipykernel==6.9.1 | ||
ipython==8.1.1 | ||
ipython-genutils==0.2.0 | ||
ipywidgets==7.6.5 | ||
jedi==0.18.1 | ||
Jinja2==3.0.3 | ||
jmespath==0.10.0 | ||
joblib==1.1.0 | ||
jsonschema==4.4.0 | ||
jupyter-client==7.1.2 | ||
jupyter-core==4.9.2 | ||
jupyterlab-pygments==0.1.2 | ||
jupyterlab-widgets==1.0.2 | ||
kobart @ git+https://github.com/SKT-AI/KoBART@41d7b5e225b9678bed870c5f9f4584492f04305d | ||
Markdown==3.3.6 | ||
MarkupSafe==2.1.0 | ||
matplotlib-inline==0.1.3 | ||
mistune==0.8.4 | ||
multidict==6.0.2 | ||
nbclient==0.5.12 | ||
nbconvert==6.4.2 | ||
nbformat==5.1.3 | ||
nest-asyncio==1.5.4 | ||
notebook==6.4.8 | ||
numpy==1.22.3 | ||
oauthlib==3.2.0 | ||
packaging==21.3 | ||
pandas==1.4.1 | ||
pandocfilters==1.5.0 | ||
parso==0.8.3 | ||
pexpect==4.8.0 | ||
pickleshare==0.7.5 | ||
Pillow==9.0.1 | ||
prometheus-client==0.13.1 | ||
prompt-toolkit==3.0.28 | ||
protobuf==3.19.4 | ||
ptyprocess==0.7.0 | ||
pure-eval==0.2.2 | ||
pyarrow==7.0.0 | ||
pyasn1==0.4.8 | ||
pyasn1-modules==0.2.8 | ||
pycparser==2.21 | ||
pydantic==1.9.1 | ||
pydeck==0.7.1 | ||
pyDeprecate==0.3.1 | ||
Pygments==2.11.2 | ||
Pympler==1.0.1 | ||
pyparsing==3.0.7 | ||
pyrsistent==0.18.1 | ||
python-dateutil==2.8.2 | ||
pytorch-lightning==1.2.1 | ||
pytz==2021.3 | ||
pytz-deprecation-shim==0.1.0.post0 | ||
PyYAML==6.0 | ||
pyzmq==22.3.0 | ||
regex==2022.3.2 | ||
requests==2.27.1 | ||
requests-oauthlib==1.3.1 | ||
rsa==4.8 | ||
s3transfer==0.5.2 | ||
sacremoses==0.0.47 | ||
semver==2.13.0 | ||
Send2Trash==1.8.0 | ||
six==1.16.0 | ||
smmap==5.0.0 | ||
sniffio==1.2.0 | ||
starlette==0.17.1 | ||
stack-data==0.2.0 | ||
starlette==0.19.1 | ||
streamlit==1.7.0 | ||
tensorboard==2.8.0 | ||
tensorboard-data-server==0.6.1 | ||
tensorboard-plugin-wit==1.8.1 | ||
terminado==0.13.3 | ||
testpath==0.6.0 | ||
tokenizers==0.10.3 | ||
toml==0.10.2 | ||
toolz==0.11.2 | ||
torch==1.7.1 | ||
torchmetrics==0.7.2 | ||
tornado==6.1 | ||
tqdm==4.63.0 | ||
traitlets==5.1.1 | ||
transformers==4.3.3 | ||
typing-extensions==4.1.1 | ||
tzdata==2021.5 | ||
tzlocal==4.1 | ||
urllib3==1.26.8 | ||
uvicorn==0.17.6 | ||
uvloop==0.16.0 | ||
watchgod==0.8.1 | ||
websockets==10.2 | ||
validators==0.18.2 | ||
wcwidth==0.2.5 | ||
webencodings==0.5.1 | ||
Werkzeug==2.0.3 | ||
widgetsnbextension==3.5.2 | ||
yarl==1.7.2 | ||
zipp==3.7.0 |
Oops, something went wrong.