Skip to content

Commit

Permalink
Merge pull request #833 from PyThaiNLP/add-ancient
Browse files (browse the repository at this point in the history)
Add pythainlp.ancient
  • Loading branch information
wannaphong authored Aug 14, 2023
2 parents c7470dc + 47d5189 commit d1b2b5c
Show file tree
Hide file tree
Showing 15 changed files with 531 additions and 90 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/macos-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ jobs:
python -m pip install --upgrade pip
pip uninstall --y pythainlp
pip install --no-deps fastai==1.0.61
pip install PyYAML attacut emoji epitran gensim nltk numpy pandas sacremoses sentencepiece ssg bpemb transformers sefr_cut phunspell spylls symspellpy tltk oskut nlpo3 onnxruntime thai_nner wunsen spacy_thai ufal.chu-liu-edmonds
pip install -e .
conda install -c conda-forge icu
conda install -c conda-forge pyicu
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt
pip install .[full]
python -m nltk.downloader omw-1.4
python -m unittest discover
if: matrix.os == 'self-hosted'
Expand All @@ -73,10 +75,11 @@ jobs:
pip install pytest coverage coveralls
conda install -c conda-forge icu
conda install -c conda-forge pyicu
if [ -f docker_requirements.txt ]; then SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt; fi
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt
pip install deepcut tltk
pip install .[full]
python -m nltk.downloader omw-1.4
python -m pip cache purge
if: matrix.os != 'self-hosted'
- name: Test
shell: bash -l {0}
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install backports.zoneinfo[tzdata]
pip install pytest coverage coveralls
if [ -f docker_requirements.txt ]; then SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt; fi
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt
pip install deepcut tltk
pip install .[full]
python -m nltk.downloader omw-1.4
python -m pip install spacy deepcut tltk
python -m pip cache purge
- name: Test
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
4 changes: 2 additions & 2 deletions docker_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fairseq==0.10.2
pyicu==2.8
deepcut==0.7.0.0
h5py==3.1.0
tensorflow==2.9.3
tensorflow==2.11.1
pandas==1.4.*
tltk==1.6.8
OSKut==1.3
Expand All @@ -37,4 +37,4 @@ ufal.chu-liu-edmonds==1.0.2
wtpsplit==1.0.1
fastcoref==2.1.6
panphon==0.20.0
sentence-transformers==2.2.2
sentence-transformers==2.2.2
9 changes: 9 additions & 0 deletions docs/api/ancient.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.. currentmodule:: pythainlp.ancient

pythainlp.ancient
=================

Modules
-------

.. autofunction:: aksonhan_to_current
322 changes: 322 additions & 0 deletions notebooks/test-aksonhan.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pythainlp.ancient import aksonhan_to_current"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'จัก'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"จกก\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'บรร'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"บรร\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ดั่ง'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"ดง่ง\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'นั้น'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"นน้น\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ขัด'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"ขดด\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ตรัส'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"ตรสส\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ขับ'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"ขบบ\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'วัน'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"วนน\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'หลัง'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"หลงง\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'บังคับ'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"บงงคบบ\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'สรรเพชญ'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"สรรเพชญ\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'กก'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"กก\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ก'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"ก\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ถนน'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"aksonhan_to_current(\"ถนน\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit d1b2b5c

Please sign in to comment.