Skip to content

Commit

Permalink
Merge pull request #67 from kakao/feature/64
Browse files Browse the repository at this point in the history
Feature/64
  • Loading branch information
hubert-lee authored Jun 3, 2019
2 parents d8514f6 + 7629c1f commit 61bea08
Show file tree
Hide file tree
Showing 49 changed files with 1,487 additions and 1,352 deletions.
4 changes: 2 additions & 2 deletions rsc/bin/compile_errpatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from typing import Dict, List, Tuple

from khaiii.resource.char_align import Aligner, align_patch
-from khaiii.resource.resource import load_restore_dic, load_vocab_out
+from khaiii.resource.resource import load_vocab_out, parse_restore_dic
from khaiii.resource.morphs import Morph, ParseError, mix_char_tag
from khaiii.resource.trie import Trie

Expand Down Expand Up @@ -221,7 +221,7 @@ def run(args: Namespace):
args: program arguments
"""
aligner = Aligner(args.rsc_src)
-restore_dic = load_restore_dic('{}/restore.dic'.format(args.rsc_src))
+restore_dic = parse_restore_dic('{}/restore.dic'.format(args.rsc_src))
if not restore_dic:
sys.exit(1)
vocab_out = load_vocab_out(args.rsc_src)
Expand Down
4 changes: 2 additions & 2 deletions rsc/bin/compile_preanal.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from khaiii.munjong import sejong_corpus
from khaiii.resource.char_align import Aligner, AlignError, align_to_tag
from khaiii.resource.morphs import Morph, ParseError
-from khaiii.resource.resource import load_restore_dic, load_vocab_out
+from khaiii.resource.resource import load_vocab_out, parse_restore_dic
from khaiii.resource.trie import Trie

from compile_restore import append_new_entries
Expand Down Expand Up @@ -231,7 +231,7 @@ def run(args: Namespace):
args: program arguments
"""
aligner = Aligner(args.rsc_src)
-restore_dic = load_restore_dic('{}/restore.dic'.format(args.rsc_src))
+restore_dic = parse_restore_dic('{}/restore.dic'.format(args.rsc_src))
if not restore_dic:
sys.exit(1)
restore_new = defaultdict(dict)
Expand Down
4 changes: 2 additions & 2 deletions rsc/bin/compile_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from typing import Dict

from khaiii.resource.morphs import TAG_SET
-from khaiii.resource.resource import load_restore_dic, load_vocab_out
+from khaiii.resource.resource import load_vocab_out, parse_restore_dic


#############
Expand Down Expand Up @@ -139,7 +139,7 @@ def run(args: Namespace):
Args:
args: program arguments
"""
-restore_dic = load_restore_dic('{}/restore.dic'.format(args.rsc_src))
+restore_dic = parse_restore_dic('{}/restore.dic'.format(args.rsc_src))
if not restore_dic:
sys.exit(1)
vocab_out = load_vocab_out(args.rsc_src)
Expand Down
30 changes: 7 additions & 23 deletions rsc/src/base.config.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,8 @@
{
-"batch_grow": 10000,
-"batch_size": 500,
-"check_iter": 10000,
-"context_len": 7,
-"cutoff": 2,
-"debug": false,
-"embed_dim": 30,
-"epoch": 104,
-"gpu_num": 7,
-"hidden_dim": 310,
-"in_pfx": "./data/pos_tagger/munjong",
-"iter_best": 4440000,
-"iteration": 4440000,
-"learning_rate": 3.3813919135227317e-06,
-"log_dir": "./logdir",
-"lr_decay": 0.9,
-"model_id": "munjong.cnn.cut2.win3.emb30.lr0.001.lrd0.9.bs500.ci10000.bg10000",
-"model_name": "cnn",
-"out_dir": "./logdir/munjong.cnn.cut2.win3.emb30.lr0.001.lrd0.9.bs500.ci10000.bg10000",
-"patience": 10,
-"rsc_src": "../rsc/src",
-"window": 3
-}
+"cutoff": 1,
+"embed_dim": 35,
+"hidden_dim": 320,
+"model_id": "munjong.cut1.win4.sdo0.1.emb35.lr0.001.lrd0.9.bs500",
+"rsc_src": "../rsc/src",
+"window": 4
+}
1,391 changes: 545 additions & 846 deletions rsc/src/base.errpatch.auto

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions rsc/src/base.errpatch.manual
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 아래 엔트리는 단위테스트에 사용되는 것으로 삭제하지 마시기 바랍니다.
-지저스크라이스트 지저스/NNG + 크라이스트/NNP 지저스/NNP + 크라이스트/NNP
-지저스 크라이스트 지저스/NNG + _ + 크라이스트/NNP 지저스/NNP + _ + 크라이스트/NNP
+지저스크라이스트 지저스크라이스/NNP + 트/NNG 지저스/NNP + 크라이스트/NNP
+지저스 크라이스트 지저스/NNP + _ + 크라이스/NNP + 트/NNG 지저스/NNP + _ + 크라이스트/NNP
고타마싯다르타 | + 고타마싯다르타/NNP | + 고타마/NNP + 싯다르타/NNP
-무함마드압둘라 무함마드/NNP + 압/NNG + 둘/NNP + 라/EC + | 무함마드/NNP + 압둘라/NNP + |
+무함마드압둘라 무함마드압/NNP + 둘/NR + 라/NNP + | 무함마드/NNP + 압둘라/NNP + |
Binary file modified rsc/src/base.model.pickle
Binary file not shown.
30 changes: 7 additions & 23 deletions rsc/src/large.config.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,8 @@
{
-"batch_grow": 10000,
-"batch_size": 500,
-"check_iter": 10000,
-"context_len": 7,
-"cutoff": 2,
-"debug": false,
-"embed_dim": 150,
-"epoch": 84,
-"gpu_num": 7,
-"hidden_dim": 550,
-"in_pfx": "./data/pos_tagger/munjong",
-"iter_best": 3770000,
-"iteration": 3770000,
-"learning_rate": 7.069650490151056e-06,
-"log_dir": "./logdir",
-"lr_decay": 0.9,
-"model_id": "munjong.cnn.cut2.win3.emb150.lr0.001.lrd0.9.bs500.ci10000.bg10000",
-"model_name": "cnn",
-"out_dir": "./logdir/munjong.cnn.cut2.win3.emb150.lr0.001.lrd0.9.bs500.ci10000.bg10000",
-"patience": 10,
-"rsc_src": "../rsc/src",
-"window": 3
-}
+"cutoff": 1,
+"embed_dim": 180,
+"hidden_dim": 610,
+"model_id": "munjong.cut1.win4.sdo0.1.emb180.lr0.001.lrd0.9.bs500",
+"rsc_src": "../rsc/src",
+"window": 4
+}
Loading

0 comments on commit 61bea08

Please sign in to comment.