Skip to content

Commit

Permalink
mwptoolkit v0.0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
LYH-YF committed Oct 14, 2021
1 parent 21545fe commit be5595f
Show file tree
Hide file tree
Showing 29 changed files with 1,289 additions and 3,136 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
/.vscode
/.idea
/mwptoolkit.egg-info
/dist
__pycache__
/dataset/math23k/partition.py
/dataset/ape200k/process.py
/dataset/mawps/dataprocess.py
/dataset/mawps/MAWPS_.json
/dataset/hmwp/process.py
/dataset/math23k/graph2tree_deprel_info.json
/dataset/alg514/stat_equations.py
preprocess.py
pos_info.json
pos_info_.json
span_level_deprel_tree_info.json
deprel_tree_info.json
*.pth
.pypirc
make.bat
Makefile
run_setup.sh
/pretrain/gpt2_cn
/pretrain/gpt2_en
*.log
Expand Down
1 change: 1 addition & 0 deletions PYPI.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ MWPToolkit is a PyTorch-based toolkit for Math Word Problem(MWP) solving. It is
* **Highly modularized framework**. MWPToolkit is built from highly reusable modules and provides convenient interfaces for users. Specifically, the data preprocessor, data loader, encoder, decoder, and evaluator together form the running procedure. Each module can be developed and extended independently.

## News
* **Fix some bugs.**

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Our framework has the following architecture. You could utilize our toolkit to e

## News

* **Fix some bugs.**

## Characteristics

* **Unification and Modularization**. We decouple solvers with different model architectures into highly modularized, reusable components and integrate them into a unified framework consisting of data, model, and evaluation modules. This makes it convenient to study MWPs at a conceptual level and to compare different models fairly.
Expand Down
5 changes: 3 additions & 2 deletions docs/_static/cmd.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0" />

<title>mwptoolkit.config.configuration &mdash; MWPToolkit 0.0.4 documentation</title>
<title>command line &mdash; MWPToolkit 0.0.5 documentation</title>



Expand Down Expand Up @@ -73,7 +73,7 @@
cmd_line = cmd_line+"--dataset="+dataset+" ";

var single_dataset=new Array("math23k","asdiv-a","mawps-single","mawps_asdiv-a_svamp");
var multi_dataset=new Array("draw","mawps","hmwp");
var multi_dataset=new Array("draw","mawps","hmwp","alg514");
if (single_dataset.includes(dataset)){
cmd_line = cmd_line+"--task_type="+"single_equation ";
}
Expand Down Expand Up @@ -316,6 +316,7 @@ <h1>command line<a class="headerlink" href="#cmd.html" title="Permalink to this
<td>
<input id="dataset" style="width: 100%; height: 100%" name="dataset" type="text" list="datasetlist" />
<datalist id="datasetlist">
<option value="alg514" />
<option value="asdiv-a" />
<option value="draw" />
<option value="hmwp" />
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "''"

# The full version, including alpha/beta/rc tags
release = '0.0.4'
release = '0.0.5'


# -- General configuration ---------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion mwptoolkit/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from mwptoolkit.config import configuration
from mwptoolkit.config import configuration
from mwptoolkit.config.configuration import Config
3 changes: 2 additions & 1 deletion mwptoolkit/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from mwptoolkit.data import dataloader,dataset,utils
from mwptoolkit.data import dataloader,dataset,utils

10 changes: 9 additions & 1 deletion mwptoolkit/data/dataloader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from mwptoolkit.data.dataloader import abstract_dataloader,dataloader_ept,dataloader_multiencdec,pretrain_dataloader,single_equation_dataloader,multi_equation_dataloader,template_dataloader
from mwptoolkit.data.dataloader import abstract_dataloader,dataloader_ept,dataloader_multiencdec,pretrain_dataloader,single_equation_dataloader,multi_equation_dataloader,template_dataloader

from mwptoolkit.data.dataloader.abstract_dataloader import AbstractDataLoader
from mwptoolkit.data.dataloader.single_equation_dataloader import SingleEquationDataLoader
from mwptoolkit.data.dataloader.multi_equation_dataloader import MultiEquationDataLoader
from mwptoolkit.data.dataloader.template_dataloader import TemplateDataLoader
from mwptoolkit.data.dataloader.dataloader_ept import DataLoaderEPT
from mwptoolkit.data.dataloader.dataloader_multiencdec import DataLoaderMultiEncDec
from mwptoolkit.data.dataloader.pretrain_dataloader import PretrainDataLoader
9 changes: 2 additions & 7 deletions mwptoolkit/data/dataloader/multi_equation_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,8 @@ def __init__(self, config, dataset):
self.validset_nums = len(dataset.validset)
self.testset_nums = len(dataset.testset)

if config["pretrained_model_path"]:
self.pretrained_tokenzier = AutoTokenizer.from_pretrained(config["pretrained_model_path"])
if config["model"].lower() in ["ept"]:
self.pretrained_tokenzier.add_special_tokens({'additional_special_tokens': ['[N]']})
else:
self.in_pad_token = dataset.in_word2idx[SpecialTokens.PAD_TOKEN]
self.in_unk_token = dataset.in_word2idx[SpecialTokens.UNK_TOKEN]
self.in_pad_token = dataset.in_word2idx[SpecialTokens.PAD_TOKEN]
self.in_unk_token = dataset.in_word2idx[SpecialTokens.UNK_TOKEN]

if self.symbol_for_tree or self.equation_fix == FixType.MultiWayTree:
self.out_pad_token = self.in_pad_token
Expand Down
12 changes: 7 additions & 5 deletions mwptoolkit/data/dataloader/pretrain_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,15 @@ def load_batch(self,batch_data):
num_stack_batch = []

group_nums_batch = []
for data in batch_data:
data['question_']=self.dataset.tokenizer.tokenize(' '.join(data["question"]))
batch_data=sorted(batch_data,key=lambda x:len(x['question_']),reverse=True)
# for data in batch_data:
# data['question_']=self.dataset.tokenizer.tokenize(' '.join(data["question"]))
#batch_data=sorted(batch_data,key=lambda x:len(x['question_']),reverse=True)
batch_data = sorted(batch_data, key=lambda x: len(x['question']), reverse=True)
for data in batch_data:
ques_tensor = []
equ_tensor = []
temp_tensor = []
sentence = data["question_"]
sentence = data["question"]
equation = data["equation"]
template = data["template"]

Expand Down Expand Up @@ -281,7 +282,8 @@ def load_batch(self,batch_data):
def _word2idx(self, sentence):
sentence_idx = []

sentence_idx = self.dataset.tokenizer.convert_tokens_to_ids(sentence)
# sentence_idx = self.dataset.tokenizer.convert_tokens_to_ids(sentence)
sentence_idx = self.dataset.tokenizer.encode(sentence,add_special_token=False)

return sentence_idx

Expand Down
9 changes: 2 additions & 7 deletions mwptoolkit/data/dataloader/single_equation_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,8 @@ def __init__(self, config, dataset):
self.validset_nums = len(dataset.validset)
self.testset_nums = len(dataset.testset)

if config["pretrained_model"]:
self.pretrained_tokenzier = AutoTokenizer.from_pretrained(config["pretrained_model"])
if config["model"].lower() in ["ept"]:
self.pretrained_tokenzier.add_special_tokens({'additional_special_tokens': ['[N]']})
else:
self.in_pad_token = dataset.in_word2idx[SpecialTokens.PAD_TOKEN]
self.in_unk_token = dataset.in_word2idx[SpecialTokens.UNK_TOKEN]
self.in_pad_token = dataset.in_word2idx[SpecialTokens.PAD_TOKEN]
self.in_unk_token = dataset.in_word2idx[SpecialTokens.UNK_TOKEN]

if self.symbol_for_tree or self.equation_fix == FixType.MultiWayTree:
self.out_pad_token = self.in_pad_token
Expand Down
10 changes: 9 additions & 1 deletion mwptoolkit/data/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from mwptoolkit.data.dataset import abstract_dataset,dataset_ept,dataset_multiencdec,pretrain_dataset,single_equation_dataset,multi_equation_dataset,template_dataset
from mwptoolkit.data.dataset import abstract_dataset,dataset_ept,dataset_multiencdec,pretrain_dataset,single_equation_dataset,multi_equation_dataset,template_dataset

from mwptoolkit.data.dataset.abstract_dataset import AbstractDataset
from mwptoolkit.data.dataset.single_equation_dataset import SingleEquationDataset
from mwptoolkit.data.dataset.multi_equation_dataset import MultiEquationDataset
from mwptoolkit.data.dataset.template_dataset import TemplateDataset
from mwptoolkit.data.dataset.dataset_ept import DatasetEPT
from mwptoolkit.data.dataset.dataset_multiencdec import DatasetMultiEncDec
from mwptoolkit.data.dataset.pretrain_dataset import PretrainDataset
4 changes: 3 additions & 1 deletion mwptoolkit/evaluate/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from mwptoolkit.evaluate import evaluator
from mwptoolkit.evaluate import evaluator

from mwptoolkit.evaluate.evaluator import PrefixEvaluator,PostfixEvaluator,InfixEvaluator,MultiWayTreeEvaluator,MultiEncDecEvaluator
Loading

0 comments on commit be5595f

Please sign in to comment.