Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Delete erroneous sentences #7

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions relation/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ void Dictionary::update(const Parameters& params, DocumentCollection& collection
sentence->calculate_shortest_paths();
sentence->calculate_dep_tree(this);
}
for(auto it = doc->sentences().begin(); it != doc->sentences().end();){
if(!((*it)->is_correct())){
it = doc->sentences().erase(it);
}else{
++it;
}
}
}
}

Expand All @@ -154,6 +161,13 @@ void Dictionary::apply(Document& doc) const{
sentence->calculate_shortest_paths();
sentence->calculate_dep_tree(this);
}
for(auto it = doc.sentences().begin(); it != doc.sentences().end();){
if(!((*it)->is_correct())){
it = doc.sentences().erase(it);
}else{
++it;
}
}
}

void Dictionary::apply(DocumentCollection& collection) const{
Expand Down Expand Up @@ -414,6 +428,7 @@ void Document::read_parse(const string& file, const ParseParameters& parse) {
}
while(getline(ifs, line)){
if(line == "")continue;
std::cerr << line << std::endl;//added by yoshida
vector<string> annotations;
split(annotations, line, bind2nd(equal_to<char>(), '\t'));
int start = atoi(annotations[0].c_str());
Expand Down Expand Up @@ -924,7 +939,9 @@ void Sentence::calculate_dep_tree(const Dictionary* dict){
//int max_child = 0;
for(int idx = 0;idx < nwords;++idx){
if(dep_tree_.node(idx)->parent() < 0){
dep_tree_.set_root(dep_tree_.node(idx));
if(!dep_tree_.set_root(dep_tree_.node(idx))){
is_correct(false);
}
}
// if(max_child < dep_tree_.node(idx)->children().size()){
// max_child = dep_tree_.node(idx)->children().size();
Expand All @@ -933,7 +950,10 @@ void Sentence::calculate_dep_tree(const Dictionary* dict){
//if(dep_tree_.root() != nullptr){
// cerr << "id:" << doc().id() << ":" << id_ << endl;
//}
assert(dep_tree_.root() != nullptr);
if(dep_tree_.root() == nullptr){
cerr << "root is null" << endl;
is_correct(false);
}
}

} /* namespace coin */
14 changes: 12 additions & 2 deletions relation/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,14 @@ class Tree{
/// Returns the node stored under index `idx` in `nodes_`.
/// The index is forwarded unchanged to `nodes_[idx]`; nothing is validated here.
TreeNode *node(int idx){
    TreeNode *const selected = nodes_[idx];
    return selected;
}
void set_root(TreeNode *root){
/// Installs `root` as the root of this tree unless a root is already present.
///
/// @param root node that should become the root.
/// @return true when `root` was stored; false when a root had already been
///         set. In that case a message is written to stderr and the
///         existing root is left untouched.
bool set_root(TreeNode *root){
    if(root_ != nullptr){
        // A second root candidate was offered: report it and let the caller react.
        std::cerr << "root is already set" << std::endl;
        return false;
    }
    // The guard above already guarantees root_ is null at this point, so the
    // former assert(root_ == nullptr) could never fire and has been dropped.
    root_ = root;
    return true;
}
const TreeNode *root() const{
return root_;
Expand All @@ -344,10 +346,11 @@ class Sentence: public Node{
vector<vector<std::pair<int, int> > > paths_;
unordered_set<string> term_ids_;
Tree dep_tree_;
bool is_correct_;
unordered_map<string, unordered_map<string, Constituent*>> nodes_;
public:
Sentence(int start, int end, string id, const Document& doc):
Node(start, end), doc_(doc), id_(id), missing_terms_(0), missing_rels_(0){}
Node(start, end), doc_(doc), id_(id), missing_terms_(0), missing_rels_(0), is_correct_(true){}
virtual ~Sentence();
void add(Word* word){
words_.push_back(word);
Expand Down Expand Up @@ -392,6 +395,12 @@ class Sentence: public Node{
/// Reports whether `term_id` is present in this sentence's `term_ids_` set.
///
/// @param term_id identifier to look up.
/// @return true if the identifier is stored in `term_ids_`, false otherwise.
bool contains_term(const string& term_id){
    const bool present = term_ids_.count(term_id) != 0;
    return present;
}
bool is_correct() const{
return is_correct_;
}
void is_correct(bool correct){
is_correct_ = correct;
}
const vector<Word*>& words() const {
return words_;
}
Expand Down Expand Up @@ -450,6 +459,7 @@ class Document {
}
string text(int start, int len) const{
if(text_->length() < start + len){
std::cerr << convert(*text_) << " " << text_->length() << " " << start << " " << len << std::endl; //added by yoshida
std::cerr << "index is larger than text size." << std::endl;
}
assert(text_->length() >= start + len);
Expand Down
2 changes: 2 additions & 0 deletions relation/RelationExtraction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ int main(int argc, char **argv){
if(exists(p) && is_directory(p)){
for(directory_iterator it(p); it != directory_iterator(); ++it){
if(is_regular_file(*it) && ends_with(it->path().string(), params.text_ext())){

std::cerr << it->path().string() << std::endl; //added by yoshida
Document doc(params, it->path().string().substr(0, it->path().string().length() - params.text_ext().length()));
dict.apply(doc);
model.output(doc.tables());
Expand Down