Skip to content

Commit

Permalink
In progress
Browse files Browse the repository at this point in the history
  • Loading branch information
Shashi Narayan committed May 3, 2016
1 parent 38abd33 commit 32e1ebf
Show file tree
Hide file tree
Showing 28 changed files with 4,293 additions and 0 deletions.
599 changes: 599 additions & 0 deletions source/boxer_graph_module.py

Large diffs are not rendered by default.

Binary file added source/boxer_graph_module.pyc
Binary file not shown.
591 changes: 591 additions & 0 deletions source/boxer_graph_module.py~

Large diffs are not rendered by default.

309 changes: 309 additions & 0 deletions source/explore_training_graph.py

Large diffs are not rendered by default.

Binary file added source/explore_training_graph.pyc
Binary file not shown.
301 changes: 301 additions & 0 deletions source/explore_training_graph.py~

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions source/function_select_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

#===================================================================================
#description : Methods for training graph and features exploration =
#author : Shashi Narayan, shashi.narayan(at){ed.ac.uk,loria.fr,gmail.com})=
#date : Created in 2014, Later revised in April 2016. =
#version : 0.1 =
#===================================================================================


from methods_training_graph import Method_LED, Method_OVERLAP_LED
from methods_feature_extract import Feature_Init, Feature_Nov27

def select_training_graph_method(METHOD_TRAINING_GRAPH):
    """Return the training-graph method object registered under a name.

    Every known method object is constructed, then the one keyed by
    METHOD_TRAINING_GRAPH is returned. An unknown name raises KeyError.
    """
    available_methods = {
        "method-0.99-lteq-lt": Method_OVERLAP_LED(0.99, "lteq", "lt"),
        "method-0.75-lteq-lt": Method_OVERLAP_LED(0.75, "lteq", "lt"),
        "method-0.5-lteq-lteq": Method_OVERLAP_LED(0.5, "lteq", "lteq"),
        "method-led-lteq": Method_LED("lteq", "lteq", "lteq"),
        "method-led-lt": Method_LED("lt", "lt", "lt"),
    }
    return available_methods[METHOD_TRAINING_GRAPH]

def select_feature_extract_method(METHOD_FEATURE_EXTRACT):
    """Return the feature-extraction object registered under a name.

    Both known feature extractors are constructed, then the one keyed by
    METHOD_FEATURE_EXTRACT is returned. An unknown name raises KeyError.
    """
    available_extractors = {
        "feature-init": Feature_Init(),
        "feature-Nov27": Feature_Nov27(),
    }
    return available_extractors[METHOD_FEATURE_EXTRACT]
Binary file added source/function_select_methods.pyc
Binary file not shown.
18 changes: 18 additions & 0 deletions source/function_select_methods.py~
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

from methods_training_graph import Method_LED, Method_OVERLAP_LED
from methods_feature_extract import Feature_Init, Feature_Nov27

def select_training_graph_method(METHOD_TRAINING_GRAPH):
    """Return the training-graph method object registered under a name.

    Every known method object is constructed, then the one keyed by
    METHOD_TRAINING_GRAPH is returned. An unknown name raises KeyError.
    """
    available_methods = {
        "method-0.99-lteq-lt": Method_OVERLAP_LED(0.99, "lteq", "lt"),
        "method-0.75-lteq-lt": Method_OVERLAP_LED(0.75, "lteq", "lt"),
        "method-0.5-lteq-lteq": Method_OVERLAP_LED(0.5, "lteq", "lteq"),
        "method-led-lteq": Method_LED("lteq", "lteq", "lteq"),
        "method-led-lt": Method_LED("lt", "lt", "lt"),
    }
    return available_methods[METHOD_TRAINING_GRAPH]

def select_feature_extract_method(METHOD_FEATURE_EXTRACT):
    """Return the feature-extraction object registered under a name.

    Both known feature extractors are constructed, then the one keyed by
    METHOD_FEATURE_EXTRACT is returned. An unknown name raises KeyError.
    """
    available_extractors = {
        "feature-init": Feature_Init(),
        "feature-Nov27": Feature_Nov27(),
    }
    return available_extractors[METHOD_FEATURE_EXTRACT]
108 changes: 108 additions & 0 deletions source/functions_configuration_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#===================================================================================
#title : functions_configuration_file.py =
#description : Prepare/READ configuration file =
#author : Shashi Narayan, shashi.narayan(at){ed.ac.uk,loria.fr,gmail.com})=
#date : Created in 2014, Later revised in April 2016. =
#version : 0.1 =
#===================================================================================

def write_config_file(config_filename, config_data_dict):
    """Write the configuration held in config_data_dict to config_filename.

    The file starts with a fixed banner, followed by four comment-headed
    groups ("# Generation Information", "# Step-1", "# Step-2", "# Step-3").
    Inside each group, every key that is present in config_data_dict is
    written as a "[KEY]" line, its value on the next line, and a blank line.
    Keys that are absent are skipped; the group comment lines are always
    written.

    Value rendering per key:
      * list-valued keys are joined with single spaces,
      * integer-valued keys are passed through str(),
      * all other keys are concatenated as-is, so a non-string value raises
        TypeError.

    The file is opened in a ``with`` block so the handle is closed even if
    rendering or writing a section raises.
    """
    def as_is(value):
        return value

    def space_joined(value):
        return " ".join(value)

    # (group comment line, ((key, renderer), ...)) in output order.
    layout = (
        ("# Generation Information\n", (
            ("TRAIN-BOXER-GRAPH", as_is),
            ("TRANSFORMATION-MODEL", space_joined),
            ("MAX-SPLIT-SIZE", str),
            ("RESTRICTED-DROP-RELATION", space_joined),
            ("ALLOWED-DROP-MODIFIER", space_joined),
            ("METHOD-TRAINING-GRAPH", as_is),
            ("METHOD-FEATURE-EXTRACT", as_is),
            ("NUM-EM-ITERATION", str),
            ("LANGUAGE-MODEL", as_is),
        )),
        ("# Step-1\n", (
            ("TRAIN-TRAINING-GRAPH", as_is),
        )),
        ("# Step-2\n", (
            ("TRANSFORMATION-MODEL-DIR", as_is),
        )),
        ("# Step-3\n", (
            ("MOSES-COMPLEX-SIMPLE-DIR", as_is),
        )),
    )

    with open(config_filename, "w") as config_file:
        config_file.write("##############################################################\n"+
                          "####### Discourse-Complex-Simple Congifuration File ##########\n"+
                          "##############################################################\n\n")

        for group_comment, sections in layout:
            config_file.write(group_comment)
            for key, render in sections:
                if key in config_data_dict:
                    config_file.write("[" + key + "]\n" + render(config_data_dict[key]) + "\n\n")


def parser_config_file(config_file):
    """Read a configuration file and return its recognised sections as a dict.

    The file content is stripped and split into lines. A line that starts
    with "[" is treated as a section header; the text between its first and
    last character (after stripping) is the key and the following line is
    the value. Header and value are consumed together, also for headers that
    are not recognised. Any other line is ignored.

    Value conversion per key:
      * TRANSFORMATION-MODEL, RESTRICTED-DROP-RELATION and
        ALLOWED-DROP-MODIFIER become whitespace-split lists,
      * MAX-SPLIT-SIZE and NUM-EM-ITERATION become int (ValueError if the
        value is not an integer literal),
      * every other recognised key keeps the stripped string.

    A recognised header that is the very last line of the file (which
    happens when the final section has an empty value, because the file
    content is stripped) is treated as having an empty value instead of
    raising IndexError. The file is read inside a ``with`` block so the
    handle is always closed.
    """
    def as_text(value):
        return value

    def as_words(value):
        return value.split()

    converters = {
        # Start Information
        "TRAIN-BOXER-GRAPH": as_text,
        "TRANSFORMATION-MODEL": as_words,
        "MAX-SPLIT-SIZE": int,
        "RESTRICTED-DROP-RELATION": as_words,
        "ALLOWED-DROP-MODIFIER": as_words,
        "METHOD-TRAINING-GRAPH": as_text,
        "METHOD-FEATURE-EXTRACT": as_text,
        "NUM-EM-ITERATION": int,
        "LANGUAGE-MODEL": as_text,
        # Step 1
        "TRAIN-TRAINING-GRAPH": as_text,
        # Step 2
        "TRANSFORMATION-MODEL-DIR": as_text,
        # Step 3
        "MOSES-COMPLEX-SIMPLE-DIR": as_text,
    }

    with open(config_file, "r") as handle:
        config_data = handle.read().strip().split("\n")

    config_data_dict = {}
    total = len(config_data)
    count = 0
    while count < total:
        line = config_data[count]
        if not line.startswith("["):
            count += 1
            continue

        key = line.strip()[1:-1]
        if key in converters:
            # The value line may be missing when the header ends the file.
            value = config_data[count + 1].strip() if count + 1 < total else ""
            config_data_dict[key] = converters[key](value)

        # Skip the header and its value line, recognised or not.
        count += 2
    return config_data_dict
Binary file added source/functions_configuration_file.pyc
Binary file not shown.
101 changes: 101 additions & 0 deletions source/functions_configuration_file.py~
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@

def write_config_file(config_filename, config_data_dict):
    """Write the configuration held in config_data_dict to config_filename.

    The file starts with a fixed banner, followed by four comment-headed
    groups ("# Generation Information", "# Step-1", "# Step-2", "# Step-3").
    Inside each group, every key that is present in config_data_dict is
    written as a "[KEY]" line, its value on the next line, and a blank line.
    Keys that are absent are skipped; the group comment lines are always
    written.

    Value rendering per key:
      * list-valued keys are joined with single spaces,
      * integer-valued keys are passed through str(),
      * all other keys are concatenated as-is, so a non-string value raises
        TypeError.

    The file is opened in a ``with`` block so the handle is closed even if
    rendering or writing a section raises.
    """
    def as_is(value):
        return value

    def space_joined(value):
        return " ".join(value)

    # (group comment line, ((key, renderer), ...)) in output order.
    layout = (
        ("# Generation Information\n", (
            ("TRAIN-BOXER-GRAPH", as_is),
            ("TRANSFORMATION-MODEL", space_joined),
            ("MAX-SPLIT-SIZE", str),
            ("RESTRICTED-DROP-RELATION", space_joined),
            ("ALLOWED-DROP-MODIFIER", space_joined),
            ("METHOD-TRAINING-GRAPH", as_is),
            ("METHOD-FEATURE-EXTRACT", as_is),
            ("NUM-EM-ITERATION", str),
            ("LANGUAGE-MODEL", as_is),
        )),
        ("# Step-1\n", (
            ("TRAIN-TRAINING-GRAPH", as_is),
        )),
        ("# Step-2\n", (
            ("TRANSFORMATION-MODEL-DIR", as_is),
        )),
        ("# Step-3\n", (
            ("MOSES-COMPLEX-SIMPLE-DIR", as_is),
        )),
    )

    with open(config_filename, "w") as config_file:
        config_file.write("##############################################################\n"+
                          "####### Discourse-Complex-Simple Congifuration File ##########\n"+
                          "##############################################################\n\n")

        for group_comment, sections in layout:
            config_file.write(group_comment)
            for key, render in sections:
                if key in config_data_dict:
                    config_file.write("[" + key + "]\n" + render(config_data_dict[key]) + "\n\n")


def parser_config_file(config_file):
    """Read a configuration file and return its recognised sections as a dict.

    The file content is stripped and split into lines. A line that starts
    with "[" is treated as a section header; the text between its first and
    last character (after stripping) is the key and the following line is
    the value. Header and value are consumed together, also for headers that
    are not recognised. Any other line is ignored.

    Value conversion per key:
      * TRANSFORMATION-MODEL, RESTRICTED-DROP-RELATION and
        ALLOWED-DROP-MODIFIER become whitespace-split lists,
      * MAX-SPLIT-SIZE and NUM-EM-ITERATION become int (ValueError if the
        value is not an integer literal),
      * every other recognised key keeps the stripped string.

    A recognised header that is the very last line of the file (which
    happens when the final section has an empty value, because the file
    content is stripped) is treated as having an empty value instead of
    raising IndexError. The file is read inside a ``with`` block so the
    handle is always closed.
    """
    def as_text(value):
        return value

    def as_words(value):
        return value.split()

    converters = {
        # Start Information
        "TRAIN-BOXER-GRAPH": as_text,
        "TRANSFORMATION-MODEL": as_words,
        "MAX-SPLIT-SIZE": int,
        "RESTRICTED-DROP-RELATION": as_words,
        "ALLOWED-DROP-MODIFIER": as_words,
        "METHOD-TRAINING-GRAPH": as_text,
        "METHOD-FEATURE-EXTRACT": as_text,
        "NUM-EM-ITERATION": int,
        "LANGUAGE-MODEL": as_text,
        # Step 1
        "TRAIN-TRAINING-GRAPH": as_text,
        # Step 2
        "TRANSFORMATION-MODEL-DIR": as_text,
        # Step 3
        "MOSES-COMPLEX-SIMPLE-DIR": as_text,
    }

    with open(config_file, "r") as handle:
        config_data = handle.read().strip().split("\n")

    config_data_dict = {}
    total = len(config_data)
    count = 0
    while count < total:
        line = config_data[count]
        if not line.startswith("["):
            count += 1
            continue

        key = line.strip()[1:-1]
        if key in converters:
            # The value line may be missing when the header ends the file.
            value = config_data[count + 1].strip() if count + 1 < total else ""
            config_data_dict[key] = converters[key](value)

        # Skip the header and its value line, recognised or not.
        count += 2
    return config_data_dict
77 changes: 77 additions & 0 deletions source/functions_prepare_elementtree_dot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python
#===================================================================================
#title : functions_prepare_elementtree_dot.py =
#description : Prepare dot file =
#author : Shashi Narayan, shashi.narayan(at){ed.ac.uk,loria.fr,gmail.com})=
#date : Created in 2014, Later revised in April 2016. =
#version : 0.1 =
#===================================================================================


import os
import xml.etree.ElementTree as ET
from xml.dom import minidom

def prettify_xml_element(element):
    """Return the Element as a pretty-printed, UTF-8 encoded XML string.

    The element is serialised with ElementTree, re-parsed with minidom and
    its document element is re-emitted with a one-space indent per level.
    """
    document = minidom.parseString(ET.tostring(element))
    return document.documentElement.toprettyxml(indent=" ").encode("utf-8")

############################### Elementary Tree ##########################################

def prepare_write_sentence_element(output_stream, sentid, main_sentence, main_sent_dict, simple_sentences, boxer_graph, training_graph):
    """Build a <sentence> element and write it, pretty-printed, to output_stream.

    The element carries str(sentid) as its "id" attribute and contains:
      * <main>: the main sentence text in <s>, plus <winfo> with one <w> per
        key of main_sent_dict in sorted order; each <w> has the key as "id",
        item [1] of the entry as "pos" and item [0] as its text,
      * <simple-set>: one <simple><s>...</s></simple> per simple sentence,
      * the element returned by boxer_graph.convert_to_elementarytree(),
      * the element returned by training_graph.convert_to_elementarytree().
    """
    sentence = ET.Element("sentence", {"id": str(sentid)})

    # Main sentence and its per-word information
    main = ET.SubElement(sentence, "main")
    ET.SubElement(main, "s").text = main_sentence
    wordinfo = ET.SubElement(main, "winfo")
    for position in sorted(main_sent_dict):
        entry = main_sent_dict[position]
        token = entry[0]
        word = ET.SubElement(wordinfo, "w", {"id": str(position), "pos": entry[1]})
        word.text = token

    # Simple sentences
    simpleset = ET.SubElement(sentence, "simple-set")
    for simple_sentence in simple_sentences:
        simple = ET.SubElement(simpleset, "simple")
        ET.SubElement(simple, "s").text = simple_sentence

    # Boxer graph followed by training graph
    sentence.append(boxer_graph.convert_to_elementarytree())
    sentence.append(training_graph.convert_to_elementarytree())

    output_stream.write(prettify_xml_element(sentence))

############################ Dot - PNG File ###################################################

def run_visual_graph_creator(sentid, main_sentence, main_sent_dict, simple_sentences, boxer_graph, training_graph):
    """Write dot files for the boxer and training graphs and render them to PNG.

    For each graph a dot string is obtained from the graph object, written to
    /tmp/<kind>-<sentid>.dot and converted with the external command
    "dot -Tpng ... -o /tmp/<kind>-<sentid>.png", where <kind> is
    "boxer-graph" or "training-graph". The boxer graph is handled first.

    sentid is concatenated into the paths and the progress message, so it
    must be a string.

    Each dot string is computed before its file is opened, and the file is
    written inside a ``with`` block, so a failing conversion neither leaks a
    file handle nor leaves an empty .dot file behind.
    """
    def render(kind, dotstring):
        # Write the dot source, then ask Graphviz to turn it into a PNG.
        dot_path = "/tmp/" + kind + "-" + sentid + ".dot"
        png_path = "/tmp/" + kind + "-" + sentid + ".png"
        with open(dot_path, "w") as foutput:
            foutput.write(dotstring)
        os.system("dot -Tpng " + dot_path + " -o " + png_path)

    print("Creating boxer and training graphs for sentence id : "+sentid+" ...")

    # Boxer graph
    boxer_dotstring = boxer_graph.convert_to_dotstring(sentid, main_sentence, main_sent_dict, simple_sentences)
    render("boxer-graph", boxer_dotstring)

    # Training graph
    train_dotstring = training_graph.convert_to_dotstring(main_sent_dict, boxer_graph)
    render("training-graph", train_dotstring)
Binary file added source/functions_prepare_elementtree_dot.pyc
Binary file not shown.
Loading

0 comments on commit 32e1ebf

Please sign in to comment.