In progress

shashiongithub · May 3, 2016 · 32e1ebf · 32e1ebf
1 parent 38abd33
commit 32e1ebf
Show file tree

Hide file tree

Showing 28 changed files with 4,293 additions and 0 deletions.
diff --git a/source/boxer_graph_module.py b/source/boxer_graph_module.py
diff --git a/source/boxer_graph_module.pyc b/source/boxer_graph_module.pyc
diff --git a/source/boxer_graph_module.py~ b/source/boxer_graph_module.py~
diff --git a/source/explore_training_graph.py b/source/explore_training_graph.py
diff --git a/source/explore_training_graph.pyc b/source/explore_training_graph.pyc
diff --git a/source/explore_training_graph.py~ b/source/explore_training_graph.py~
diff --git a/source/function_select_methods.py b/source/function_select_methods.py
@@ -0,0 +1,26 @@
+
+#===================================================================================
+#description     : Methods for training graph and features exploration             =
+#author          : Shashi Narayan, shashi.narayan(at){ed.ac.uk,loria.fr,gmail.com})=                                    
+#date            : Created in 2014, Later revised in April 2016.                   =
+#version         : 0.1                                                             =
+#===================================================================================
+
+
+from methods_training_graph import Method_LED, Method_OVERLAP_LED
+from methods_feature_extract import Feature_Init, Feature_Nov27
+
+def select_training_graph_method(METHOD_TRAINING_GRAPH):
+    return{
+	"method-0.99-lteq-lt": Method_OVERLAP_LED(0.99, "lteq", "lt"),
+        "method-0.75-lteq-lt": Method_OVERLAP_LED(0.75, "lteq", "lt"),
+        "method-0.5-lteq-lteq": Method_OVERLAP_LED(0.5, "lteq", "lteq"),
+        "method-led-lteq": Method_LED("lteq", "lteq", "lteq"),
+        "method-led-lt": Method_LED("lt", "lt", "lt")
+        }[METHOD_TRAINING_GRAPH]
+
+def select_feature_extract_method(METHOD_FEATURE_EXTRACT):
+    return{
+        "feature-init": Feature_Init(),
+        "feature-Nov27": Feature_Nov27(),
+        }[METHOD_FEATURE_EXTRACT]
diff --git a/source/function_select_methods.pyc b/source/function_select_methods.pyc
diff --git a/source/function_select_methods.py~ b/source/function_select_methods.py~
@@ -0,0 +1,18 @@
+
+from methods_training_graph import Method_LED, Method_OVERLAP_LED
+from methods_feature_extract import Feature_Init, Feature_Nov27
+
+def select_training_graph_method(METHOD_TRAINING_GRAPH):
+    return{
+	"method-0.99-lteq-lt": Method_OVERLAP_LED(0.99, "lteq", "lt"),
+        "method-0.75-lteq-lt": Method_OVERLAP_LED(0.75, "lteq", "lt"),
+        "method-0.5-lteq-lteq": Method_OVERLAP_LED(0.5, "lteq", "lteq"),
+        "method-led-lteq": Method_LED("lteq", "lteq", "lteq"),
+        "method-led-lt": Method_LED("lt", "lt", "lt")
+        }[METHOD_TRAINING_GRAPH]
+
+def select_feature_extract_method(METHOD_FEATURE_EXTRACT):
+    return{
+        "feature-init": Feature_Init(),
+        "feature-Nov27": Feature_Nov27(),
+        }[METHOD_FEATURE_EXTRACT]
diff --git a/source/functions_configuration_file.py b/source/functions_configuration_file.py
@@ -0,0 +1,108 @@
+#===================================================================================
+#title           : functions_configuration_file.py                                 =
+#description     : Prepare/READ configuration file                                 =
+#author          : Shashi Narayan, shashi.narayan(at){ed.ac.uk,loria.fr,gmail.com})=                                    
+#date            : Created in 2014, Later revised in April 2016.                   =
+#version         : 0.1                                                             =
+#===================================================================================
+
+def write_config_file(config_filename, config_data_dict):
+    config_file = open(config_filename, "w")
+
+    config_file.write("##############################################################\n"+
+                      "####### Discourse-Complex-Simple Congifuration File ##########\n"+
+                      "##############################################################\n\n")
+
+    config_file.write("# Generation Information\n")
+    if "TRAIN-BOXER-GRAPH" in config_data_dict:
+        config_file.write("[TRAIN-BOXER-GRAPH]\n"+config_data_dict["TRAIN-BOXER-GRAPH"]+"\n\n")
+
+    if "TRANSFORMATION-MODEL" in config_data_dict:
+        config_file.write("[TRANSFORMATION-MODEL]\n"+" ".join(config_data_dict["TRANSFORMATION-MODEL"])+"\n\n")
+
+    if "MAX-SPLIT-SIZE" in config_data_dict:
+        config_file.write("[MAX-SPLIT-SIZE]\n"+str(config_data_dict["MAX-SPLIT-SIZE"])+"\n\n")
+
+    if "RESTRICTED-DROP-RELATION" in config_data_dict:
+        config_file.write("[RESTRICTED-DROP-RELATION]\n"+" ".join(config_data_dict["RESTRICTED-DROP-RELATION"])+"\n\n")
+
+    if "ALLOWED-DROP-MODIFIER" in config_data_dict:
+        config_file.write("[ALLOWED-DROP-MODIFIER]\n"+" ".join(config_data_dict["ALLOWED-DROP-MODIFIER"])+"\n\n")
+
+    if "METHOD-TRAINING-GRAPH" in config_data_dict:
+        config_file.write("[METHOD-TRAINING-GRAPH]\n"+config_data_dict["METHOD-TRAINING-GRAPH"]+"\n\n")
+
+    if "METHOD-FEATURE-EXTRACT" in config_data_dict:
+        config_file.write("[METHOD-FEATURE-EXTRACT]\n"+config_data_dict["METHOD-FEATURE-EXTRACT"]+"\n\n")
+
+    if "NUM-EM-ITERATION" in config_data_dict:
+        config_file.write("[NUM-EM-ITERATION]\n"+str(config_data_dict["NUM-EM-ITERATION"])+"\n\n")
+
+    if "LANGUAGE-MODEL" in config_data_dict:
+        config_file.write("[LANGUAGE-MODEL]\n"+config_data_dict["LANGUAGE-MODEL"]+"\n\n")
+
+    config_file.write("# Step-1\n")
+    if "TRAIN-TRAINING-GRAPH" in config_data_dict:
+        config_file.write("[TRAIN-TRAINING-GRAPH]\n"+config_data_dict["TRAIN-TRAINING-GRAPH"]+"\n\n")
+
+    config_file.write("# Step-2\n")
+    if "TRANSFORMATION-MODEL-DIR" in config_data_dict:
+        config_file.write("[TRANSFORMATION-MODEL-DIR]\n"+config_data_dict["TRANSFORMATION-MODEL-DIR"]+"\n\n")
+
+    config_file.write("# Step-3\n")
+    if "MOSES-COMPLEX-SIMPLE-DIR" in config_data_dict:
+        config_file.write("[MOSES-COMPLEX-SIMPLE-DIR]\n"+config_data_dict["MOSES-COMPLEX-SIMPLE-DIR"]+"\n\n")
+
+    config_file.close()
+
+
+def parser_config_file(config_file):
+    config_data = (open(config_file, "r").read().strip()).split("\n")
+    config_data_dict = {}
+    count = 0
+    while count < len(config_data):
+        if config_data[count].startswith("["):
+            # Start Information
+            if config_data[count].strip()[1:-1] == "TRAIN-BOXER-GRAPH":
+                config_data_dict["TRAIN-BOXER-GRAPH"] = config_data[count+1].strip()
+
+            if config_data[count].strip()[1:-1] == "TRANSFORMATION-MODEL":
+                config_data_dict["TRANSFORMATION-MODEL"] = config_data[count+1].strip().split()
+
+            if config_data[count].strip()[1:-1] == "MAX-SPLIT-SIZE":
+                config_data_dict["MAX-SPLIT-SIZE"] = int(config_data[count+1].strip())
+
+            if config_data[count].strip()[1:-1] == "RESTRICTED-DROP-RELATION":
+                config_data_dict["RESTRICTED-DROP-RELATION"] = config_data[count+1].strip().split()
+
+            if config_data[count].strip()[1:-1] == "ALLOWED-DROP-MODIFIER":
+                config_data_dict["ALLOWED-DROP-MODIFIER"] = config_data[count+1].strip().split()            
+
+            if config_data[count].strip()[1:-1] == "METHOD-TRAINING-GRAPH":
+                config_data_dict["METHOD-TRAINING-GRAPH"] = config_data[count+1].strip()
+
+            if config_data[count].strip()[1:-1] == "METHOD-FEATURE-EXTRACT":
+                config_data_dict["METHOD-FEATURE-EXTRACT"] = config_data[count+1].strip()
+
+            if config_data[count].strip()[1:-1] == "NUM-EM-ITERATION":
+                config_data_dict["NUM-EM-ITERATION"] = int(config_data[count+1].strip())   
+
+            if config_data[count].strip()[1:-1] == "LANGUAGE-MODEL":
+                config_data_dict["LANGUAGE-MODEL"] = config_data[count+1].strip()
+
+            # Step 1
+            if config_data[count].strip()[1:-1] == "TRAIN-TRAINING-GRAPH":
+                config_data_dict["TRAIN-TRAINING-GRAPH"] = config_data[count+1].strip()
+
+            # Step 2
+            if config_data[count].strip()[1:-1] == "TRANSFORMATION-MODEL-DIR":
+                config_data_dict["TRANSFORMATION-MODEL-DIR"] = config_data[count+1].strip()
+
+            # Step 3
+            if config_data[count].strip()[1:-1] == "MOSES-COMPLEX-SIMPLE-DIR":
+                config_data_dict["MOSES-COMPLEX-SIMPLE-DIR"] = config_data[count+1].strip()
+
+            count += 2
+        else:
+            count += 1
+    return config_data_dict
diff --git a/source/functions_configuration_file.pyc b/source/functions_configuration_file.pyc
diff --git a/source/functions_configuration_file.py~ b/source/functions_configuration_file.py~
@@ -0,0 +1,101 @@
+
+def write_config_file(config_filename, config_data_dict):
+    config_file = open(config_filename, "w")
+
+    config_file.write("##############################################################\n"+
+                      "####### Discourse-Complex-Simple Congifuration File ##########\n"+
+                      "##############################################################\n\n")
+
+    config_file.write("# Generation Information\n")
+    if "TRAIN-BOXER-GRAPH" in config_data_dict:
+        config_file.write("[TRAIN-BOXER-GRAPH]\n"+config_data_dict["TRAIN-BOXER-GRAPH"]+"\n\n")
+
+    if "TRANSFORMATION-MODEL" in config_data_dict:
+        config_file.write("[TRANSFORMATION-MODEL]\n"+" ".join(config_data_dict["TRANSFORMATION-MODEL"])+"\n\n")
+
+    if "MAX-SPLIT-SIZE" in config_data_dict:
+        config_file.write("[MAX-SPLIT-SIZE]\n"+str(config_data_dict["MAX-SPLIT-SIZE"])+"\n\n")
+
+    if "RESTRICTED-DROP-RELATION" in config_data_dict:
+        config_file.write("[RESTRICTED-DROP-RELATION]\n"+" ".join(config_data_dict["RESTRICTED-DROP-RELATION"])+"\n\n")
+
+    if "ALLOWED-DROP-MODIFIER" in config_data_dict:
+        config_file.write("[ALLOWED-DROP-MODIFIER]\n"+" ".join(config_data_dict["ALLOWED-DROP-MODIFIER"])+"\n\n")
+
+    if "METHOD-TRAINING-GRAPH" in config_data_dict:
+        config_file.write("[METHOD-TRAINING-GRAPH]\n"+config_data_dict["METHOD-TRAINING-GRAPH"]+"\n\n")
+
+    if "METHOD-FEATURE-EXTRACT" in config_data_dict:
+        config_file.write("[METHOD-FEATURE-EXTRACT]\n"+config_data_dict["METHOD-FEATURE-EXTRACT"]+"\n\n")
+
+    if "NUM-EM-ITERATION" in config_data_dict:
+        config_file.write("[NUM-EM-ITERATION]\n"+str(config_data_dict["NUM-EM-ITERATION"])+"\n\n")
+
+    if "LANGUAGE-MODEL" in config_data_dict:
+        config_file.write("[LANGUAGE-MODEL]\n"+config_data_dict["LANGUAGE-MODEL"]+"\n\n")
+
+    config_file.write("# Step-1\n")
+    if "TRAIN-TRAINING-GRAPH" in config_data_dict:
+        config_file.write("[TRAIN-TRAINING-GRAPH]\n"+config_data_dict["TRAIN-TRAINING-GRAPH"]+"\n\n")
+
+    config_file.write("# Step-2\n")
+    if "TRANSFORMATION-MODEL-DIR" in config_data_dict:
+        config_file.write("[TRANSFORMATION-MODEL-DIR]\n"+config_data_dict["TRANSFORMATION-MODEL-DIR"]+"\n\n")
+
+    config_file.write("# Step-3\n")
+    if "MOSES-COMPLEX-SIMPLE-DIR" in config_data_dict:
+        config_file.write("[MOSES-COMPLEX-SIMPLE-DIR]\n"+config_data_dict["MOSES-COMPLEX-SIMPLE-DIR"]+"\n\n")
+
+    config_file.close()
+
+
+def parser_config_file(config_file):
+    config_data = (open(config_file, "r").read().strip()).split("\n")
+    config_data_dict = {}
+    count = 0
+    while count < len(config_data):
+        if config_data[count].startswith("["):
+            # Start Information
+            if config_data[count].strip()[1:-1] == "TRAIN-BOXER-GRAPH":
+                config_data_dict["TRAIN-BOXER-GRAPH"] = config_data[count+1].strip()
+
+            if config_data[count].strip()[1:-1] == "TRANSFORMATION-MODEL":
+                config_data_dict["TRANSFORMATION-MODEL"] = config_data[count+1].strip().split()
+
+            if config_data[count].strip()[1:-1] == "MAX-SPLIT-SIZE":
+                config_data_dict["MAX-SPLIT-SIZE"] = int(config_data[count+1].strip())
+
+            if config_data[count].strip()[1:-1] == "RESTRICTED-DROP-RELATION":
+                config_data_dict["RESTRICTED-DROP-RELATION"] = config_data[count+1].strip().split()
+
+            if config_data[count].strip()[1:-1] == "ALLOWED-DROP-MODIFIER":
+                config_data_dict["ALLOWED-DROP-MODIFIER"] = config_data[count+1].strip().split()            
+
+            if config_data[count].strip()[1:-1] == "METHOD-TRAINING-GRAPH":
+                config_data_dict["METHOD-TRAINING-GRAPH"] = config_data[count+1].strip()
+
+            if config_data[count].strip()[1:-1] == "METHOD-FEATURE-EXTRACT":
+                config_data_dict["METHOD-FEATURE-EXTRACT"] = config_data[count+1].strip()
+
+            if config_data[count].strip()[1:-1] == "NUM-EM-ITERATION":
+                config_data_dict["NUM-EM-ITERATION"] = int(config_data[count+1].strip())   
+
+            if config_data[count].strip()[1:-1] == "LANGUAGE-MODEL":
+                config_data_dict["LANGUAGE-MODEL"] = config_data[count+1].strip()
+
+            # Step 1
+            if config_data[count].strip()[1:-1] == "TRAIN-TRAINING-GRAPH":
+                config_data_dict["TRAIN-TRAINING-GRAPH"] = config_data[count+1].strip()
+
+            # Step 2
+            if config_data[count].strip()[1:-1] == "TRANSFORMATION-MODEL-DIR":
+                config_data_dict["TRANSFORMATION-MODEL-DIR"] = config_data[count+1].strip()
+
+            # Step 3
+            if config_data[count].strip()[1:-1] == "MOSES-COMPLEX-SIMPLE-DIR":
+                config_data_dict["MOSES-COMPLEX-SIMPLE-DIR"] = config_data[count+1].strip()
+
+            count += 2
+        else:
+            count += 1
+    return config_data_dict
diff --git a/source/functions_prepare_elementtree_dot.py b/source/functions_prepare_elementtree_dot.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#===================================================================================
+#title           : functions_prepare_elementtree_dot.py                            =
+#description     : Prepare dot file                                                =
+#author          : Shashi Narayan, shashi.narayan(at){ed.ac.uk,loria.fr,gmail.com})=                                    
+#date            : Created in 2014, Later revised in April 2016.                   =
+#version         : 0.1                                                             =
+#===================================================================================
+
+
+import os
+import xml.etree.ElementTree as ET
+from xml.dom import minidom
+
+def prettify_xml_element(element):
+    """Return a pretty-printed XML string for the Element.
+    """
+    rough_string = ET.tostring(element)
+    reparsed = minidom.parseString(rough_string)
+    prettyxml = reparsed.documentElement.toprettyxml(indent=" ")
+    return prettyxml.encode("utf-8")
+
+############################### Elementary Tree ##########################################
+
+def prepare_write_sentence_element(output_stream, sentid, main_sentence, main_sent_dict, simple_sentences, boxer_graph, training_graph):
+    # Creating Sentence element
+    sentence = ET.Element('sentence')
+    sentence.attrib={"id":str(sentid)}
+
+    # Writing main sentence
+    main = ET.SubElement(sentence, "main")
+    mainsent = ET.SubElement(main, "s")
+    mainsent.text = main_sentence
+    wordinfo = ET.SubElement(main, "winfo")
+    mainpositions  = main_sent_dict.keys()
+    mainpositions.sort()
+    for position in mainpositions:
+        word = ET.SubElement(wordinfo, "w")
+        word.text = main_sent_dict[position][0]
+        word.attrib = {"id":str(position), "pos":main_sent_dict[position][1]}
+
+    # Writing simple sentence
+    simpleset = ET.SubElement(sentence, "simple-set")
+    for simple_sentence in simple_sentences:
+        simple = ET.SubElement(simpleset, "simple")
+        simplesent = ET.SubElement(simple, "s")
+        simplesent.text = simple_sentence
+
+    # Writing boxer Data : boxer_graph
+    boxer = boxer_graph.convert_to_elementarytree()
+    sentence.append(boxer)
+
+    # Writing Training Graph : training_graph
+    traininggraph = training_graph.convert_to_elementarytree()
+    sentence.append(traininggraph)
+
+    output_stream.write(prettify_xml_element(sentence))
+
+############################ Dot - PNG File ###################################################
+
+def run_visual_graph_creator(sentid, main_sentence, main_sent_dict, simple_sentences, boxer_graph, training_graph):
+    print "Creating boxer and training graphs for sentence id : "+sentid+" ..."
+
+    # Start creating boxer graph
+    foutput = open("/tmp/boxer-graph-"+sentid+".dot", "w")
+    boxer_dotstring = boxer_graph.convert_to_dotstring(sentid, main_sentence, main_sent_dict, simple_sentences)
+    foutput.write(boxer_dotstring)
+    foutput.close()
+    os.system("dot -Tpng /tmp/boxer-graph-"+sentid+".dot -o /tmp/boxer-graph-"+sentid+".png")
+
+
+    # Start creating training graph
+    foutput = open("/tmp/training-graph-"+sentid+".dot", "w")
+    train_dotstring = training_graph.convert_to_dotstring(main_sent_dict, boxer_graph)
+    foutput.write(train_dotstring)
+    foutput.close()
+    os.system("dot -Tpng /tmp/training-graph-"+sentid+".dot -o /tmp/training-graph-"+sentid+".png")
diff --git a/source/functions_prepare_elementtree_dot.pyc b/source/functions_prepare_elementtree_dot.pyc