Skip to content

Commit

Permalink
Merge pull request #124 from percyliang/geo880
Browse files Browse the repository at this point in the history
adding a geo880 module as a playground
  • Loading branch information
percyliang authored Dec 7, 2016
2 parents 53dacb5 + a7a4c6b commit 3ed286f
Show file tree
Hide file tree
Showing 11 changed files with 324 additions and 7 deletions.
9 changes: 9 additions & 0 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@
<jar destfile="${libsempre}/sempre-overnight.jar" basedir="${classes}/overnight"/>
</target>

<!-- Compile geo880: compiles the sources under edu/stanford/nlp/sempre/geo880/
     into ${classes}/geo880 and packages them as ${libsempre}/sempre-geo880.jar.
     Runs after the init, core, corenlp and tables targets. -->
<target name="geo880" depends="init,core,corenlp,tables">
<echo message="Compiling ${ant.project.name}: geo880"/>
<mkdir dir="${classes}/geo880"/>
<javac srcdir="${src}" destdir="${classes}/geo880" classpathref="lib.path" debug="true" includeantruntime="false" source="${source}" target="${target}">
<include name="edu/stanford/nlp/sempre/geo880/"/>
</javac>
<jar destfile="${libsempre}/sempre-geo880.jar" basedir="${classes}/geo880"/>
</target>

<!-- Clean up -->
<target name="clean">
Expand Down
9 changes: 9 additions & 0 deletions pull-dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,15 @@ addModule('esslli_2016', 'Data for ESSLLI 2016 semantic parsing class', lambda {
pull('/u/nlp/data/semparse/esslli_2016', 'data/esslli_2016/', {:symlink => true})
})

# geo880 module: pulls each geo880 resource (examples, grammar, lexicon,
# knowledge graph, type hierarchy) into data/geo880, one file at a time,
# passing :symlink => true to pull.
# NOTE(review): only the preprocessed train split is pulled here; there is no
# pull for a raw geo880-train.examples file -- confirm nothing depends on it.
addModule('geo880', 'Data, lexicon, grammars and KB for geo880', lambda {
# Example files (raw test split, preprocessed test and train splits)
pull('/u/nlp/data/semparse/geo880/geo880-test.examples', 'data/geo880', {:symlink => true})
pull('/u/nlp/data/semparse/geo880/geo880-test.preprocessed.examples', 'data/geo880', {:symlink => true})
pull('/u/nlp/data/semparse/geo880/geo880-train.preprocessed.examples', 'data/geo880', {:symlink => true})
# Grammar and lexicon
pull('/u/nlp/data/semparse/geo880/geo880.grammar', 'data/geo880', {:symlink => true})
pull('/u/nlp/data/semparse/geo880/geo880.lexicon', 'data/geo880', {:symlink => true})
# Knowledge graph and type hierarchy
pull('/u/nlp/data/semparse/geo880/geo880.kg', 'data/geo880', {:symlink => true})
pull('/u/nlp/data/semparse/geo880/geo880.type_hierarchy', 'data/geo880', {:symlink => true})
})
############################################################

if ARGV.size == 0
Expand Down
66 changes: 66 additions & 0 deletions run
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,72 @@ addMode('genovernight-wrapper', 'Generate utterances for overnight semantic pars
lambda { |e| system 'mkdir -p genovernight.out'; o('execDir', 'genovernight.out/' + e[:domain]) },
nil) })

# geo880 mode: semantic parsing on the geo880 dataset with a beam parser, the
# tables LambdaDCS executor and the geo880-specific evaluator / type lookup.
# Selectable options (defaults in parentheses):
#   :data    (0)          0 = train file with unbalancedTrainDevSplit, 1 = train + test files
#   :verbose (0)          1 = show rules and parser / JoinFn diagnostics
#   :l1      (0)          1 = lazy L1 with coefficient 3e-5, 2 = lazy L1 with a coefficient sweep
#   :feat    ('freebase') 'none' = no features, 'freebase' = the feature domains listed below
addMode('geo880', 'Semantic parsing on the geo880 dataset', lambda { |e| l(
  # Usual header
  header('core,tables,corenlp,geo880'),
  'edu.stanford.nlp.sempre.Main',
  # Fig parameters
  figOpts,
  o('executor', 'tables.lambdadcs.LambdaDCSExecutor'),
  o('JoinFn.specializedTypeCheck', false), o('JoinFn.typeInference', false),
  # Parser
  o('Builder.parser', 'BeamParser'),
  o('Parser.coarsePrune'),

  # Evaluation
  o('Builder.valueEvaluator', 'geo880.Geo880ValueEvaluator'),

  # Grammar
  o('Grammar.inPaths','lib/data/geo880/geo880.grammar'),

  # Type hierarchy
  o('Geo880TypeLookup.typeHierarchyPath', 'lib/data/geo880/geo880.type_hierarchy'),
  o('TypeInference.typeLookup','geo880.Geo880TypeLookup'),

  # Lexicon
  o('SimpleLexicon.inPaths', 'lib/data/geo880/geo880.lexicon'),

  # Learner
  o('Learner.maxTrainIters', 3),

  # Dataset
  # Both options read the *.preprocessed.examples files, which are the splits
  # that the geo880 module of pull-dependencies places under data/geo880.
  letDefault(:data, 0),
  sel(:data,
    l(o('Dataset.inPaths', 'train,lib/data/geo880/geo880-train.preprocessed.examples'), unbalancedTrainDevSplit), # (0) train 0.8, dev 0.2
    l(o('Dataset.inPaths', 'train,lib/data/geo880/geo880-train.preprocessed.examples', 'test,lib/data/geo880/geo880-test.preprocessed.examples')), # (1) Don't run on test yet!
  nil),
  # Load the graph
  o('Dataset.globalGraphPath', 'lib/data/geo880/geo880.kg'),
  # Verbosity
  letDefault(:verbose, 0),
  sel(:verbose,
    l(),
    l(
      o('showRules'),
      o('Parser.verbose', 2),
      o('JoinFn.verbose', 3),
      o('JoinFn.showTypeCheckFailures'),
    nil),
  nil),
  # Language Analyzer
  l(o('LanguageAnalyzer', 'corenlp.CoreNLPAnalyzer'), o('annotators', *'tokenize ssplit pos lemma ner'.split)),
  # Regularization
  letDefault(:l1, 0),
  sel(:l1,
    l(),
    l(o('Params.l1Reg','lazy'), o('Params.l1RegCoeff', '3e-5')),
    l(o('Params.l1Reg','lazy'), selo(nil, 'Params.l1RegCoeff', 0, 0.00001, 0.0001, 0.001, 0.01)),
  nil),
  # Features
  letDefault(:feat, 'freebase'),
  sel(:feat, {
    'none' => l(), # No features (random)
    'freebase' => l(
      o('FeatureExtractor.featureDomains', 'rule opCount constant whType span lemmaAndBinaries denotation lexAlign joinPos skipPos'.split),
      # o('FeatureExtractor.featureDomains', 'rule opCount constant whType lemmaAndBinaries denotation lexAlign joinPos skipPos'.split),
    nil),
  }),
nil) })

############################################################

Expand Down
7 changes: 5 additions & 2 deletions src/edu/stanford/nlp/sempre/ContextValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public ContextValue(String user, DateValue date, List<Exchange> exchanges) {
}

public ContextValue(KnowledgeGraph graph) {
this(null, null, null, graph);
this(null, null, new ArrayList(), graph);
}

// Example:
Expand Down Expand Up @@ -107,8 +107,11 @@ public LispTree toLispTree() {
tree.addChild(LispTree.proto.newList("user", user));
if (date != null)
tree.addChild(date.toLispTree());
// When logging examples, logging the entire graph takes too much screen space.
// I don't think that we ever deserialize a graph from a serialized context,
// so this should be fine.
if (graph != null)
tree.addChild(graph.toLispTree());
tree.addChild(graph.toShortLispTree());
for (Exchange e : exchanges)
tree.addChild(LispTree.proto.newList("exchange", e.toLispTree()));
return tree;
Expand Down
17 changes: 16 additions & 1 deletion src/edu/stanford/nlp/sempre/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ public static class Options {

@Option(gloss = "Only keep examples which have at most this number of tokens")
public int maxTokens = Integer.MAX_VALUE;

@Option(gloss = "Path to a knowledge graph that will be uploaded as global context")
public String globalGraphPath;
}

public static Options opts = new Options();
Expand Down Expand Up @@ -96,10 +99,22 @@ public void readFromPathPairs(List<Pair<String, String>> pathPairs) {
return;
}
}

readLispTreeFromPathPairs(pathPairs);
updateGlobalContext();
}

/**
 * Attaches the knowledge graph named by {@code opts.globalGraphPath} to every
 * loaded example. Does nothing when no global graph path is configured.
 * The graph is read once; each example receives its own ContextValue wrapping it.
 */
private void updateGlobalContext() {
  if (opts.globalGraphPath == null) {
    return;
  }
  KnowledgeGraph globalGraph = NaiveKnowledgeGraph.fromFile(opts.globalGraphPath);
  for (String groupName : allExamples.keySet()) {
    for (Example example : allExamples.get(groupName)) {
      ContextValue globalContext = new ContextValue(globalGraph);
      example.setContext(globalContext);
    }
  }
}


private void readJsonFromPathPairs(List<Pair<String, String>> pathPairs) {
List<GroupInfo> groups = Lists.newArrayListWithCapacity(pathPairs.size());
for (Pair<String, String> pathPair : pathPairs) {
Expand Down
10 changes: 6 additions & 4 deletions src/edu/stanford/nlp/sempre/FeatureExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,12 @@ void conjoinLemmaAndBinary(Example ex, Derivation deriv) {
List<String> nonEntityLemmas = new LinkedList<>();
extractNonEntityLemmas(ex, deriv, nonEntityLemmas);
List<String> binaries = extractBinaries(deriv.formula);
String binariesStr = Joiner.on('_').join(binaries);
for (String nonEntityLemma : nonEntityLemmas) {
deriv.addFeature("lemmaAndBinaries", "nonEntitylemmas=" + nonEntityLemma +
",binaries=" + binariesStr);
if (!binaries.isEmpty()) {
String binariesStr = Joiner.on('_').join(binaries);
for (String nonEntityLemma : nonEntityLemmas) {
deriv.addFeature("lemmaAndBinaries", "nonEntitylemmas=" + nonEntityLemma +
",binaries=" + binariesStr);
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/edu/stanford/nlp/sempre/KnowledgeGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ public static List<Pair<Value, Value>> getReversedPairs(Collection<Pair<Value, V
// ============================================================

public abstract LispTree toLispTree();
public abstract LispTree toShortLispTree();
@Override public String toString() { return toLispTree().toString(); }

/** Return all y such that x in firsts and (x,r,y) in graph */
Expand Down
12 changes: 12 additions & 0 deletions src/edu/stanford/nlp/sempre/NaiveKnowledgeGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -198,4 +198,16 @@ public LispTree toLispTree() {
}
return tree;
}

/**
 * Returns the full lisp tree for graphs with at most 1000 triples, and a short
 * three-token placeholder {@code (graph NaiveKnowledgeGraph TooManyTriples)} otherwise.
 */
@Override
public LispTree toShortLispTree() {
  // Graphs at or below the threshold are rendered in full.
  if (triples.size() <= 1000) {
    return toLispTree();
  }
  LispTree placeholder = LispTree.proto.newList();
  placeholder.addChild("graph");
  placeholder.addChild("NaiveKnowledgeGraph");
  placeholder.addChild("TooManyTriples");
  return placeholder;
}
}
110 changes: 110 additions & 0 deletions src/edu/stanford/nlp/sempre/geo880/Geo880TypeLookup.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
package edu.stanford.nlp.sempre.geo880;

import edu.stanford.nlp.sempre.SemType;
import edu.stanford.nlp.sempre.SemTypeHierarchy;
import edu.stanford.nlp.sempre.TypeLookup;
import fig.basic.IOUtils;
import fig.basic.Option;
import fig.basic.LogInfo;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Type lookup for the geo880 domain, mostly for distinguishing locations and numbers.
 * We also use a type hierarchy provided by a file to match |location.us_state| and |location.location| etc.
 * Created by joberant on 05/12/2016.
 */
public class Geo880TypeLookup implements TypeLookup {
  public static class Options {
    @Option(gloss = "Verbosity") public int verbose = 0;
    @Option(gloss = "A path to a file that specified the type hierarchy.")
    public String typeHierarchyPath;
  }

  public static Options opts = new Options();
  public static final String LOCATION = "fb:location.location";
  public static final String CITY = "fb:location.citytown";
  public static final String STATE = "fb:location.us_state";
  public static final String RIVER = "fb:location.river";
  public static final String LAKE = "fb:location.lake";
  public static final String MOUNTAIN = "fb:location.mountain";
  public static final String COUNTRY = "fb:location.country";

  /** Type used as the value side of number-valued properties. */
  private static final String NUMBER = "fb:type.number";

  /**
   * Loads the type hierarchy from {@code opts.typeHierarchyPath} (when set) into
   * {@code SemTypeHierarchy.singleton}. Each non-blank line must hold at least three
   * whitespace-separated tokens; tokens 0 and 2 are the two types and token 1 is the relation.
   *
   * @throws RuntimeException if the file cannot be read or a line has fewer than three tokens
   */
  public Geo880TypeLookup() {
    if (opts.typeHierarchyPath == null) {
      return;
    }
    SemTypeHierarchy semTypeHierarchy = SemTypeHierarchy.singleton;
    try {
      for (String line : IOUtils.readLines(opts.typeHierarchyPath)) {
        String trimmed = line.trim();
        // Blank lines carry no relation; skipping them avoids an index error below.
        if (trimmed.isEmpty()) {
          continue;
        }
        String[] tokens = trimmed.split("\\s+");
        if (tokens.length < 3) {
          throw new RuntimeException(
              "Malformed type hierarchy line in " + opts.typeHierarchyPath + ": " + line);
        }

        // Check the file only contains relations about supertypes.
        assert tokens[1].endsWith("included_types");
        // Register each type with itself, then relate the first type to the third token.
        semTypeHierarchy.addSupertype(tokens[0], tokens[0]);
        semTypeHierarchy.addSupertype(tokens[2], tokens[2]);
        semTypeHierarchy.addSupertype(tokens[0], tokens[2]);
      }
    } catch (IOException e) {
      // Keep the original exception as the cause so the failure reason is not lost.
      throw new RuntimeException("Could not read lines from: " + opts.typeHierarchyPath, e);
    }
  }

  /**
   * Maps an entity id to its type based on the prefix between the first ':' and the first '.'.
   *
   * @param entity entity id; entities are of the form fb:state.florida
   * @return a union SemType holding the single type matching the prefix
   * @throws RuntimeException if the id has no usable prefix or the prefix is unknown
   */
  @Override
  public SemType getEntityType(String entity) {
    int colonIndex = entity.indexOf(':');
    int dotIndex = entity.indexOf('.');
    // A missing '.' (or one that precedes the ':') leaves no prefix to extract.
    if (dotIndex < colonIndex + 1) {
      throw new RuntimeException("Illegal entity: " + entity);
    }
    String type = typeForPrefix(entity.substring(colonIndex + 1, dotIndex), entity);

    SemType result = SemType.newUnionSemType(type);
    if (opts.verbose >= 1) {
      LogInfo.logs("Entity=%s, Type=%s", entity, result);
    }
    return result;
  }

  /** Translates a geo880 entity prefix (e.g. "state") into the corresponding type name. */
  private static String typeForPrefix(String prefix, String entity) {
    switch (prefix) {
      case "place":
        return LOCATION;
      case "city":
        return CITY;
      case "state":
        return STATE;
      case "river":
        return RIVER;
      case "lake":
        return LAKE;
      case "mountain":
        return MOUNTAIN;
      case "country":
        return COUNTRY;
      default:
        throw new RuntimeException("Illegal entity: " + entity);
    }
  }

  /**
   * Builds the function type of a property. The part before the last '.' is passed as the
   * second argument of {@code SemType.newFuncSemType}; the first argument is a number type
   * for the known numeric suffixes and the generic location type otherwise.
   *
   * @param property property id; properties are of the form fb:location.location.population
   * @return the function SemType for the property
   * @throws RuntimeException if the property contains no '.'
   */
  @Override
  public SemType getPropertyType(String property) {
    int lastDot = property.lastIndexOf('.');
    if (lastDot < 0) {
      throw new RuntimeException("Illegal property: " + property);
    }
    String arg1 = property.substring(0, lastDot);
    String suffix = property.substring(lastDot + 1);
    String arg2 = isNumericSuffix(suffix) ? NUMBER : LOCATION;

    SemType result = SemType.newFuncSemType(arg2, arg1);
    if (opts.verbose >= 1) {
      LogInfo.logs("Property=%s, Type=%s", property, result);
    }
    return result;
  }

  /** Returns true for the property suffixes that are typed as numbers. */
  private static boolean isNumericSuffix(String suffix) {
    switch (suffix) {
      case "density":
      case "elevation":
      case "population":
      case "size":
      case "area":
      case "length":
        return true;
      default:
        return false;
    }
  }
}
85 changes: 85 additions & 0 deletions src/edu/stanford/nlp/sempre/geo880/Geo880ValueEvaluator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package edu.stanford.nlp.sempre.geo880;

import edu.stanford.nlp.sempre.*;
import edu.stanford.nlp.sempre.tables.StringNormalizationUtils;
import fig.basic.LogInfo;

import java.util.List;

/**
* This is only used because the data does not mention when a city is in the usa, but
* the kg returns usa, and we want to use exact match, so we add this logic here.
* Created by joberant on 03/12/2016.
*/
public class Geo880ValueEvaluator implements ValueEvaluator {

public double getCompatibility(Value target, Value pred) {
List<Value> targetList = ((ListValue) target).values;
if (!(pred instanceof ListValue)) return 0;
List<Value> predList = ((ListValue) pred).values;

// In geo880, if we return that something is contained in a state, there is no need to return fb:country.usa
Value toDelete = null;
if (predList.size() > 1 && predList.get(0) instanceof NameValue) {
for (Value v: predList) {
String id = ((NameValue) v).id;
if (id.equals("fb:country.usa")) {
toDelete = v;
break;
}
}
}
if (toDelete != null) {
predList.remove(toDelete);
}

if (targetList.size() != predList.size()) return 0;

for (Value targetValue : targetList) {
boolean found = false;
for (Value predValue : predList) {
if (getItemCompatibility(targetValue, predValue)) {
found = true;
break;
}
}
if (!found) return 0;
}
return 1;
}

// ============================================================
// Item Compatibility
// ============================================================

// Compare one element of the list.
protected boolean getItemCompatibility(Value target, Value pred) {
if (pred instanceof ErrorValue) return false; // Never award points for error
if (pred == null) {
LogInfo.warning("Predicted value is null!");
return false;
}

if (target instanceof DescriptionValue) {
String targetText = ((DescriptionValue) target).value;
if (pred instanceof NameValue) {
// Just has to match the description
String predText = ((NameValue) pred).description;
if (predText == null) predText = "";
return targetText.equals(predText);
}
} else if (target instanceof NumberValue) {
NumberValue targetNumber = (NumberValue) target;
if (pred instanceof NumberValue) {
return compareNumberValues(targetNumber, (NumberValue) pred);
}
}

return target.equals(pred);
}

protected boolean compareNumberValues(NumberValue target, NumberValue pred) {
return Math.abs(target.value - pred.value) < 1e-6;
}

}
Loading

0 comments on commit 3ed286f

Please sign in to comment.