-
Notifications
You must be signed in to change notification settings - Fork 299
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #124 from percyliang/geo880
adding a geo880 module as a playground
- Loading branch information
Showing
11 changed files
with
324 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 110 additions & 0 deletions
110
src/edu/stanford/nlp/sempre/geo880/Geo880TypeLookup.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
package edu.stanford.nlp.sempre.geo880; | ||
|
||
import edu.stanford.nlp.sempre.SemType; | ||
import edu.stanford.nlp.sempre.SemTypeHierarchy; | ||
import edu.stanford.nlp.sempre.TypeLookup; | ||
import fig.basic.IOUtils; | ||
import fig.basic.Option; | ||
import fig.basic.LogInfo; | ||
|
||
import java.io.IOException; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
|
||
/** | ||
* Type lookup for the geo880 domain, Mostly for distinguishing locations and numbers. | ||
* We also use a type hierarchy provided by a file to match |location.us_state| and |location.location| etc. | ||
* Created by joberant on 05/12/2016. | ||
*/ | ||
public class Geo880TypeLookup implements TypeLookup{ | ||
public static class Options { | ||
@Option(gloss = "Verbosity") public int verbose = 0; | ||
@Option(gloss = "A path to a file that specified the type hierarchy.") | ||
public String typeHierarchyPath; | ||
|
||
} | ||
public static Options opts = new Options(); | ||
public static final String LOCATION = "fb:location.location"; | ||
public static final String CITY = "fb:location.citytown"; | ||
public static final String STATE = "fb:location.us_state"; | ||
public static final String RIVER = "fb:location.river"; | ||
public static final String LAKE = "fb:location.lake"; | ||
public static final String MOUNTAIN = "fb:location.mountain"; | ||
public static final String COUNTRY = "fb:location.country"; | ||
|
||
public Geo880TypeLookup() { | ||
SemTypeHierarchy semTypeHierarchy = SemTypeHierarchy.singleton; | ||
if (opts.typeHierarchyPath != null) { | ||
try { | ||
for (String line : IOUtils.readLines(opts.typeHierarchyPath)) { | ||
String[] tokens = line.split("\\s+"); | ||
|
||
// Check the file only contains relations about supertypes. | ||
assert tokens[1].endsWith("included_types"); | ||
semTypeHierarchy.addSupertype(tokens[0], tokens[0]); | ||
semTypeHierarchy.addSupertype(tokens[2], tokens[2]); | ||
semTypeHierarchy.addSupertype(tokens[0], tokens[2]); | ||
} | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
throw new RuntimeException("Could not read lines from: " + opts.typeHierarchyPath); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public SemType getEntityType(String entity) { | ||
// Entites are of the form fb:state.florida. | ||
int colonIndex = entity.indexOf(':'); | ||
int dotIndex = entity.indexOf('.'); | ||
String type = entity.substring(colonIndex+1, dotIndex); | ||
|
||
if (type.equals("place")) { | ||
type = LOCATION; | ||
} | ||
else if (type.equals("city")) { | ||
type = CITY; | ||
} | ||
else if (type.equals("state")) { | ||
type = STATE; | ||
} | ||
else if (type.equals("river")) { | ||
type = RIVER; | ||
} | ||
else if (type.equals("lake")) { | ||
type = LAKE; | ||
} | ||
else if (type.equals("mountain")) { | ||
type = MOUNTAIN; | ||
} | ||
else if (type.equals("country")) { | ||
type = COUNTRY; | ||
} | ||
else { | ||
throw new RuntimeException("Illegal entity: " + entity); | ||
} | ||
SemType result = SemType.newUnionSemType(type); | ||
if (opts.verbose >= 1) { | ||
LogInfo.logs("Entity=%s, Type=%s", entity, result); | ||
} | ||
return result; | ||
} | ||
|
||
@Override | ||
public SemType getPropertyType(String property) { | ||
// Properties are of the form fb:location.location.population. | ||
String arg1 = property.substring(0, property.lastIndexOf('.')); | ||
String suffix = property.substring(property.lastIndexOf('.') + 1); | ||
String arg2 = LOCATION; | ||
if (suffix.equals("density") || suffix.equals("elevation") || | ||
suffix.equals("population") || suffix.equals("size") || | ||
suffix.equals("area") || suffix.equals("length")) { | ||
arg2 = "fb:type.number"; | ||
} | ||
SemType result = SemType.newFuncSemType(arg2, arg1); | ||
if (opts.verbose >= 1) { | ||
LogInfo.logs("Property=%s, Type=%s", property, result); | ||
} | ||
return result; | ||
} | ||
} |
85 changes: 85 additions & 0 deletions
85
src/edu/stanford/nlp/sempre/geo880/Geo880ValueEvaluator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package edu.stanford.nlp.sempre.geo880; | ||
|
||
import edu.stanford.nlp.sempre.*; | ||
import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; | ||
import fig.basic.LogInfo; | ||
|
||
import java.util.List; | ||
|
||
/** | ||
* This is only used because the data does not mention when a city is in the usa, but | ||
* the kg returns usa, and we want to use exact match, so we add this logic here. | ||
* Created by joberant on 03/12/2016. | ||
*/ | ||
public class Geo880ValueEvaluator implements ValueEvaluator { | ||
|
||
public double getCompatibility(Value target, Value pred) { | ||
List<Value> targetList = ((ListValue) target).values; | ||
if (!(pred instanceof ListValue)) return 0; | ||
List<Value> predList = ((ListValue) pred).values; | ||
|
||
// In geo880, if we return that something is contained in a state, there is no need to return fb:country.usa | ||
Value toDelete = null; | ||
if (predList.size() > 1 && predList.get(0) instanceof NameValue) { | ||
for (Value v: predList) { | ||
String id = ((NameValue) v).id; | ||
if (id.equals("fb:country.usa")) { | ||
toDelete = v; | ||
break; | ||
} | ||
} | ||
} | ||
if (toDelete != null) { | ||
predList.remove(toDelete); | ||
} | ||
|
||
if (targetList.size() != predList.size()) return 0; | ||
|
||
for (Value targetValue : targetList) { | ||
boolean found = false; | ||
for (Value predValue : predList) { | ||
if (getItemCompatibility(targetValue, predValue)) { | ||
found = true; | ||
break; | ||
} | ||
} | ||
if (!found) return 0; | ||
} | ||
return 1; | ||
} | ||
|
||
// ============================================================ | ||
// Item Compatibility | ||
// ============================================================ | ||
|
||
// Compare one element of the list. | ||
protected boolean getItemCompatibility(Value target, Value pred) { | ||
if (pred instanceof ErrorValue) return false; // Never award points for error | ||
if (pred == null) { | ||
LogInfo.warning("Predicted value is null!"); | ||
return false; | ||
} | ||
|
||
if (target instanceof DescriptionValue) { | ||
String targetText = ((DescriptionValue) target).value; | ||
if (pred instanceof NameValue) { | ||
// Just has to match the description | ||
String predText = ((NameValue) pred).description; | ||
if (predText == null) predText = ""; | ||
return targetText.equals(predText); | ||
} | ||
} else if (target instanceof NumberValue) { | ||
NumberValue targetNumber = (NumberValue) target; | ||
if (pred instanceof NumberValue) { | ||
return compareNumberValues(targetNumber, (NumberValue) pred); | ||
} | ||
} | ||
|
||
return target.equals(pred); | ||
} | ||
|
||
protected boolean compareNumberValues(NumberValue target, NumberValue pred) { | ||
return Math.abs(target.value - pred.value) < 1e-6; | ||
} | ||
|
||
} |
Oops, something went wrong.