forked from petewarden/geodict
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit fee57b3
Showing
13 changed files
with
2,701,530 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# Geodict | ||
# Copyright (C) 2010 Pete Warden <pete@petewarden.com> | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import sys, string | ||
|
||
def print_usage_and_exit(cliargs):
    """Print a usage summary for every known argument, then exit.

    cliargs maps each long option name to a dict that must contain:
      'short'       - the short (single-dash) option name
      'type'        - 'required' or 'optional' (both take a value);
                      any other type is listed without a <value> marker
      'description' - human-readable help text

    One line is printed per option, in the form
        -s/--long <value> : description (required)
    where '<value> ' and ' (required)' only appear when they apply.

    This function never returns: it raises SystemExit through sys.exit().
    """
    # The parenthesised single-argument form prints identically under
    # Python 2 and Python 3.
    print("Usage:")

    for long_name, arginfo in cliargs.items():
        short_name = arginfo['short']
        arg_type = arginfo['type']
        description = arginfo['description']

        is_required = (arg_type == 'required')
        takes_value = is_required or (arg_type == 'optional')

        line = '-' + short_name + '/--' + long_name + ' '
        if takes_value:
            line += '<value> '
        line += ': ' + description
        if is_required:
            line += ' (required)'

        print(line)

    # sys.exit() rather than the bare exit() helper, which is only injected
    # by the site module and is missing under "python -S" or in frozen apps.
    sys.exit()
|
||
def _die(message):
    """Report a broken argument specification on stderr and exit with status 1."""
    sys.stderr.write(message + '\n')
    sys.exit(1)


def get_options(cliargs):
    """Parse sys.argv[1:] against the specification in cliargs.

    cliargs maps each long option name to a dict with the keys:
      'short'   - the short (single-dash) option name
      'type'    - 'required', 'optional' or 'switch'
      'default' - value used when an 'optional' option is absent
                  (mandatory for 'optional' options)

    Accepted command-line forms are '--name value', '--name=value',
    '-s value' and '-s=value'; switches take no value. Option names are
    matched case-insensitively, but values and positional arguments keep
    the exact text the user typed. Arguments that do not start with '-'
    (and a bare '-', the conventional stdin/stdout marker) are collected
    under the 'unnamed' key.

    Returns a dict mapping every long name in cliargs to its value (True or
    False for switches) plus 'unnamed', a list of positional arguments.

    On a user error (unknown option, missing value, missing required
    option) a message and the usage text are printed and the process exits
    via print_usage_and_exit(). On an inconsistent specification (optional
    option without a default, unknown type) the process exits with
    status 1.
    """
    options = {'unnamed': []}

    # Built once instead of scanning cliargs for every short option seen.
    short_to_long = dict(
        (info['short'], name) for name, info in cliargs.items())

    remaining = iter(sys.argv[1:])
    for currentarg in remaining:
        if currentarg == '-' or not currentarg.startswith('-'):
            # Positional argument: keep it verbatim (case and any '=').
            options['unnamed'].append(currentarg)
            continue

        # Split on the first '=' only, so values may themselves contain '='.
        namepart, has_inline_value, inline_value = currentarg.partition('=')
        # Only the option name is case-insensitive, never the value.
        namepart = namepart.lower()

        if namepart.startswith('--'):
            longname = namepart[2:]
        else:
            shortname = namepart[1:]
            longname = short_to_long.get(shortname, shortname)

        if longname not in cliargs:
            print("Unknown argument '" + longname + "'")
            print_usage_and_exit(cliargs)

        if cliargs[longname]['type'] == 'switch':
            value = True
        elif has_inline_value:
            value = inline_value
        else:
            # The value is the following command-line argument, which is
            # consumed here so the loop does not see it again.
            value = next(remaining, None)
            if value is None:
                print("Missing value after '" + longname + "'")
                print_usage_and_exit(cliargs)

        options[longname] = value

    # Fill in anything the user left out, or complain if it was mandatory.
    for longname, arginfo in cliargs.items():
        if longname in options:
            continue

        argtype = arginfo['type']
        if argtype == 'required':
            print("Missing required value for '" + longname + "'")
            print_usage_and_exit(cliargs)
        elif argtype == 'optional':
            if 'default' not in arginfo:
                _die('Missing default value for ' + longname)
            options[longname] = arginfo['default']
        elif argtype == 'switch':
            options[longname] = False
        else:
            _die('Unknown type "' + argtype + '" for ' + longname)

    return options
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#!/usr/bin/env python | ||
|
||
# Geodict | ||
# Copyright (C) 2010 Pete Warden <pete@petewarden.com> | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
import csv, os, os.path, MySQLdb, sys, json, csv | ||
import geodict_lib, cliargs | ||
|
||
# Command-line options accepted by this script, keyed by long option name.
# The table is handed to cliargs.get_options() below; every entry here is an
# optional value with a default.
args = {
    'input': dict(
        short='i',
        type='optional',
        description='The name of the input file to scan for locations. If none is set, will read from STDIN',
        default='-',
    ),
    'output': dict(
        short='o',
        type='optional',
        description='The name of the file to write the location data to. If none is set, will write to STDOUT',
        default='-',
    ),
    'format': dict(
        short='f',
        type='optional',
        description='The format to use to output information about any locations found. By default it will write out location names separated by newlines, but specifying "json" will give more detailed information',
        default='text',
    ),
}
|
||
# Main script body: read text from the chosen input, find the locations in
# it with geodict_lib, and write them to the chosen output in the requested
# format ('text', 'json' or 'csv').
options = cliargs.get_options(args)

input = options['input']
output = options['output']
format = options['format']

# '-' selects the standard stream. Compare with '==': 'is' tests object
# identity and only appears to work for strings the interpreter has interned.
if input == '-':
    input_handle = sys.stdin
else:
    try:
        input_handle = open(input, 'rb')
    except IOError:
        sys.stderr.write("Couldn't open file '" + input + "'\n")
        sys.exit(1)

if output == '-':
    output_handle = sys.stdout
else:
    try:
        output_handle = open(output, 'wb')
    except IOError:
        sys.stderr.write("Couldn't write to file '" + output + "'\n")
        sys.exit(1)

text = input_handle.read()
if input_handle is not sys.stdin:
    input_handle.close()

locations = geodict_lib.find_locations_in_text(text)

output_string = ''
output_format = format.lower()
if output_format == 'json':
    output_string = json.dumps(locations)
    output_handle.write(output_string)
elif output_format == 'text':
    # One matched span of the original text per line.
    matched_lines = []
    for location in locations:
        found_tokens = location['found_tokens']
        start_index = found_tokens[0]['start_index']
        end_index = found_tokens[-1]['end_index']
        # end_index is used as an inclusive bound, hence the +1.
        matched_lines.append(text[start_index:(end_index + 1)] + "\n")
    output_string = ''.join(matched_lines)
    output_handle.write(output_string)
elif output_format == 'csv':
    writer = csv.writer(output_handle)
    writer.writerow(['location', 'type', 'lat', 'lon'])
    for location in locations:
        found_tokens = location['found_tokens']
        start_index = found_tokens[0]['start_index']
        end_index = found_tokens[-1]['end_index']
        name = text[start_index:(end_index + 1)]
        # Type and coordinates are taken from the first token of the match.
        location_type = found_tokens[0]['type'].lower()
        lat = found_tokens[0]['lat']
        lon = found_tokens[0]['lon']
        writer.writerow([name, location_type, lat, lon])
else:
    print("Unknown output format '" + format + "'")
    sys.exit()

# Flush and release the output file explicitly instead of relying on
# interpreter shutdown.
if output_handle is not sys.stdout:
    output_handle.close()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Geodict | ||
# Copyright (C) 2010 Pete Warden <pete@petewarden.com> | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
# Folder holding the source data that gets loaded into your database
source_folder = './source_data/'

# MySQL login: user name and password
user = 'root'
password = ''

# Where the database server lives: address and port number
host = 'localhost'
port = 0

# Name of the database to create
database = 'geodict'

# Upper limit on the number of words in any name
word_max = 3

# Words providing evidence that whatever follows them is a location
location_words = dict.fromkeys(('at', 'in'), True)
Oops, something went wrong.