Skip to content

Commit

Permalink
Merge pull request #5349 from CartoDB/4730-ogr-xy-possible-names
Browse files Browse the repository at this point in the history
4730 ogr xy possible names
  • Loading branch information
Rafa de la Torre committed Sep 7, 2015
2 parents 7840a07 + 14ab4cf commit c30ee49
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 139 deletions.
38 changes: 0 additions & 38 deletions lib/importer/lib/cartodb-migrator/migrator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,44 +80,6 @@ def migrate!
end
end

# if there is no the_geom, and there are latitude and longitude columns, create the_geom
unless column_names.include? "the_geom"

latitude_possible_names = "'latitude','lat','latitudedecimal','latitud','lati'"
longitude_possible_names = "'longitude','lon','lng','longitudedecimal','longitud','long'"

matching_latitude = nil
res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
and lower(column_name) in (#{latitude_possible_names}) LIMIT 1"]
unless res.first.nil?
matching_latitude= res.first[:column_name]
end
matching_longitude = nil
res = @db_connection["select column_name from information_schema.columns where table_name ='#{@suggested_name}'
and lower(column_name) in (#{longitude_possible_names}) LIMIT 1"]
unless res.first.nil?
matching_longitude= res.first[:column_name]
end

if matching_latitude and matching_longitude
#we know there is a latitude/longitude columns
@db_connection.run("SELECT public.AddGeometryColumn('#{@suggested_name}','the_geom',4326, 'POINT', 2);")

@db_connection.run(<<-GEOREF
UPDATE \"#{@suggested_name}\"
SET the_geom =
ST_GeomFromText(
'POINT(' || trim(CAST(\"#{matching_longitude}\" AS text)) || ' ' || trim(CAST(\"#{matching_latitude}\" AS text)) || ')', 4326
)
WHERE
trim(CAST(\"#{matching_longitude}\" AS text)) ~ '^(([-+]?(([0-9]|[1-9][0-9]|1[0-7][0-9])(\.[0-9]+)?))|[-+]?180)$'
AND
trim(CAST(\"#{matching_latitude}\" AS text)) ~ '^(([-+]?(([0-9]|[1-8][0-9])(\.[0-9]+)?))|[-+]?90)$'
GEOREF
)
end
end

@table_created = true
rows_imported = @db_connection["SELECT count(*) as count from #{@suggested_name}"].first[:count]

Expand Down
3 changes: 0 additions & 3 deletions services/importer/lib/importer/geometry_fixer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def enable_autovacuum
})
end

def populate_the_geom_from_latlon(qualified_table_name, latitude_column_name, longitude_column_name)
CartoDB::InternalGeocoder::LatitudeLongitude.new(db, job).geocode(schema, table_name, latitude_column_name, longitude_column_name)
end

private

Expand Down
40 changes: 0 additions & 40 deletions services/importer/lib/importer/georeferencer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,13 @@
require_relative './job'
require_relative './query_batcher'
require_relative './content_guesser'
require_relative '../../../table-geocoder/lib/internal-geocoder/latitude_longitude'

require_relative '../../../../lib/cartodb/stats/importer'

module CartoDB
module Importer2
class Georeferencer
DEFAULT_BATCH_SIZE = 50000
LATITUDE_POSSIBLE_NAMES = %w{ latitude lat latitudedecimal
latitud lati decimallatitude decimallat point_latitude }
LONGITUDE_POSSIBLE_NAMES = %w{ longitude lon lng
longitudedecimal longitud long decimallongitude decimallong point_longitude }
GEOMETRY_POSSIBLE_NAMES = %w{ geometry the_geom wkb_geometry geom geojson wkt }
DEFAULT_SCHEMA = 'cdb_importer'
THE_GEOM_WEBMERCATOR = 'the_geom_webmercator'
Expand Down Expand Up @@ -46,7 +41,6 @@ def run
drop_the_geom_webmercator

the_geom_column_name = create_the_geom_from_geometry_column ||
create_the_geom_from_latlon ||
create_the_geom_from_ip_guessing ||
create_the_geom_from_country_guessing ||
create_the_geom_from_namedplaces_guessing ||
Expand All @@ -72,20 +66,6 @@ def enable_autovacuum
})
end

def create_the_geom_from_latlon
latitude_column_name = latitude_column_name_in
longitude_column_name = longitude_column_name_in

return false unless latitude_column_name && longitude_column_name

job.log 'Creating the_geom from latitude / longitude'
create_the_geom_in(table_name)
populate_the_geom_from_latlon(
qualified_table_name, latitude_column_name, longitude_column_name
)
'the_geom'
end

def create_the_geom_from_geometry_column
column = nil
geometry_column_name = geometry_column_in
Expand Down Expand Up @@ -281,12 +261,6 @@ def user_id
@job.logger.user_id
end

# Note: Performs a really simple ',' to '.' normalization.
# TODO: Candidate for moving to a CDB_xxx function that gets the_geom from lat/long if valid or "convertible"
def populate_the_geom_from_latlon(qualified_table_name, latitude_column_name, longitude_column_name)
CartoDB::InternalGeocoder::LatitudeLongitude.new(db, job).geocode(schema, table_name, latitude_column_name, longitude_column_name)
end

def create_the_geom_in(table_name)
job.log 'Creating the_geom column'
return false if column_exists_in?(table_name, 'the_geom')
Expand All @@ -307,20 +281,6 @@ def columns_in(table_name)
db.schema(table_name, reload: true, schema: schema).map(&:first)
end

def latitude_column_name_in
names = LATITUDE_POSSIBLE_NAMES.map { |name| "'#{name}'" }.join(',')
name = find_column_in(table_name, names)
job.log "Identified #{name} as latitude column" if name
name
end

def longitude_column_name_in
names = LONGITUDE_POSSIBLE_NAMES.map { |name| "'#{name}'" }.join(',')
name = find_column_in(table_name, names)
job.log "Identified #{name} as longitude column" if name
name
end

def geometry_column_in
names = geometry_columns.map { |name| "'#{name}'" }.join(',')
find_column_in(table_name, names)
Expand Down
14 changes: 13 additions & 1 deletion services/importer/lib/importer/ogr2ogr.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ class Ogr2ogr

DEFAULT_BINARY = 'which ogr2ogr2'

LATITUDE_POSSIBLE_NAMES = %w{ latitude lat latitudedecimal
latitud lati decimallatitude decimallat point_latitude }
LONGITUDE_POSSIBLE_NAMES = %w{ longitude lon lng
longitudedecimal longitud long decimallongitude decimallong point_longitude }


def initialize(table_name, filepath, pg_options, layer=nil, options={})
self.filepath = filepath
self.pg_options = pg_options
Expand All @@ -40,14 +46,16 @@ def set_default_properties

def command_for_import
"#{OSM_INDEXING_OPTION} #{PG_COPY_OPTION} #{client_encoding_option} #{shape_encoding_option} " +
"#{executable_path} #{OUTPUT_FORMAT_OPTION} #{overwrite_option} #{guessing_option} #{postgres_options} #{projection_option} " +
"#{executable_path} #{OUTPUT_FORMAT_OPTION} #{overwrite_option} #{guessing_option} #{x_y_possible_names_option} " +
"#{postgres_options} #{projection_option} " +
"#{layer_creation_options} #{filepath} #{layer} #{layer_name_option} #{NEW_LAYER_TYPE_OPTION}" +
" #{shape_coordinate_option} "
end

def command_for_append
"#{OSM_INDEXING_OPTION} #{PG_COPY_OPTION} #{client_encoding_option} " +
"#{executable_path} #{APPEND_MODE_OPTION} #{OUTPUT_FORMAT_OPTION} #{postgres_options} " +
"#{x_y_possible_names_option} " +
"#{projection_option} #{filepath} #{layer} #{layer_name_option} #{NEW_LAYER_TYPE_OPTION}"
end

Expand Down Expand Up @@ -146,6 +154,10 @@ def guessing_option
end
end

def x_y_possible_names_option
"-oo X_POSSIBLE_NAMES=#{LONGITUDE_POSSIBLE_NAMES.join(',')} -oo Y_POSSIBLE_NAMES=#{LATITUDE_POSSIBLE_NAMES.join(',')}"
end

def overwrite_option
overwrite ? "-overwrite" : ''
end
Expand Down
59 changes: 2 additions & 57 deletions services/importer/spec/unit/georeferencer_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# encoding: utf-8
require_relative '../../lib/importer/georeferencer.rb'
require_relative '../../lib/importer/georeferencer'
require_relative '../factories/pg_connection'
require_relative '../../../../services/importer/spec/doubles/log'
require_relative '../../../../spec/rspec_configuration'

include CartoDB

Expand Down Expand Up @@ -48,18 +49,6 @@
end #initialize

describe '#run' do
it 'generates the_geom from lat / lon columns' do
dataset = @db[@table_name.to_sym]

10.times { dataset.insert(random_record) }

georeferencer = georeferencer_instance
georeferencer.run

dataset.first.fetch(:the_geom).should_not be_nil
dataset.first.fetch(:the_geom).should_not be_empty
end

it 'returns self if no lat / lon columns in the table' do
table_name = create_table(@db,
latitude_column: 'bogus_1',
Expand All @@ -81,34 +70,6 @@
end
end #run

describe '#populate_the_geom_from_latlon' do
it 'populates the_geom from lat / lon values' do
lat = Importer2::Georeferencer::LATITUDE_POSSIBLE_NAMES.sample
lon = Importer2::Georeferencer::LONGITUDE_POSSIBLE_NAMES.sample

table_name = create_table(
@db,
latitude_column: lat,
longitude_column: lon
)

georeferencer = georeferencer_instance(@db, table_name)
dataset = @db[table_name.to_sym]

georeferencer.create_the_geom_in(table_name)
dataset.insert(
:name => 'bogus',
:description => 'bogus',
:"#{lat}" => rand(90),
:"#{lon}" => rand(180)
)

dataset.first.fetch(:the_geom).should be_nil
georeferencer.populate_the_geom_from_latlon(table_name, lat, lon)
dataset.first.fetch(:the_geom).should_not be_nil
end
end

describe '#create_the_geom_in' do
before do
@table_name = create_table(@db)
Expand Down Expand Up @@ -151,22 +112,6 @@
end
end #columns_in

describe '#latitude_column_name_in' do
it 'returns the name of a latitude column within a set of candidates, if
existing' do
georeferencer = georeferencer_instance
georeferencer.latitude_column_name_in.should eq 'lat'
end
end

describe '#longitude_column_name_in' do
it 'returns the name of a longitude column within a set of candidates, if
existing' do
georeferencer = georeferencer_instance
georeferencer.longitude_column_name_in.should eq 'lon'
end
end

describe '#find_column_in' do
it 'returns the name of a column in a set of possible names if one of them
actually exists in the table' do
Expand Down
9 changes: 9 additions & 0 deletions spec/requests/api/imports_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@
import_table.should have_required_indexes_and_triggers
end

it 'detects lat/long columns and produces a the_geom column from them' do
post api_v1_imports_create_url,
params.merge(:filename => upload_file('spec/support/data/csv_with_lat_lon.csv', 'application/octet-stream'))
@table_from_import = UserTable.all.last.service

@table_from_import.geometry_types.should == ["ST_Point"]
@table_from_import.record(1)[:the_geom].should == '{"type":"Point","coordinates":[16.5607329,48.1199611]}'
end

it 'duplicates a table without geometries' do
post api_v1_imports_create_url,
params.merge(:filename => upload_file('spec/support/data/csv_with_number_columns.csv', 'application/octet-stream'))
Expand Down

0 comments on commit c30ee49

Please sign in to comment.