diff --git a/NEWS.md b/NEWS.md index 9121ba2d6500..a66abe442203 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ sudo make install - None yet ### Bug fixes / enhancements +- New versioned sanitization of column names ([#15326](https://github.com/CartoDB/cartodb/issues/15326)) - Change Catalog dropdown placeholders (([#15335](https://github.com/CartoDB/cartodb/issues/15335))) - Fix /embed_map for kuviz ([#15360](https://github.com/CartoDB/cartodb/pull/15360)) - Avoid extra calls when counting number of likes of each visualization ([#15349](https://github.com/CartoDB/cartodb/pull/15349)) diff --git a/app/models/data_import.rb b/app/models/data_import.rb index 88b46b19fea4..cec818541a3a 100644 --- a/app/models/data_import.rb +++ b/app/models/data_import.rb @@ -24,6 +24,7 @@ require_relative '../../services/importer/lib/importer/overviews' require_relative '../../services/importer/lib/importer/connector_runner' require_relative '../../services/importer/lib/importer/exceptions' +require_relative '../../services/importer/lib/importer/column' require_dependency 'carto/tracking/events' require_dependency 'carto/valid_table_name_proposer' @@ -118,9 +119,10 @@ def after_initialize # New ones are already tracked during the data_import create inside the controller # For the old dashboard def before_create - if self.from_common_data? - self.extra_options = self.extra_options.merge({:common_data => true}) + if from_common_data? + self.extra_options = extra_options.merge(common_data: true) end + self.extra_options = extra_options.merge(column_sanitization_version: CartoDB::Importer2::Column::CURRENT_COLUMN_SANITIZATION_VERSION) end def before_save @@ -426,6 +428,10 @@ def final_state? [STATE_COMPLETE, STATE_FAILURE, STATE_STUCK].include?(state) end + def column_sanitization_version + extra_options[:column_sanitization_version] || CartoDB::Importer2::Column::INITIAL_COLUMN_SANITIZATION_VERSION + end + private def dispatch @@ -574,12 +580,17 @@ def overwrite_strategy? collision_strategy == Carto::DataImportConstants::COLLISION_STRATEGY_OVERWRITE end + def sanitize_columns(table_name) - Table.sanitize_columns(table_name, { + # TODO: is this called before table is registered? otherwise we should use Table#sanitize_columns + Table.sanitize_columns( + table_name, + column_sanitization_version, + { connection: current_user.in_database, - database_schema: current_user.database_schema, - reserved_words: CartoDB::Importer2::Column::RESERVED_WORDS - }) + database_schema: current_user.database_schema + } + ) end def migrate_existing(imported_name) diff --git a/app/models/synchronization/adapter.rb b/app/models/synchronization/adapter.rb index b099965078da..8bb920f25f72 100644 --- a/app/models/synchronization/adapter.rb +++ b/app/models/synchronization/adapter.rb @@ -276,7 +276,15 @@ def import_cleanup(schema_name, table_name) user.db_service.in_database_direct_connection(statement_timeout: STATEMENT_TIMEOUT) do |user_database| # For consistency with regular imports, also eases testing - Table.sanitize_columns(table_name, {database_schema: schema_name, connection: user_database}) + # The sanitization of @table_name is applied to the newly imported table_name + # This should not be necessary, since setup_table, called after cartodbfication + # also perfomes column sanitization via Table#import_to_cartodb + # but is kept because there may be existing syncs for which this double sanitization + # (version 1 sanitization which wasn't idemponent) had the effect of altering some + # satinizated names (e.g. __1 -> _1). + table = ::Table.new(name: @table_name, user_id: @user.id) + # we could as well: table = Carto::UserTable.find(@user.tables.where(name: @table_name).first.id).service + table.sanitize_columns(table_name: table_name, database_schema: schema_name, connection: user_database) # When tables are created using ogr2ogr they are added a ogc_fid or gid primary key # In that case: diff --git a/app/models/table.rb b/app/models/table.rb index 16e3f30f9b67..13a54558d64c 100644 --- a/app/models/table.rb +++ b/app/models/table.rb @@ -11,6 +11,7 @@ require_relative './visualization/table_blender' require_relative '../../services/importer/lib/importer/query_batcher' require_relative '../../services/importer/lib/importer/cartodbfy_time' +require_relative '../../services/importer/lib/importer/column' require_relative '../../services/datasources/lib/datasources/decorators/factory' require_relative '../../services/table-geocoder/lib/internal-geocoder/latitude_longitude' require_relative '../model_factories/layer_factory' @@ -37,13 +38,6 @@ class Table GEOMETRY_TYPES_PRESENT_CACHING_TIMEOUT = 24.hours STATEMENT_TIMEOUT = 1.hour * 1000 - # See http://www.postgresql.org/docs/9.5/static/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS - PG_IDENTIFIER_MAX_LENGTH = 63 - - # @see services/importer/lib/importer/column.rb -> RESERVED_WORDS - # @see config/initializers/carto_db.rb -> RESERVED_COLUMN_NAMES - RESERVED_COLUMN_NAMES = %w(oid tableoid xmin cmin xmax cmax ctid ogc_fid).freeze - DEFAULT_THE_GEOM_TYPE = 'geometry' VALID_GEOMETRY_TYPES = %W{ geometry multipolygon point multilinestring } @@ -59,6 +53,7 @@ def initialize(args = {}) else @user_table = args[:user_table] end + self.user_id = args[:user_id] if args[:user_id].present? # TODO: this probably makes sense only if user_table is not passed as argument @user_table.set_service(self) end @@ -214,20 +209,23 @@ def self.table_and_schema(table_name) end end + def data_import + @data_import ||= DataImport.where(id: @user_table.data_import_id).first || DataImport.new(user_id: owner.id) + end + ## Callbacks def import_to_cartodb(uniname = nil) - @data_import ||= DataImport.where(id: @user_table.data_import_id).first || DataImport.new(user_id: owner.id) if migrate_existing_table.present? || uniname - @data_import.data_type = DataImport::TYPE_EXTERNAL_TABLE if @data_import.data_type.nil? - @data_import.data_source = migrate_existing_table || uniname - @data_import.save + data_import.data_type = DataImport::TYPE_EXTERNAL_TABLE if data_import.data_type.nil? + data_import.data_source = migrate_existing_table || uniname + data_import.save # ensure unique name, also ensures self.name can override any imported table name uniname = get_valid_name(name ? name : migrate_existing_table) unless uniname # Make sure column names are sanitized. Make it consistently. - self.class.sanitize_columns(uniname, {database_schema: owner.database_schema, connection: owner.in_database}) + sanitize_columns(table_name: uniname, database_schema: owner.database_schema, connection: owner.in_database) # with table #{uniname} table created now run migrator to CartoDBify hash_in = ::SequelRails.configuration.environment_for(Rails.env).merge( @@ -242,12 +240,12 @@ def import_to_cartodb(uniname = nil) :debug => Rails.env.development?, :remaining_quota => owner.remaining_quota, :remaining_tables => owner.remaining_table_quota, - :data_import_id => @data_import.id + :data_import_id => data_import.id ).symbolize_keys importer = CartoDB::Migrator.new(hash_in) imported_name = importer.migrate! - @data_import.reload - @data_import.save + data_import.reload + data_import.save imported_name end end @@ -676,10 +674,11 @@ def make_sequel_compatible(attributes) end def add_column!(options) - raise CartoDB::InvalidColumnName if RESERVED_COLUMN_NAMES.include?(options[:name]) || options[:name] =~ /^[0-9]/ + raise CartoDB::InvalidColumnName if CartoDB::Importer2::Column.rejected?(options[:name]) type = options[:type].convert_to_db_type cartodb_type = options[:type].convert_to_cartodb_type - column_name = options[:name].to_s.sanitize_column_name + # FIXME: consider CartoDB::Importer2::Column.get_valid_column_name with CURRENT_COLUMN_SANITIZATION_VERSION + column_name = CartoDB::Importer2::Column.sanitize_name(options[:name].to_s) owner.in_database.add_column name, column_name, type update_cdb_tablemetadata @@ -700,16 +699,19 @@ def drop_column!(options) end def modify_column!(options) - old_name = options.fetch(:name, '').to_s.sanitize - new_name = options.fetch(:new_name, '').to_s.sanitize - raise 'This column cannot be modified' if CARTODB_COLUMNS.include?(old_name.to_s) + old_name = options.fetch(:name, '').to_s + new_name = options.fetch(:new_name, '').to_s + + # FIXME: consider CartoDB::Importer2::Column.get_valid_column_name with CURRENT_COLUMN_SANITIZATION_VERSION + old_sanitized_name = CartoDB::Importer2::Column.sanitize_name(old_name) + raise 'This column cannot be modified' if CARTODB_COLUMNS.include?(old_sanitized_name.to_s) if new_name.present? && new_name != old_name - new_name = new_name.sanitize_column_name - rename_column(old_name, new_name) + new_sanitized_name = CartoDB::Importer2::Column.sanitize_name(new_name) + rename_column(old_sanitized_name, new_sanitized_name) end - column_name = (new_name.present? ? new_name : old_name) + column_name = (new_name.present? ? new_sanitized_name : old_sanitized_name) convert_column_datatype(owner.in_database, name, column_name, options[:type]) column_type = column_type_for(column_name) @@ -731,7 +733,7 @@ def rename_column(old_name, new_name='') raise 'Please provide a column name' if new_name.empty? raise 'This column cannot be renamed' if CARTODB_COLUMNS.include?(old_name.to_s) - if new_name =~ /^[0-9]/ || RESERVED_COLUMN_NAMES.include?(new_name) || CARTODB_COLUMNS.include?(new_name) + if CartoDB::Importer2::Column.reserved_or_unsupported?(new_name) || CARTODB_COLUMNS.include?(new_name) raise CartoDB::InvalidColumnName, 'That column name is reserved, please choose a different one' end @@ -1221,6 +1223,15 @@ def update_bounding_box SequelRails.connection.run(update_sql) end + # Apply the sanitization defined for this table. + # If the table_name parameter is passed, the sanitization which was originally applied to self + # is applied to the table so named. + # If no table_name parameters is paseed the normalization is applied to self. + # This allows re-applying the same normalization to imported tables. + def sanitize_columns(table_name: name, **options) + self.class.sanitize_columns(table_name, column_sanitization_version, options) + end + private def valid_cartodb_id_candidate?(col_name) @@ -1303,14 +1314,19 @@ def get_valid_name(contendent) Carto::ValidTableNameProposer.new.propose_valid_table_name(contendent, taken_names: user_table_names) end - def self.sanitize_columns(table_name, options={}) + def column_sanitization_version + data_import&.column_sanitization_version || CartoDB::Importer2::Column::NO_COLUMN_SANITIZATION_VERSION + end + + def self.sanitize_columns(table_name, column_sanitization_version, options={}) + connection = options.fetch(:connection) database_schema = options.fetch(:database_schema, 'public') connection.schema(table_name, schema: database_schema, reload: true).each do |column| column_name = column[0].to_s column_type = column[1][:db_type] - column_name = ensure_column_has_valid_name(table_name, column_name, options) + column_name = ensure_column_has_valid_name(table_name, column_name, column_sanitization_version, options) if column_type == 'unknown' CartoDB::ColumnTypecaster.new( user_database: connection, @@ -1323,11 +1339,11 @@ def self.sanitize_columns(table_name, options={}) end end - def self.ensure_column_has_valid_name(table_name, column_name, options={}) + def self.ensure_column_has_valid_name(table_name, column_name, column_sanitization_version, options={}) connection = options.fetch(:connection) database_schema = options.fetch(:database_schema, 'public') - valid_column_name = get_valid_column_name(table_name, column_name, options) + valid_column_name = get_valid_column_name(table_name, column_name, column_sanitization_version, options) if valid_column_name != column_name connection.run(%Q{ALTER TABLE "#{database_schema}"."#{table_name}" RENAME COLUMN "#{column_name}" TO "#{valid_column_name}";}) end @@ -1335,30 +1351,8 @@ def self.ensure_column_has_valid_name(table_name, column_name, options={}) valid_column_name end - def self.get_valid_column_name(table_name, candidate_column_name, options={}) - reserved_words = options.fetch(:reserved_words, []) - - existing_names = get_column_names(table_name, options) - [candidate_column_name] - - candidate_column_name = 'untitled_column' if candidate_column_name.blank? - candidate_column_name = candidate_column_name.to_s.squish - - # Subsequent characters can be letters, underscores or digits - candidate_column_name = candidate_column_name.gsub(/[^a-z0-9]/,'_').gsub(/_{2,}/, '_') - - # Valid names start with a letter or an underscore - candidate_column_name = "column_#{candidate_column_name}" unless candidate_column_name[/^[a-z_]{1}/] - - # Avoid collisions - count = 1 - new_column_name = candidate_column_name - while existing_names.include?(new_column_name) || reserved_words.include?(new_column_name.upcase) - suffix = "_#{count}" - new_column_name = candidate_column_name[0..PG_IDENTIFIER_MAX_LENGTH-suffix.length] + suffix - count += 1 - end - - new_column_name + def self.get_valid_column_name(table_name, candidate_column_name, column_sanitization_version, options={}) + CartoDB::Importer2::Column.get_valid_column_name(candidate_column_name, column_sanitization_version, get_column_names(table_name, options)) end def self.get_column_names(table_name, options={}) @@ -1440,7 +1434,7 @@ def create_table_in_database! sanitized_force_schema = force_schema.split(',').map do |column| # Convert existing primary key into a unique key if column =~ /\A\s*\"([^\"]+)\"(.*)\z/ - "#{$1.sanitize} #{$2.gsub(/primary\s+key/i,'UNIQUE')}" + "#{CartoDB::Importer2::Column.sanitize_name $1} #{$2.gsub(/primary\s+key/i,'UNIQUE')}" else column.gsub(/primary\s+key/i,'UNIQUE') end diff --git a/config/initializers/carto_db.rb b/config/initializers/carto_db.rb index 2c7ae745f7b8..8a8f05aec8e4 100644 --- a/config/initializers/carto_db.rb +++ b/config/initializers/carto_db.rb @@ -20,8 +20,6 @@ module CartoDB SURROGATE_NAMESPACE_PUBLIC_PAGES = 'rp'.freeze SURROGATE_NAMESPACE_VIZJSON = 'rj'.freeze - RESERVED_COLUMN_NAMES = %w(FORMAT CONTROLLER ACTION oid tableoid xmin cmin xmax cmax ctid ogc_fid).freeze - LAST_BLOG_POSTS_FILE_PATH = "#{Rails.root}/public/system/last_blog_posts.html" # Helper method to encapsulate Rails full URL generation compatible with our subdomainless mode diff --git a/config/initializers/string.rb b/config/initializers/string.rb index bc1d4cb680b9..4d63372c5d0c 100644 --- a/config/initializers/string.rb +++ b/config/initializers/string.rb @@ -3,137 +3,10 @@ def self.random(length=10) ('a'..'z').sort_by {rand}[0,length].join end - def normalize - str = self.downcase - return '' if str.blank? - n = str.force_encoding("UTF-8") - n.gsub!(/[àáâãäåāă]/, 'a') - n.gsub!(/æ/, 'ae') - n.gsub!(/[ďđ]/, 'd') - n.gsub!(/[çćčĉċ]/, 'c') - n.gsub!(/[èéêëēęěĕė]/, 'e') - n.gsub!(/ƒ/, 'f') - n.gsub!(/[ĝğġģ]/, 'g') - n.gsub!(/[ĥħ]/, 'h') - n.gsub!(/[ììíîïīĩĭ]/, 'i') - n.gsub!(/[įıijĵ]/, 'j') - n.gsub!(/[ķĸ]/, 'k') - n.gsub!(/[łľĺļŀ]/, 'l') - n.gsub!(/[ñńňņʼnŋ]/, 'n') - n.gsub!(/[òóôõöøōőŏŏ]/, 'o') - n.gsub!(/œ/, 'oe') - n.gsub!(/ą/, 'q') - n.gsub!(/[ŕřŗ]/, 'r') - n.gsub!(/[śšşŝș]/, 's') - n.gsub!(/[ťţŧț]/, 't') - n.gsub!(/[ùúûüūůűŭũų]/, 'u') - n.gsub!(/ŵ/, 'w') - n.gsub!(/[ýÿŷ]/, 'y') - n.gsub!(/[žżź]/, 'z') - n.gsub!(/[ÀÁÂÃÄÅĀĂ]/i, 'A') - n.gsub!(/Æ/i, 'AE') - n.gsub!(/[ĎĐ]/i, 'D') - n.gsub!(/[ÇĆČĈĊ]/i, 'C') - n.gsub!(/[ÈÉÊËĒĘĚĔĖ]/i, 'E') - n.gsub!(/Ƒ/i, 'F') - n.gsub!(/[ĜĞĠĢ]/i, 'G') - n.gsub!(/[ĤĦ]/i, 'H') - n.gsub!(/[ÌÌÍÎÏĪĨĬ]/i, 'I') - n.gsub!(/[IJĴ]/i, 'J') - n.gsub!(/[Ķĸ]/i, 'J') - n.gsub!(/[ŁĽĹĻĿ]/i, 'L') - n.gsub!(/[ÑŃŇŅʼnŊ]/i, 'N') - n.gsub!(/[ÒÓÔÕÖØŌŐŎŎ]/i, 'O') - n.gsub!(/Œ/i, 'OE') - n.gsub!(/Ą/i, 'Q') - n.gsub!(/[ŔŘŖ]/i, 'R') - n.gsub!(/[ŚŠŞŜȘ]/i, 'S') - n.gsub!(/[ŤŢŦȚ]/i, 'T') - n.gsub!(/[ÙÚÛÜŪŮŰŬŨŲ]/i, 'U') - n.gsub!(/Ŵ/i, 'W') - n.gsub!(/[ÝŸŶ]/i, 'Y') - n.gsub!(/[ŽŻŹ]/i, 'Z') - n.gsub!(/A/i, 'A') - n.gsub!(/а/i, 'a') - n.gsub!(/Б/i, 'B') - n.gsub!(/б/i, 'b') - n.gsub!(/В/i, 'V') - n.gsub!(/в/i, 'v') - n.gsub!(/Г/i, 'G') - n.gsub!(/г/i, 'g') - n.gsub!(/Д/i, 'D') - n.gsub!(/д/i, 'd') - n.gsub!(/Е/i, 'E') - n.gsub!(/е/i, 'e') - n.gsub!(/Ё/i, 'Yo') - n.gsub!(/ё/i, 'yo') - n.gsub!(/Ж/i, 'Zh') - n.gsub!(/ж/i, 'zh') - n.gsub!(/З/i, 'Z') - n.gsub!(/з/i, 'z') - n.gsub!(/И/i, 'I') - n.gsub!(/и/i, 'i') - n.gsub!(/Й/i, 'J') - n.gsub!(/й/i, 'j') - n.gsub!(/К/i, 'K') - n.gsub!(/к/i, 'k') - n.gsub!(/Л/i, 'L') - n.gsub!(/л/i, 'l') - n.gsub!(/М/i, 'M') - n.gsub!(/м/i, 'm') - n.gsub!(/Н/i, 'N') - n.gsub!(/н/i, 'n') - n.gsub!(/О/i, 'O') - n.gsub!(/о/i, 'o') - n.gsub!(/П/i, 'P') - n.gsub!(/п/i, 'p') - n.gsub!(/Р/i, 'R') - n.gsub!(/р/i, 'r') - n.gsub!(/С/i, 'S') - n.gsub!(/с/i, 's') - n.gsub!(/Т/i, 'T') - n.gsub!(/т/i, 't') - n.gsub!(/У/i, 'U') - n.gsub!(/у/i, 'u') - n.gsub!(/Ф/i, 'F') - n.gsub!(/ф/i, 'f') - n.gsub!(/Х/i, 'X') - n.gsub!(/х/i, 'x') - n.gsub!(/Ц/i, 'Cz') - n.gsub!(/ц/i, 'cz') - n.gsub!(/Ч/i, 'Ch') - n.gsub!(/ч/i, 'ch') - n.gsub!(/Ш/i, 'Sh') - n.gsub!(/ш/i, 'sh') - n.gsub!(/Щ/i, 'Shh') - n.gsub!(/щ/i, 'shh') - n.gsub!(/Ъ/i, '') - n.gsub!(/ъ/i, '') - n.gsub!(/Ы/i, 'Y') - n.gsub!(/ы/i, 'y') - n.gsub!(/Ь/i, '') - n.gsub!(/ь/i, '') - n.gsub!(/Э/i, 'E') - n.gsub!(/э/i, 'e') - n.gsub!(/Ю/i, 'Yu') - n.gsub!(/ю/i, 'yu') - n.gsub!(/Я/i, 'Ya') - n.gsub!(/я/i, 'ya') - n - end - - def sanitize - return if self.blank? - self.gsub(/<[^>]+>/m,'').normalize.downcase.gsub(/&.+?;/,'-'). - gsub(/[^a-z0-9 _-]/,'-').strip.gsub(/\s+/,'-').gsub(/-+/,'-'). - gsub(/-/,' ').strip.gsub(/ /,'-').gsub(/-/,'_') - end - def strip_tags self.gsub(/<[^>]+>/m,'').strip end - def get_cartodb_types { "number" => ["smallint", /numeric\(\d+,\d+\)/, "integer", "bigint", "decimal", "numeric", "double precision", "serial", "big serial", "real"], @@ -178,21 +51,4 @@ def convert_to_cartodb_type self.downcase end end - - def sanitize_sql - self.gsub(/\\/, '\&\&').gsub(/'/, "''") - end - - def sanitize_column_name - temporal_name = sanitize || '' - - if temporal_name !~ /^[a-zA-Z_]/ || - Carto::DB::Sanitize::RESERVED_WORDS.include?(downcase) || - CartoDB::RESERVED_COLUMN_NAMES.include?(upcase) - return '_' + temporal_name - else - temporal_name - end - end - end diff --git a/lib/carto/db/sanitize.rb b/lib/carto/db/sanitize.rb index 30fa54a89548..c5c75d1c5525 100644 --- a/lib/carto/db/sanitize.rb +++ b/lib/carto/db/sanitize.rb @@ -4,6 +4,7 @@ module Sanitize PREFIX_REPLACEMENT = 'table_'.freeze SUFFIX_REPLACEMENT = '_t'.freeze CHARACTER_REPLACEMENT = '_'.freeze + # See https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS MAX_IDENTIFIER_LENGTH = 63 DISALLOWED_STARTING_CHARACTERS_REGEX = /^[^a-z]+/ DISALLOWED_CHARACTERS_REGEX = /[^a-z|_|0-9]/ @@ -11,14 +12,31 @@ module Sanitize SYSTEM_TABLE_NAMES = %w(spatial_ref_sys geography_columns geometry_columns raster_columns raster_overviews cdb_tablemetadata geometry raster).freeze RESERVED_TABLE_NAMES = %w(layergroup all public).freeze - RESERVED_WORDS = %w(all analyse analyze and any array as asc asymmetric authorization between binary both - case cast check collate column constraint create cross current_date current_role - current_time current_timestamp current_user default deferrable desc distinct do else - end except false for foreign freeze from full grant group having ilike in initially - inner intersect into is isnull join leading left like limit localtime localtimestamp - natural new not notnull null off offset old on only or order outer overlaps placing - primary references right select session_user similar some symmetric table then to - trailing true union unique user using verbose when where xmin xmax).freeze + + # See https://www.postgresql.org/docs/current/ddl-system-columns.html + SYSTEM_COLUMN_NAMES = %w(tableoid xmin cmin xmax cmax ctid).freeze + + # See https://www.postgresql.org/docs/current/sql-keywords-appendix.html + RESERVED_WORDS = %w(all analyse analyze and any array as asc asymmetric authorization binary both + case cast check collate collation column concurrently constraint create cross + current_catalog current_date current_role current_schema current_time + current_timestamp current_user default deferrable desc distinct do else end + except false fetch for foreign freeze from full grant group having ilike in + initially inner intersect into is isnull join lateral leading left like limit + localtime localtimestamp natural not notnull null offset on only or order outer + overlaps placing primary references returning right select session_user similar + some symmetric table tablesample then to trailing true union unique user using + variadic verbose when where window with).freeze + + ADDITIONAL_RESERVED_COLUMNS = %w(oid ogc_fid).freeze + + REJECTED_COLUMN_NAMES = (SYSTEM_COLUMN_NAMES + ADDITIONAL_RESERVED_COLUMNS).freeze + + # FIXME we have been reserving these name in columns but I don't know the reason ¯\_(ツ)_/¯ + ADDITIONAL_WORDS = %w(between new off old format controller action).freeze + + RESERVED_COLUMN_NAMES = SYSTEM_COLUMN_NAMES + RESERVED_WORDS + ADDITIONAL_WORDS + def self.append_with_truncate_and_sanitize(identifier, suffix) suffix_length = suffix.length @@ -58,7 +76,8 @@ def self.sanitize_identifier(identifier) sanitized_identifier = sanitized_identifier[0..(MAX_IDENTIFIER_LENGTH - 1)] # Append _t if is a reserved word or reserved table name - if (RESERVED_WORDS + RESERVED_TABLE_NAMES + SYSTEM_TABLE_NAMES).map(&:downcase).include?(sanitized_identifier) + # NOTE: strictly, we don't need to avoid ADDITIONAL_WORDS for table names + if (RESERVED_WORDS + RESERVED_TABLE_NAMES + SYSTEM_TABLE_NAMES + ADDITIONAL_WORDS).map(&:downcase).include?(sanitized_identifier) sanitized_identifier += SUFFIX_REPLACEMENT end diff --git a/lib/importer/lib/cartodb-migrator/migrator.rb b/lib/importer/lib/cartodb-migrator/migrator.rb index 8088e32386a4..3f8d4a1c36c2 100644 --- a/lib/importer/lib/cartodb-migrator/migrator.rb +++ b/lib/importer/lib/cartodb-migrator/migrator.rb @@ -84,14 +84,14 @@ def sanitize_columns! def sanitize(column_names) columns_to_sanitize = column_names.select do |column_name| - column_name != column_name.sanitize_column_name + column_name != CartoDB::Importer2::Column.sanitize_name(column_name) end correct_columns = column_names - columns_to_sanitize sanitization_map = Hash[ columns_to_sanitize.map { |column_name| - [column_name, column_name.sanitize_column_name] + [column_name, CartoDB::Importer2::Column.sanitize_name(column_name)] } ] diff --git a/lib/tasks/db_maintenance.rake b/lib/tasks/db_maintenance.rake index 55eb5109f7ce..537a8eb5199f 100644 --- a/lib/tasks/db_maintenance.rake +++ b/lib/tasks/db_maintenance.rake @@ -1026,7 +1026,8 @@ namespace :cartodb do tables = JSON.parse(File.read(args['table_definition_json_path'].to_s)) u.in_database({as: :superuser, no_cartodb_in_schema: true}) do |db| db.transaction do - server_name = "oracle_#{args[:oracle_url].sanitize}_#{Time.now.to_i}" + name = CartoDB::Importer2::StringSanitizer.sanitize(args[:oracle_url], transliterate_cyrillic: true) + server_name = "oracle_#{name}_#{Time.now.to_i}" db.run('CREATE EXTENSION oracle_fdw') unless db.fetch(%Q{ SELECT count(*) FROM pg_extension WHERE extname='oracle_fdw' }).first[:count] > 0 diff --git a/services/importer/lib/importer/column.rb b/services/importer/lib/importer/column.rb index 2228bf26f7d3..d0b9cf9c7427 100644 --- a/services/importer/lib/importer/column.rb +++ b/services/importer/lib/importer/column.rb @@ -5,6 +5,8 @@ require_relative './exceptions' require_relative './query_batcher' +require_relative '../../../../lib/carto/db/sanitize' + module CartoDB module Importer2 class Column @@ -16,22 +18,14 @@ class Column KML_POINT_RE = // DEFAULT_SCHEMA = 'cdb_importer' DIRECT_STATEMENT_TIMEOUT = 1.hour * 1000 - # @see config/initializers/carto_db.rb -> POSTGRESQL_RESERVED_WORDS - RESERVED_WORDS = %w{ ALL ANALYSE ANALYZE AND ANY ARRAY AS ASC ASYMMETRIC - AUTHORIZATION BETWEEN BINARY BOTH CASE CAST CHECK - COLLATE COLUMN CONSTRAINT CREATE CROSS CURRENT_DATE - CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP - CURRENT_USER DEFAULT DEFERRABLE DESC DISTINCT DO - ELSE END EXCEPT FALSE FOR FOREIGN FREEZE FROM FULL - GRANT GROUP HAVING ILIKE IN INITIALLY INNER INTERSECT - INTO IS ISNULL JOIN LEADING LEFT LIKE LIMIT LOCALTIME - LOCALTIMESTAMP NATURAL NEW NOT NOTNULL NULL OFF - OFFSET OLD ON ONLY OR ORDER OUTER OVERLAPS PLACING - PRIMARY REFERENCES RIGHT SELECT SESSION_USER SIMILAR - SOME SYMMETRIC TABLE THEN TO TRAILING TRUE UNION - UNIQUE USER USING VERBOSE WHEN WHERE XMIN XMAX - FORMAT CONTROLLER ACTION - } + RESERVED_COLUMN_NAMES = Carto::DB::Sanitize::RESERVED_COLUMN_NAMES + PG_IDENTIFIER_MAX_LENGTH = Carto::DB::Sanitize::MAX_IDENTIFIER_LENGTH + + REJECTED_COLUMN_NAMES = Carto::DB::Sanitize::REJECTED_COLUMN_NAMES + + NO_COLUMN_SANITIZATION_VERSION = 0 + INITIAL_COLUMN_SANITIZATION_VERSION = 1 + CURRENT_COLUMN_SANITIZATION_VERSION = 2 def initialize(db, table_name, column_name, user, schema = DEFAULT_SCHEMA, job = nil, logger = nil, capture_exceptions = true) @job = job || Job.new({logger: logger}) @@ -265,21 +259,98 @@ def sanitize end def sanitized_name - name = StringSanitizer.new.sanitize(column_name.to_s) - return name unless reserved?(name) || unsupported?(name) - "_#{name}" + self.class.sanitize_name column_name end - def reserved?(name) - RESERVED_WORDS.include?(name.upcase) + def self.reserved?(name) + RESERVED_COLUMN_NAMES.include?(name.downcase) end - def unsupported?(name) + def self.unsupported?(name) name !~ /^[a-zA-Z_]/ end + def self.reserved_or_unsupported?(name) + reserved?(name) || unsupported?(name) + end + + def self.sanitize_name(column_name) + name = StringSanitizer.sanitize(column_name.to_s, transliterate_cyrillic: true) + return name unless reserved_or_unsupported?(name) + "_#{name}" + end + + def self.rejected?(name) + REJECTED_COLUMN_NAMES.include?(name) || name =~ /^[0-9]/ + end + + def self.get_valid_column_name(candidate_column_name, column_sanitization_version, column_names) + return candidate_column_name if column_sanitization_version == NO_COLUMN_SANITIZATION_VERSION + + existing_names = column_names - [candidate_column_name] + + if column_sanitization_version == INITIAL_COLUMN_SANITIZATION_VERSION + get_valid_column_name_v1(candidate_column_name, existing_names) + elsif column_sanitization_version == 2 + get_valid_column_name_v2(candidate_column_name, existing_names) + elsif column_sanitization_version == 3 + get_valid_column_name_v3(candidate_column_name, existing_names) + else + raise "Invalid column sanitization version #{column_sanitization_version.inspect}" + end + end + private + def self.get_valid_column_name_v1(candidate_column_name, existing_names) + # NOTE: originally not all uses of this sanitization version applied reserved words + # reserved_words = RESERVED_COLUMN_NAMES + reserved_words = [] + + candidate_column_name = 'untitled_column' if candidate_column_name.blank? + candidate_column_name = candidate_column_name.to_s.squish + + # Subsequent characters can be letters, underscores or digits + candidate_column_name = candidate_column_name.gsub(/[^a-z0-9]/,'_').gsub(/_{2,}/, '_') + + # Valid names start with a letter or an underscore + candidate_column_name = "column_#{candidate_column_name}" unless candidate_column_name[/^[a-z_]{1}/] + + avoid_collisions(candidate_column_name, existing_names, reserved_words) + end + + def self.get_valid_column_name_v2(candidate_column_name, existing_names) + new_column_name = sanitize_name(candidate_column_name).gsub(/_{2,}/, '_') + new_column_name = [0, PG_IDENTIFIER_MAX_LENGTH] if new_column_name.size > PG_IDENTIFIER_MAX_LENGTH + avoid_collisions(new_column_name, existing_names, RESERVED_COLUMN_NAMES) + end + + def self.get_valid_column_name_v3(candidate_column_name, existing_names) + # experimental sanitization + # this can be configured using the locale files for the current (I18n.locale) locale; + # for example, for I18n.locale == :en we could add this to config/locales/en.yml + # en: + # i18n: + # transliterate: + # rule: + # Ж: Zh + # ж: zh + new_column_name = candidate_column_name.parameterize.tr('-','_') + new_column_name = [0, PG_IDENTIFIER_MAX_LENGTH] if new_column_name.size > PG_IDENTIFIER_MAX_LENGTH + avoid_collisions(new_column_name, existing_names, RESERVED_COLUMN_NAMES) + end + + def self.avoid_collisions(name, existing_names, reserved_words, max_length=PG_IDENTIFIER_MAX_LENGTH) + count = 1 + new_name = name + while existing_names.include?(new_name) || reserved_words.include?(new_name.downcase) + suffix = "_#{count}" + new_name = name[0..max_length-suffix.length] + suffix + count += 1 + end + new_name + end + attr_reader :job, :db, :table_name, :column_name, :schema def qualified_table_name diff --git a/services/importer/lib/importer/string_sanitizer.rb b/services/importer/lib/importer/string_sanitizer.rb index b66bbc01b669..50165fb7d0cc 100644 --- a/services/importer/lib/importer/string_sanitizer.rb +++ b/services/importer/lib/importer/string_sanitizer.rb @@ -1,11 +1,13 @@ module CartoDB module Importer2 - class StringSanitizer - def normalize(string) + module StringSanitizer + module_function + + def normalize(string, transliterate_cyrillic: false) return '' if string.nil? || string.empty? - n = string.downcase.force_encoding("UTF-8") - n.gsub!(/[àáâãäåāă]/, 'a') + n = string.force_encoding("UTF-8") + n.gsub!(/[àáâãäåāă]/, 'a') n.gsub!(/æ/, 'ae') n.gsub!(/[ďđ]/, 'd') n.gsub!(/[çćčĉċ]/, 'c') @@ -28,36 +30,114 @@ def normalize(string) n.gsub!(/ŵ/, 'w') n.gsub!(/[ýÿŷ]/, 'y') n.gsub!(/[žżź]/, 'z') - n.gsub!(/[ÀÁÂÃÄÅĀĂ]/i, 'A') - n.gsub!(/Æ/i, 'AE') - n.gsub!(/[ĎĐ]/i, 'D') - n.gsub!(/[ÇĆČĈĊ]/i, 'C') - n.gsub!(/[ÈÉÊËĒĘĚĔĖ]/i, 'E') - n.gsub!(/Ƒ/i, 'F') - n.gsub!(/[ĜĞĠĢ]/i, 'G') - n.gsub!(/[ĤĦ]/i, 'H') - n.gsub!(/[ÌÌÍÎÏĪĨĬ]/i, 'I') - n.gsub!(/[IJĴ]/i, 'J') - n.gsub!(/[Ķĸ]/i, 'J') - n.gsub!(/[ŁĽĹĻĿ]/i, 'L') - n.gsub!(/[ÑŃŇŅʼnŊ]/i, 'M') - n.gsub!(/[ÒÓÔÕÖØŌŐŎŎ]/i, 'N') - n.gsub!(/Œ/i, 'OE') - n.gsub!(/Ą/i, 'Q') - n.gsub!(/[ŔŘŖ]/i, 'R') - n.gsub!(/[ŚŠŞŜȘ]/i, 'S') - n.gsub!(/[ŤŢŦȚ]/i, 'T') - n.gsub!(/[ÙÚÛÜŪŮŰŬŨŲ]/i, 'U') - n.gsub!(/Ŵ/i, 'W') - n.gsub!(/[ÝŸŶ]/i, 'Y') - n.gsub!(/[ŽŻŹ]/i, 'Z') + n.gsub!(/[ÀÁÂÃÄÅĀĂ]/, 'A') + n.gsub!(/Æ/, 'AE') + n.gsub!(/[ĎĐ]/, 'D') + n.gsub!(/[ÇĆČĈĊ]/, 'C') + n.gsub!(/[ÈÉÊËĒĘĚĔĖ]/, 'E') + n.gsub!(/Ƒ/i, 'F') + n.gsub!(/[ĜĞĠĢ]/, 'G') + n.gsub!(/[ĤĦ]/i, 'H') + n.gsub!(/[ÌÌÍÎÏĪĨĬ]/, 'I') + n.gsub!(/[IJĴ]/, 'J') + n.gsub!(/[Ķĸ]/, 'K') + n.gsub!(/[ŁĽĹĻĿ]/, 'L') + n.gsub!(/[ÑŃŇŅʼnŊ]/, 'N') + n.gsub!(/[ÒÓÔÕÖØŌŐŎŎ]/, 'O') + n.gsub!(/Œ/, 'OE') + n.gsub!(/Ą/, 'Q') + n.gsub!(/[ŔŘŖ]/, 'R') + n.gsub!(/[ŚŠŞŜȘ]/, 'S') + n.gsub!(/[ŤŢŦȚ]/, 'T') + n.gsub!(/[ÙÚÛÜŪŮŰŬŨŲ]/, 'U') + n.gsub!(/Ŵ/, 'W') + n.gsub!(/[ÝŸŶ]/, 'Y') + n.gsub!(/[ŽŻŹ]/, 'Z') + if transliterate_cyrillic + n.gsub!(/Б/, 'B') + n.gsub!(/б/, 'b') + n.gsub!(/В/, 'V') + n.gsub!(/в/, 'v') + n.gsub!(/Г/, 'G') + n.gsub!(/г/, 'g') + n.gsub!(/Д/, 'D') + n.gsub!(/д/, 'd') + n.gsub!(/Е/, 'E') + n.gsub!(/е/, 'e') + n.gsub!(/Ё/, 'Yo') + n.gsub!(/ё/, 'yo') + n.gsub!(/Ж/, 'Zh') + n.gsub!(/ж/, 'zh') + n.gsub!(/З/, 'Z') + n.gsub!(/з/, 'z') + n.gsub!(/И/, 'I') + n.gsub!(/и/, 'i') + n.gsub!(/Й/, 'J') + n.gsub!(/й/, 'j') + n.gsub!(/К/, 'K') + n.gsub!(/к/, 'k') + n.gsub!(/Л/, 'L') + n.gsub!(/л/, 'l') + n.gsub!(/М/, 'M') + n.gsub!(/м/, 'm') + n.gsub!(/Н/, 'N') + n.gsub!(/н/, 'n') + n.gsub!(/О/, 'O') + n.gsub!(/о/, 'o') + n.gsub!(/П/, 'P') + n.gsub!(/п/, 'p') + n.gsub!(/Р/, 'R') + n.gsub!(/р/, 'r') + n.gsub!(/С/, 'S') + n.gsub!(/с/, 's') + n.gsub!(/Т/, 'T') + n.gsub!(/т/, 't') + n.gsub!(/У/, 'U') + n.gsub!(/у/, 'u') + n.gsub!(/Ф/, 'F') + n.gsub!(/ф/, 'f') + n.gsub!(/Х/, 'X') + n.gsub!(/х/, 'x') + n.gsub!(/Ц/, 'Cz') + n.gsub!(/ц/, 'cz') + n.gsub!(/Ч/, 'Ch') + n.gsub!(/ч/, 'ch') + n.gsub!(/Ш/, 'Sh') + n.gsub!(/ш/, 'sh') + n.gsub!(/Щ/, 'Shh') + n.gsub!(/щ/, 'shh') + n.gsub!(/Ъ/, '') + n.gsub!(/ъ/, '') + n.gsub!(/Ы/, 'Y') + n.gsub!(/ы/, 'y') + n.gsub!(/Ь/, '') + n.gsub!(/ь/, '') + n.gsub!(/Э/, 'E') + n.gsub!(/э/, 'e') + n.gsub!(/Ю/, 'Yu') + n.gsub!(/ю/, 'yu') + n.gsub!(/Я/, 'Ya') + n.gsub!(/я/, 'ya') + end n end #normalize - def sanitize(string) - return '' if string.nil? || string.empty? + def legacy_sanitize(string) + return '' if string.nil? || string.empty? + normalize(string.downcase.gsub(/<[^>]+>/m,''), transliterate_cyrillic: false) + .gsub(/&.+?;/,'-') + .gsub(/[^a-z0-9 _-]/,'-').strip + .gsub(/\s+/,'-') + .gsub(/-+/,'-') + .gsub(/-/,' ').strip + .gsub(/ /,'-') + .gsub(/-/,'_') + end - normalize(string.gsub(/<[^>]+>/m,'')) + def sanitize(string, transliterate_cyrillic: false) + return '' if string.nil? || string.empty? + normalize(string.gsub(/<[^>]+>/m,''), transliterate_cyrillic: transliterate_cyrillic) + .downcase .gsub(/&.+?;/,'-') .gsub(/[^a-z0-9 _-]/,'-').strip .gsub(/\s+/,'-') diff --git a/services/importer/spec/unit/column_spec.rb b/services/importer/spec/unit/column_spec.rb index 5435119819e5..b105a93e7fd1 100644 --- a/services/importer/spec/unit/column_spec.rb +++ b/services/importer/spec/unit/column_spec.rb @@ -243,15 +243,15 @@ describe '#reserved?' do it 'returns true if name is a reserved keyword' do - @column.reserved?('select').should eq true - @column.reserved?('bogus').should eq false + Column.reserved?('select').should eq true + Column.reserved?('bogus').should eq false end end #reserved? describe '#unsupported?' do it 'returns true if name is not supported by Postgres' do - @column.unsupported?('9name').should eq true - @column.unsupported?('name9').should eq false + Column.unsupported?('9name').should eq true + Column.unsupported?('name9').should eq false end end #unsupported? @@ -345,5 +345,153 @@ def random_kml_multi_record ogc_fid: 1 } end #random_kml_multi_record + + + LEGACY_SANITIZATION_EXAMPLES = { + "abc" => "abc", + "abc xyz" => "abc_xyz", + "2abc" => "column_2abc", + "Abc" => "_bc", + "\u0432\u044b\u0445\u043b\u043e\u043f\u044b \u0430\u0432\u0442\u043e\u0442\u0440\u0430\u043d\u0441\u043f\u043e\u0440\u0442\u04302" => "_2", + "\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a2" => "_2", + "\u0432\u044b\u0431\u0440\u043e\u0441\u044b \u043f\u0440\u0435\u0434\u043f\u0440\u0438\u044f\u0442\u0438\u04392" => "_2", + "\u013ar" => "_r", + "CONVERT(BlueNumber USING utf8)" => '_lue_umber_utf8_', + "is growing site fenced?" => "is_growing_site_fenced_", + "if it\u2019s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment_", + "Paddock" => "_addock", + "Date Due" => "_ate_ue", + "__5" => "_5", + "__1" => "_1", + "tel\u00e9fono" => "tel_fono", + ":@computed_region_wvic_k925" => "_computed_region_wvic_k925", + "\u0420\u0435\u0433\u0438\u043e\u043d" => "_", + "\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a" => "_", + "> min" => "_min", + "12_ schedule of visits.0" => "column_12_schedule_of_visits_0", + "previous rent (\u00a3 per sq ft)" => "previous_rent_per_sq_ft_", + "description/\u540d\u7a31" => "description_", + "description/\u5730\u5740" => "description_", + "@relations" => "_relations", + "EntityName" => "_ntity_ame", + "_ injured" => "_injured", + "trips 11 _ 15 miles" => "trips_11_15_miles", + "as" => "as", + "any" => "any", + "xmin" => "xmin", + "action" => "action", + } + + LEGACY_SANITIZATION_COLS = { + ['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['_2', '_2_1', '_2_2'], + ["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description_", "description__1"] + } + + VERSION_2_SANITIZATION_EXAMPLES = { + "abc" => "abc", + "abc xyz" => "abc_xyz", + "2abc" => "_2abc", + "Abc" => "abc", + "выхлопы автотранспорта2" => "vyxlopy_vtotr_nsport_2", + "неустановленный источник2" => "neust_novlennyj_istochnik2", + "выбросы предприятий2" => "vybrosy_predpriyatij2", + "ĺr" => "lr", + "CONVERT(BlueNumber USING utf8)" => "convert_bluenumber_using_utf8", + "is growing site fenced?" => "is_growing_site_fenced", + "if it’s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment", + "Paddock" => "paddock", + "Date Due" => "date_due", + "__5" => "_5", + "__1" => "_1", + "teléfono" => "telefono", + ":@computed_region_wvic_k925" => "computed_region_wvic_k925", + "Регион" => "region", + "неустановленный источник" => "neust_novlennyj_istochnik", + "> min" => "min", + "12_ schedule of visits.0" => "_12_schedule_of_visits_0", + "previous rent (£ per sq ft)" => "previous_rent_per_sq_ft", + "description/名稱" => "description", + "description/地址" => "description", + "@relations" => "relations", + "EntityName" => "entityname", + "_ injured" => "_injured", + "trips 11 _ 15 miles" => "trips_11_15_miles", + "as" => "_as", + "any" => "_any", + "xmin" => "_xmin", + "action" => "_action", + } + + VERSION_2_SANITIZATION_COLS = { + ['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['vybrosy_predpriyatij2', 'vyxlopy_vtotr_nsport_2', 'neust_novlennyj_istochnik2'], + ["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description", "description_1"], + ["abc", "Abc", "aBc", "ABC"] => ["abc", "abc_1", "abc_2", "abc_3"] + } + + describe '.get_valid_column_name' do + + it 'can apply legacy sanitization to single columns' do + LEGACY_SANITIZATION_EXAMPLES.each do |input_name, output_name| + name = Column::get_valid_column_name(input_name, Column::INITIAL_COLUMN_SANITIZATION_VERSION, []) + name.should eq output_name + end + end + + it 'can apply legacy sanitization to multiple columns' do + LEGACY_SANITIZATION_COLS.each do |input_columns, output_columns| + columns = [] + input_columns.zip(output_columns).each do |input_column, output_column| + column = Column::get_valid_column_name(input_column, Column::INITIAL_COLUMN_SANITIZATION_VERSION, columns) + columns << column + column.should eq output_column + end + end + end + + it 'can apply sanitization v2 to single columns' do + VERSION_2_SANITIZATION_EXAMPLES.each do |input_name, output_name| + name = Column::get_valid_column_name(input_name, 2, []) + name.should eq output_name + end + end + + it 'v2 sanitization is idempotent' do + VERSION_2_SANITIZATION_EXAMPLES.each_key do |input_name| + first_name = Column::get_valid_column_name(input_name, 2, []) + second_name = Column::get_valid_column_name(first_name, 2, []) + second_name.should eq first_name + end + end + + it 'can apply v2 sanitization to multiple columns' do + VERSION_2_SANITIZATION_COLS.each do |input_columns, output_columns| + columns = [] + input_columns.zip(output_columns).each do |input_column, output_column| + column = Column::get_valid_column_name(input_column, 2, columns) + puts "--- ADDING COL #{column}" + columns << column + puts " >> #{columns.inspect}" + column.should eq output_column + end + end + end + + it 'multiple column sanitization is idempotent' do + VERSION_2_SANITIZATION_COLS.each_key do |input_columns| + columns1 = [] + input_columns.each do |input_column| + column1 = Column::get_valid_column_name(input_column, 2, columns1) + columns1 << column1 + end + columns2 = [] + columns1.each do |input_column| + column2 = Column::get_valid_column_name(input_column, 2, columns2) + columns2 << column2 + column2.should eq input_column + end + end + end + end # .get_valid_column_name + end # Column diff --git a/spec/models/table_spec.rb b/spec/models/table_spec.rb index 3a73f24e2c54..9ac1576a7aa9 100644 --- a/spec/models/table_spec.rb +++ b/spec/models/table_spec.rb @@ -552,7 +552,7 @@ def expect_save_to_fail_validation(table) table.name = 'Wadus table #23' table.save table.reload - table.name.should == "Wadus table #23".sanitize + table.name.should == CartoDB::Importer2::StringSanitizer.sanitize("Wadus table #23") @user.in_database do |user_database| user_database.table_exists?('wadus_table'.to_sym).should be_false user_database.table_exists?('wadus_table_23'.to_sym).should be_true @@ -561,7 +561,7 @@ def expect_save_to_fail_validation(table) table.name = '' table.save table.reload - table.name.should == "Wadus table #23".sanitize + table.name.should == CartoDB::Importer2::StringSanitizer.sanitize("Wadus table #23") @user.in_database do |user_database| user_database.table_exists?('wadus_table_23'.to_sym).should be_true end @@ -2219,23 +2219,28 @@ def check_query_geometry(query, schema) describe 'self.get_valid_column_name' do it 'returns the same candidate name if it is ok' do Table.expects(:get_column_names).once.returns(%w{a b c}) - Table.get_valid_column_name('dummy_table_name', 'a').should == 'a' + version = CartoDB::Importer2::Column::CURRENT_COLUMN_SANITIZATION_VERSION + Table.get_valid_column_name('dummy_table_name', 'a', version).should == 'a' end it 'returns an alternative name if using a reserved word' do Table.expects(:get_column_names).once.returns(%w{column b c}) + version = CartoDB::Importer2::Column::CURRENT_COLUMN_SANITIZATION_VERSION Table.get_valid_column_name( 'dummy_table_name', 'column', - reserved_words: %w{ INSERT SELECT COLUMN }).should == 'column_1' + version + ).should == '_column' end it 'avoids collisions when a renamed column already exists' do - Table.expects(:get_column_names).once.returns(%w{column_1 column c}) + Table.expects(:get_column_names).once.returns(%w{_column column c}) + version = CartoDB::Importer2::Column::CURRENT_COLUMN_SANITIZATION_VERSION Table.get_valid_column_name( 'dummy_table_name', 'column', - reserved_words: %w{ INSERT SELECT COLUMN }).should == 'column_2' + version + ).should == '_column_1' end end