diff --git a/classes/OpenXdmod/Migration/Version900To950/DatabaseMigration.php b/classes/OpenXdmod/Migration/Version900To950/DatabaseMigration.php index 6576c899f0..aaf32a6210 100644 --- a/classes/OpenXdmod/Migration/Version900To950/DatabaseMigration.php +++ b/classes/OpenXdmod/Migration/Version900To950/DatabaseMigration.php @@ -7,6 +7,7 @@ use CCR\DB; use ETL\Utilities; +use OpenXdmod\Setup\Console; /** * Migrate databases from version 9.0.0 to 9.5.0. @@ -17,9 +18,34 @@ public function execute() { parent::execute(); + $console = Console::factory(); + $dbh = DB::factory('datawarehouse'); $mysql_helper = \CCR\DB\MySQLHelper::factory($dbh); if ($mysql_helper->tableExists('modw_cloud.cloud_resource_specs')) { + + $staging_resource_sql = "SELECT + COUNT(*) + FROM + modw_cloud.staging_resource_specifications + GROUP BY + resource_id, hostname, fact_date + HAVING + COUNT(*) > 1"; + + $staging_result = $dbh->query($staging_resource_sql); + + if(count($staging_result) > 0) { + $console->displayMessage(<<<"EOT" +This version of Open XDMoD changes the schema on two tables related to cloud utilization metrics. It appears that +data in the table modw_cloud.staging_resource_specifications will violate these schema changes. The violation is that +there cannot be two rows with the same resource ID, hostname, and date. Before you next ingest your cloud resource +specification files you should either remove any extra rows or truncate this table and then shred and ingest all of +your cloud resource specification files. +EOT + ); + } + Utilities::runEtlPipeline( ['cloud-migration-9_0_0-9_5_0'], $this->logger, diff --git a/configuration/etl/etl_tables.d/cloud_common/cloud_resource_specs.json b/configuration/etl/etl_tables.d/cloud_common/cloud_resource_specs.json index 681a0414b6..b2b59aff48 100644 --- a/configuration/etl/etl_tables.d/cloud_common/cloud_resource_specs.json +++ b/configuration/etl/etl_tables.d/cloud_common/cloud_resource_specs.json @@ -1,10 +1,10 @@ { - "#": "Record types are global to all cloud resources", + "#": "Contains the start and end time for a specific set of vcpus and memory for a cloud host.", "table_definition": { "name": "cloud_resource_specs", "engine": "MyISAM", - "comment": "Record type: accounting, administrative, derived, etc.", + "comment": "Contains the start and end time for a specific set of vcpus and memory for a cloud host", "columns": [ { "name": "host_id", @@ -62,20 +62,13 @@ "extra": "on update CURRENT_TIMESTAMP" } ], - - "#": "Note that the primary key is (resource, type, identifier, create_time) rather than", - "#": "asset_id so we can take advantage of ON DUPLICATE KEY UPDATE to update existing entries", - "#": "and still use auto_increment for new entries.", - "indexes": [ { "name": "PRIMARY", "columns": [ "resource_id", "hostname", - "memory_mb", - "vcpus", - "start_date_ts" + "start_day_id" ], "is_unique": true }, diff --git a/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json b/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json index b1fa119f01..ae687f2b12 100644 --- a/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json +++ b/configuration/etl/etl_tables.d/cloud_common/staging_resource_specifications.json @@ -1,10 +1,11 @@ { - "#": "Record types are global to all cloud resources", + "#": "This table contains a specific set of vcpus and memory size for a host for", + "#": "a day. There should be no more than one record for a host on a day.", "table_definition": { "name": "staging_resource_specifications", "engine": "MyISAM", - "comment": "Record type: accounting, administrative, derived, etc.", + "comment": "Contains a specific set of vcpus and memory size for a host for a day", "columns": [ { "name": "hostname", @@ -42,8 +43,6 @@ "columns": [ "resource_id", "hostname", - "memory_mb", - "vcpus", "fact_date" ], "is_unique": true