diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index 9377d9d72a..d75b2b0d98 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -263,11 +263,11 @@ function main() } if($datatypeValue == 'openstack'){ - $dwi->ingestCloudDataOpenStack(); + $dwi->ingestCloudDataOpenStack($lastModifiedStartDate); } if($datatypeValue == 'genericcloud'){ - $dwi->ingestCloudDataGeneric(); + $dwi->ingestCloudDataGeneric($lastModifiedStartDate); } if ($datatypeValue == 'storage') { @@ -293,7 +293,7 @@ function main() } if($realmToAggregate == 'cloud' || $realmToAggregate === false){ - $dwi->aggregateCloudData(); + $dwi->aggregateCloudData($lastModifiedStartDate); } if ($realmToAggregate == 'storage' || $realmToAggregate === false) { diff --git a/classes/ETL/Ingestor/CloudStateReconstructorTransformIngestor.php b/classes/ETL/Ingestor/CloudStateReconstructorTransformIngestor.php index 0a8797b8c6..d33d5305f1 100644 --- a/classes/ETL/Ingestor/CloudStateReconstructorTransformIngestor.php +++ b/classes/ETL/Ingestor/CloudStateReconstructorTransformIngestor.php @@ -58,7 +58,7 @@ public function __construct(aOptions $options, EtlConfiguration $etlConfig, Log $this->_start_event_ids = array(self::START, self::RESUME, self::STATE_REPORT, self::UNSHELVE, self::UNPAUSE, self::UNSUSPEND, self::POWER_ON); $this->_all_event_ids = array_merge($this->_start_event_ids, $this->_stop_event_ids); $this->_end_time = $etlConfig->getVariableStore()->endDate ? date('Y-m-d H:i:s', strtotime($etlConfig->getVariableStore()->endDate)) : null; - + $this->resetInstance(); } @@ -132,8 +132,9 @@ protected function getSourceQueryString() // is lost. To work around this we add a dummy row filled with zeroes. $colCount = count($this->etlSourceQuery->records); $unionValues = array_fill(0, $colCount, 0); + $subSelect = "(SELECT DISTINCT instance_id from modw_cloud.event WHERE last_modified > \"" . $this->getEtlOverseerOptions()->getLastModifiedStartDate() . "\")"; - $sql = "$sql WHERE event_type_id IN (" . 
implode(',', $this->_all_event_ids) . ")\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 DESC, 3 ASC, 4 DESC"; + $sql = "$sql WHERE instance_id IN " . $subSelect . " AND event_type_id IN (" . implode(',', $this->_all_event_ids) . ")\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 DESC, 3 ASC, 4 DESC"; return $sql; } diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index bd99cc0611..01f7c54391 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -202,12 +202,16 @@ public function ingestAllHpcdb($startDate = null, $endDate = null) * tables do not exist then catch the resulting exception and display a message * saying that there is no OpenStack data to ingest. */ - public function ingestCloudDataOpenStack() + public function ingestCloudDataOpenStack($lastModifiedStartDate) { if( $this->isRealmEnabled('Cloud') ){ - try{ + try { $this->logger->notice('Ingesting OpenStack event log data'); - Utilities::runEtlPipeline(array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'), $this->logger); + Utilities::runEtlPipeline( + array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'), + $this->logger, + array('last-modified-start-date' => $lastModifiedStartDate) + ); } catch( Exception $e ){ if( $e->getCode() == 1146 ){ @@ -225,12 +229,16 @@ public function ingestCloudDataOpenStack() * tables do not exist then catch the resulting exception and display a message * saying that there is no generic cloud data to ingest. 
*/ - public function ingestCloudDataGeneric() + public function ingestCloudDataGeneric($lastModifiedStartDate) { if( $this->isRealmEnabled('Cloud') ){ - try{ + try { $this->logger->notice('Ingesting generic cloud log files'); - Utilities::runEtlPipeline(array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'), $this->logger); + Utilities::runEtlPipeline( + array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'), + $this->logger, + array('last-modified-start-date' => $lastModifiedStartDate) + ); } catch( Exception $e ){ if( $e->getCode() == 1146 ){ @@ -273,11 +281,15 @@ public function ingestStorageData() * catch the resulting exception and display a message saying that there * is no cloud data to aggregate and cloud aggregation is being skipped. */ - public function aggregateCloudData() + public function aggregateCloudData($lastModifiedStartDate) { if( $this->isRealmEnabled('Cloud') ){ $this->logger->notice('Aggregating Cloud data'); - Utilities::runEtlPipeline(array('cloud-state-pipeline'), $this->logger); + Utilities::runEtlPipeline( + array('cloud-state-pipeline'), + $this->logger, + array('last-modified-start-date' => $lastModifiedStartDate) + ); $filterListBuilder = new FilterListBuilder(); $filterListBuilder->setLogger($this->logger); diff --git a/configuration/etl/etl.d/cloud_state_machine.json b/configuration/etl/etl.d/cloud_state_machine.json index ba60980fb5..32be00f222 100644 --- a/configuration/etl/etl.d/cloud_state_machine.json +++ b/configuration/etl/etl.d/cloud_state_machine.json @@ -35,7 +35,6 @@ "name": "cloud-transient", "class": "DatabaseIngestor", "definition_file": "cloud_common/cloud_transient.json", - "truncate_destination": true, "description": "Builds intermediate cloud event table" }, { @@ -46,7 +45,6 @@ "class": "SimpleAggregator", "description": "Aggregate cloud records.", "definition_file": "cloud_common/cloud_metrics_aggregation.json", - "truncate_destination": true, "table_prefix": "cloudfact_by_", 
"aggregation_units": [ "day", "month", "quarter", "year" diff --git a/configuration/etl/etl_action_defs.d/cloud_common/cloud_metrics_aggregation.json b/configuration/etl/etl_action_defs.d/cloud_common/cloud_metrics_aggregation.json index bfd8ccc4d5..1ce3a25b6f 100644 --- a/configuration/etl/etl_action_defs.d/cloud_common/cloud_metrics_aggregation.json +++ b/configuration/etl/etl_action_defs.d/cloud_common/cloud_metrics_aggregation.json @@ -5,8 +5,8 @@ }, "aggregation_period_query": { "overseer_restrictions": { - "#last_modified_start_date": "last_modified >= ${VALUE}", - "#last_modified_end_date": "last_modified <= ${VALUE}", + "last_modified_start_date": "last_modified >= ${VALUE}", + "last_modified_end_date": "last_modified <= ${VALUE}", "include_only_resource_codes": "resource_id IN ${VALUE}", "exclude_resource_codes": "resource_id NOT IN ${VALUE}" } diff --git a/configuration/etl/etl_tables.d/cloud_common/cloud_transient.json b/configuration/etl/etl_tables.d/cloud_common/cloud_transient.json index 1e9991890d..e126eea171 100644 --- a/configuration/etl/etl_tables.d/cloud_common/cloud_transient.json +++ b/configuration/etl/etl_tables.d/cloud_common/cloud_transient.json @@ -103,6 +103,13 @@ "name": "submission_venue_id", "type": "int(5)", "nullable": true + }, + { + "name": "last_modified", + "type": "timestamp", + "nullable": false, + "default": "CURRENT_TIMESTAMP", + "extra": "on update CURRENT_TIMESTAMP" } ], "indexes": [ @@ -111,7 +118,7 @@ "columns": [ "resource_id", "instance", - "start_time" + "start_time_ts" ], "is_unique": true }, @@ -120,9 +127,15 @@ "columns": [ "instance_id", "resource_id", - "start_time" + "start_time_ts" ], "is_unique": true + }, + { + "name": "index_last_modified", + "columns": [ + "last_modified" + ] } ] } diff --git a/configuration/etl/etl_tables.d/cloud_common/event.json b/configuration/etl/etl_tables.d/cloud_common/event.json index 097035bb63..fa469ee42e 100644 --- a/configuration/etl/etl_tables.d/cloud_common/event.json +++ 
b/configuration/etl/etl_tables.d/cloud_common/event.json @@ -68,6 +68,13 @@ "type": "int(5)", "nullable": false, "default": -1 + }, + { + "name": "last_modified", + "type": "timestamp", + "nullable": false, + "default": "CURRENT_TIMESTAMP", + "extra": "on update CURRENT_TIMESTAMP" } ], "indexes": [ @@ -110,6 +117,12 @@ "resource_id" ], "is_unique": false + }, + { + "name": "index_last_modified", + "columns": [ + "last_modified" + ] } ] } diff --git a/open_xdmod/modules/xdmod/regression_tests/lib/Controllers/UsageExplorerCloudPostIngestTest.php b/open_xdmod/modules/xdmod/regression_tests/lib/Controllers/UsageExplorerCloudPostIngestTest.php new file mode 100644 index 0000000000..32b702adc2 --- /dev/null +++ b/open_xdmod/modules/xdmod/regression_tests/lib/Controllers/UsageExplorerCloudPostIngestTest.php @@ -0,0 +1,32 @@ + array('Cloud'), + 'dataset_type' => array('aggregate', 'timeseries'), + 'statistic' => $statistics, + 'group_by' => $group_bys, + 'aggregation_unit' => array_keys($this->aggregationUnits) + ); + + return parent::generateTests($varSettings, '2018-05-19', '2018-05-19'); + } +} diff --git a/open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh b/open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh new file mode 100755 index 0000000000..f4f12310e4 --- /dev/null +++ b/open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# Tests that cloud ETL works after upgrade + reingestion. Each count is checked with a negated -eq so that a NULL, empty or otherwise non-numeric mysql result fails the test instead of slipping past the check. + +BASEDIR=./open_xdmod/modules/xdmod/regression_tests +REF_DIR=/var/tmp/referencedata +last_modified_start_date=$(date +'%F %T') + +oldCount=$(mysql -N -B -e "SELECT SUM(num_sessions_ended) FROM modw_cloud.cloudfact_by_month WHERE month = 4;") + +if ! [ "$oldCount" -eq 53 ] +then + echo " Count $oldCount did not match expected result of 53" + exit 1 +fi + +sudo -u xdmod xdmod-shredder -r openstack -d $REF_DIR/openstack_upgrade -f openstack +sudo -u xdmod xdmod-ingestor --datatype=openstack --last-modified-start-date 
"$last_modified_start_date" +sudo -u xdmod xdmod-ingestor --aggregate=cloud --last-modified-start-date "$last_modified_start_date" + +newCount=$(mysql -N -B -e "SELECT SUM(num_sessions_ended) FROM modw_cloud.cloudfact_by_month WHERE month = 4;") + +if ! [ "$newCount" -eq 52 ] +then + echo " Count $newCount did not match expected result of 52" + exit 1 + +fi + +newRows=$(mysql -N -B -e "SELECT SUM(num_sessions_ended) FROM modw_cloud.cloudfact_by_month WHERE month = 5;") + +if ! [ "$newRows" -eq 2 ] +then + echo " Count $newRows did not match expected result of 2" + exit 1 + +fi diff --git a/shippable.yml b/shippable.yml index b7db276b45..2cb6254201 100644 --- a/shippable.yml +++ b/shippable.yml @@ -25,6 +25,7 @@ build: - cp ./configuration/portal_settings.ini ./configuration/portal_settings.ini.old - cp -f /etc/xdmod/portal_settings.ini ./configuration/portal_settings.ini - ./open_xdmod/modules/xdmod/integration_tests/runtests.sh --junit-output-dir `pwd`/shippable/testresults/ + - ./open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh --junit-output-dir `pwd`/shippable/testresults/ - ./open_xdmod/modules/xdmod/component_tests/runtests.sh --log-junit `pwd`/shippable/testresults/xdmod-component.xml - ./open_xdmod/modules/xdmod/automated_tests/runtests.sh --headless --log-junit `pwd`/shippable/testresults - ./open_xdmod/modules/xdmod/integration_tests/scripts/samlSetup.sh diff --git a/tests/artifacts/xdmod-test-artifacts/xdmod/referencedata/openstack_upgrade/2018-05-19T00:00:00_2018-05-19T23:59:59.json b/tests/artifacts/xdmod-test-artifacts/xdmod/referencedata/openstack_upgrade/2018-05-19T00:00:00_2018-05-19T23:59:59.json new file mode 100644 index 0000000000..5f83c2d99b --- /dev/null +++ b/tests/artifacts/xdmod-test-artifacts/xdmod/referencedata/openstack_upgrade/2018-05-19T00:00:00_2018-05-19T23:59:59.json @@ -0,0 +1,151 @@ +[{ + "disk_gb": "20", + "ephemeral_gb": "0", + "event_type": "compute.instance.power_on.start", + "generated": "2018-05-19T18:09:40.154919", + 
"host": "srv-p24-35.cbls.ccr.buffalo.edu", + "instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "instance_type": "c1.m4", + "instance_type_id": "9", + "launched_at": "2018-05-19T14:47:59", + "memory_mb": "4096", + "message_id": "b9c11cb3-b9a2-44af-a69d-cba8986ac67e", + "project_id": "4aeb007a4f9020333a1a1be224bef276", + "project_name": "zealous", + "raw": {}, + "request_id": "req-bd0507cf-69db-4edb-be09-5b386f86da92", + "resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "root_gb": "20", + "service": "compute", + "state": "stopped", + "tenant_id": "4aeb007a4f9020333a1a1be224bef276", + "user_id": "0ce3dbe7d215b55feb049c557f52dc84", + "user_name": "setusca", + "vcpus": "1" + }, + { + "disk_gb": "20", + "ephemeral_gb": "0", + "event_type": "compute.instance.power_on.end", + "generated": "2018-05-19T18:09:41.061655", + "host": "srv-p24-35.cbls.ccr.buffalo.edu", + "instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "instance_type": "c1.m4", + "instance_type_id": "9", + "launched_at": "2018-05-19T14:47:59", + "memory_mb": "4096", + "message_id": "aa5b4ed2-b65c-4dfd-b2c2-4c876805e7fa", + "project_id": "4aeb007a4f9020333a1a1be224bef276", + "project_name": "zealous", + "raw": {}, + "request_id": "req-bd0507cf-69db-4edb-be09-5b386f86da92", + "resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "root_gb": "20", + "service": "compute", + "state": "active", + "tenant_id": "4aeb007a4f9020333a1a1be224bef276", + "user_id": "0ce3dbe7d215b55feb049c557f52dc84", + "user_name": "setusca", + "vcpus": "1" + }, + { + "disk_gb": "20", + "ephemeral_gb": "0", + "event_type": "compute.instance.power_off.start", + "generated": "2018-05-19T16:47:16.798308", + "host": "srv-p24-35.cbls.ccr.buffalo.edu", + "instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "instance_type": "c1.m4", + "instance_type_id": "9", + "launched_at": "2018-05-19T14:47:59", + "memory_mb": "4096", + "message_id": "53a271f0-b30b-4f0d-9cbf-b6c9733777e4", + "project_id": 
"4aeb007a4f9020333a1a1be224bef276", + "project_name": "zealous", + "raw": {}, + "request_id": "req-6fcc1c24-3303-448d-8c72-2789ca1b74f3", + "resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "root_gb": "20", + "service": "compute", + "state": "active", + "tenant_id": "4aeb007a4f9020333a1a1be224bef276", + "user_id": "0ce3dbe7d215b55feb049c557f52dc84", + "user_name": "setusca", + "vcpus": "1" + }, + { + "disk_gb": "20", + "ephemeral_gb": "0", + "event_type": "compute.instance.power_off.end", + "generated": "2018-05-19T16:47:16.905308", + "host": "srv-p24-35.cbls.ccr.buffalo.edu", + "instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "instance_type": "c1.m4", + "instance_type_id": "9", + "launched_at": "2018-05-19T14:47:59", + "memory_mb": "4096", + "message_id": "65f0085b-3781-47e4-98df-fcdc5e8ece44", + "project_id": "4aeb007a4f9020333a1a1be224bef276", + "project_name": "zealous", + "raw": {}, + "request_id": "req-6fcc1c24-3303-448d-8c72-2789ca1b74f3", + "resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795", + "root_gb": "20", + "service": "compute", + "state": "stopped", + "tenant_id": "4aeb007a4f9020333a1a1be224bef276", + "user_id": "0ce3dbe7d215b55feb049c557f52dc84", + "user_name": "setusca", + "vcpus": "1" + }, + { + "disk_gb": "20", + "ephemeral_gb": "0", + "event_type": "compute.instance.power_off.start", + "generated": "2018-05-19T23:00:54.193570", + "host": "srv-p24-34.cbls.ccr.buffalo.edu", + "instance_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd", + "instance_type": "c4.m16", + "instance_type_id": "2", + "launched_at": "2018-05-19T17:38:51", + "memory_mb": "16384", + "message_id": "22249a01-917c-47ed-988a-ccd94fbfb389", + "project_id": "4aeb007a4f9020333a1a1be224bef276", + "project_name": "zealous", + "raw": {}, + "request_id": "req-020331fc-0d2d-461c-ab18-06c635bd4e9b", + "resource_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd", + "root_gb": "20", + "service": "compute", + "state": "active", + "tenant_id": "4aeb007a4f9020333a1a1be224bef276", + 
"user_id": "3abcb51ff942a45b52ac90915ef4c7fb", + "user_name": "yerwa", + "vcpus": "4" + }, + { + "disk_gb": "20", + "ephemeral_gb": "0", + "event_type": "compute.instance.power_off.end", + "generated": "2018-05-19T23:00:56.194680", + "host": "srv-p24-34.cbls.ccr.buffalo.edu", + "instance_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd", + "instance_type": "c4.m16", + "instance_type_id": "2", + "launched_at": "2018-05-19T17:38:54", + "memory_mb": "16384", + "message_id": "4d0c12a9-9e66-4c08-98dc-1982443bcb20", + "project_id": "4aeb007a4f9020333a1a1be224bef276", + "project_name": "zealous", + "raw": {}, + "request_id": "req-020331fc-0d2d-461c-ab18-06c635bd4e9b", + "resource_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd", + "root_gb": "20", + "service": "compute", + "state": "active", + "tenant_id": "4aeb007a4f9020333a1a1be224bef276", + "user_id": "3abcb51ff942a45b52ac90915ef4c7fb", + "user_name": "yerwa", + "vcpus": "4" + } +]