Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not truncate aggregate tables on each ingest #841

Merged
merged 8 commits into from
Mar 18, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions bin/xdmod-ingestor
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,11 @@ function main()
}

if($datatypeValue == 'openstack'){
$dwi->ingestCloudDataOpenStack();
$dwi->ingestCloudDataOpenStack($lastModifiedStartDate);
}

if($datatypeValue == 'genericcloud'){
$dwi->ingestCloudDataGeneric();
$dwi->ingestCloudDataGeneric($lastModifiedStartDate);
}

if ($datatypeValue == 'storage') {
Expand All @@ -293,7 +293,7 @@ function main()
}

if($realmToAggregate == 'cloud' || $realmToAggregate === false){
$dwi->aggregateCloudData();
$dwi->aggregateCloudData($lastModifiedStartDate);
}

if ($realmToAggregate == 'storage' || $realmToAggregate === false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public function __construct(aOptions $options, EtlConfiguration $etlConfig, Log
$this->_start_event_ids = array(self::START, self::RESUME, self::STATE_REPORT, self::UNSHELVE, self::UNPAUSE, self::UNSUSPEND, self::POWER_ON);
$this->_all_event_ids = array_merge($this->_start_event_ids, $this->_stop_event_ids);
$this->_end_time = $etlConfig->getVariableStore()->endDate ? date('Y-m-d H:i:s', strtotime($etlConfig->getVariableStore()->endDate)) : null;

Copy link
Member

@jpwhite4 jpwhite4 Mar 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should come up with a code parser rule that flags this class of whitespace change (and marks it as FAIL).

$this->resetInstance();
}

Expand Down Expand Up @@ -132,8 +132,9 @@ protected function getSourceQueryString()
// is lost. To work around this we add a dummy row filled with zeroes.
$colCount = count($this->etlSourceQuery->records);
$unionValues = array_fill(0, $colCount, 0);
$subSelect = "(SELECT DISTINCT instance_id from modw_cloud.event WHERE last_modified > \"" . $this->getEtlOverseerOptions()->getLastModifiedStartDate() . "\")";

$sql = "$sql WHERE event_type_id IN (" . implode(',', $this->_all_event_ids) . ")\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 DESC, 3 ASC, 4 DESC";
$sql = "$sql WHERE instance_id IN " . $subSelect . " AND event_type_id IN (" . implode(',', $this->_all_event_ids) . ")\nUNION ALL\nSELECT " . implode(',', $unionValues) . "\nORDER BY 1 DESC, 2 DESC, 3 ASC, 4 DESC";

return $sql;
}
Expand Down
28 changes: 20 additions & 8 deletions classes/OpenXdmod/DataWarehouseInitializer.php
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,16 @@ public function ingestAllHpcdb($startDate = null, $endDate = null)
* tables do not exist then catch the resulting exception and display a message
* saying that there is no OpenStack data to ingest.
*/
public function ingestCloudDataOpenStack()
public function ingestCloudDataOpenStack($lastModifiedStartDate)
{
if( $this->isRealmEnabled('Cloud') ){
try{
try {
$this->logger->notice('Ingesting OpenStack event log data');
Utilities::runEtlPipeline(array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'), $this->logger);
Utilities::runEtlPipeline(
array('jobs-cloud-import-users-openstack', 'jobs-cloud-extract-openstack'),
$this->logger,
array('last-modified-start-date' => $lastModifiedStartDate)
);
}
catch( Exception $e ){
if( $e->getCode() == 1146 ){
Expand All @@ -225,12 +229,16 @@ public function ingestCloudDataOpenStack()
* tables do not exist then catch the resulting exception and display a message
* saying that there is no generic cloud data to ingest.
*/
public function ingestCloudDataGeneric()
public function ingestCloudDataGeneric($lastModifiedStartDate)
{
if( $this->isRealmEnabled('Cloud') ){
try{
try {
$this->logger->notice('Ingesting generic cloud log files');
Utilities::runEtlPipeline(array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'), $this->logger);
Utilities::runEtlPipeline(
array('jobs-cloud-import-users-generic', 'jobs-cloud-extract-generic'),
$this->logger,
array('last-modified-start-date' => $lastModifiedStartDate)
);
}
catch( Exception $e ){
if( $e->getCode() == 1146 ){
Expand Down Expand Up @@ -273,11 +281,15 @@ public function ingestStorageData()
* catch the resulting exception and display a message saying that there
* is no cloud data to aggregate and cloud aggregation is being skipped.
*/
public function aggregateCloudData()
public function aggregateCloudData($lastModifiedStartDate)
{
if( $this->isRealmEnabled('Cloud') ){
$this->logger->notice('Aggregating Cloud data');
Utilities::runEtlPipeline(array('cloud-state-pipeline'), $this->logger);
Utilities::runEtlPipeline(
array('cloud-state-pipeline'),
$this->logger,
array('last-modified-start-date' => $lastModifiedStartDate)
);

$filterListBuilder = new FilterListBuilder();
$filterListBuilder->setLogger($this->logger);
Expand Down
2 changes: 0 additions & 2 deletions configuration/etl/etl.d/cloud_state_machine.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
"name": "cloud-transient",
"class": "DatabaseIngestor",
"definition_file": "cloud_common/cloud_transient.json",
"truncate_destination": true,
"description": "Builds intermediate cloud event table"
},
{
Expand All @@ -46,7 +45,6 @@
"class": "SimpleAggregator",
"description": "Aggregate cloud records.",
"definition_file": "cloud_common/cloud_metrics_aggregation.json",
"truncate_destination": true,
"table_prefix": "cloudfact_by_",
"aggregation_units": [
"day", "month", "quarter", "year"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
},
"aggregation_period_query": {
"overseer_restrictions": {
"#last_modified_start_date": "last_modified >= ${VALUE}",
"#last_modified_end_date": "last_modified <= ${VALUE}",
"last_modified_start_date": "last_modified >= ${VALUE}",
"last_modified_end_date": "last_modified <= ${VALUE}",
"include_only_resource_codes": "resource_id IN ${VALUE}",
"exclude_resource_codes": "resource_id NOT IN ${VALUE}"
}
Expand Down
17 changes: 15 additions & 2 deletions configuration/etl/etl_tables.d/cloud_common/cloud_transient.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@
"name": "submission_venue_id",
"type": "int(5)",
"nullable": true
},
{
"name": "last_modified",
"type": "timestamp",
"nullable": false,
"default": "CURRENT_TIMESTAMP",
"extra": "on update CURRENT_TIMESTAMP"
}
],
"indexes": [
Expand All @@ -111,7 +118,7 @@
"columns": [
"resource_id",
"instance",
"start_time"
"start_time_ts"
chakrabortyr marked this conversation as resolved.
Show resolved Hide resolved
],
"is_unique": true
},
Expand All @@ -120,9 +127,15 @@
"columns": [
"instance_id",
"resource_id",
"start_time"
jpwhite4 marked this conversation as resolved.
Show resolved Hide resolved
"start_time_ts"
],
"is_unique": true
},
{
"name": "index_last_modified",
"columns": [
"last_modified"
chakrabortyr marked this conversation as resolved.
Show resolved Hide resolved
]
}
]
}
Expand Down
13 changes: 13 additions & 0 deletions configuration/etl/etl_tables.d/cloud_common/event.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@
"type": "int(5)",
"nullable": false,
"default": -1
},
{
"name": "last_modified",
jpwhite4 marked this conversation as resolved.
Show resolved Hide resolved
"type": "timestamp",
"nullable": false,
"default": "CURRENT_TIMESTAMP",
"extra": "on update CURRENT_TIMESTAMP"
}
],
"indexes": [
Expand Down Expand Up @@ -110,6 +117,12 @@
"resource_id"
],
"is_unique": false
},
{
"name": "index_last_modified",
"columns": [
"last_modified"
]
}
]
}
Expand Down
36 changes: 36 additions & 0 deletions open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/sh
# Tests that cloud ETL works after upgrade + reingestion.
#
# Flow:
#   1. Check the aggregated ended-session count for month 4.
#   2. Shred the openstack_upgrade reference data, then ingest and aggregate
#      it, restricted to rows modified since this script started.
#   3. Check the counts for months 4 and 5 again.
#
# Exits 1 on the first count that does not match its expected value.

REF_DIR=/var/tmp/referencedata

# Captured before any ETL work so the ingest/aggregate runs below only pick
# up rows touched by this test.
last_modified_start_date=$(date +'%F %T')

# Print SUM(num_sessions_ended) from modw_cloud.cloudfact_by_month for the
# month number given as $1.
sessions_ended_for_month()
{
    mysql -N -B -e "SELECT SUM(num_sessions_ended) FROM modw_cloud.cloudfact_by_month WHERE month = $1;"
}

# Exit 1 unless the actual count ($1) equals the expected count ($2).
#
# The actual value is validated as a non-negative integer first: an empty
# string (mysql failed) or "NULL" (no matching rows) would make `[ ... -ne ]`
# exit with status 2, which an `if` treats as "false" and so would let the
# check pass silently.
assert_count()
{
    matched=no
    case "$1" in
        '' | *[!0-9]*)
            # Not a number: treated as a mismatch.
            ;;
        *)
            if [ "$1" -eq "$2" ]
            then
                matched=yes
            fi
            ;;
    esac

    if [ "$matched" != yes ]
    then
        echo " Count $1 did not match expected result of $2"
        exit 1
    fi
}

oldCount=$(sessions_ended_for_month 4)
assert_count "$oldCount" 53

sudo -u xdmod xdmod-shredder -r openstack -d "$REF_DIR/openstack_upgrade" -f openstack
sudo -u xdmod xdmod-ingestor --datatype=openstack --last-modified-start-date "$last_modified_start_date"
sudo -u xdmod xdmod-ingestor --aggregate=cloud --last-modified-start-date "$last_modified_start_date"

newCount=$(sessions_ended_for_month 4)
assert_count "$newCount" 52

newRows=$(sessions_ended_for_month 5)
assert_count "$newRows" 2
1 change: 1 addition & 0 deletions shippable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ build:
- cp ./configuration/portal_settings.ini ./configuration/portal_settings.ini.old
- cp -f /etc/xdmod/portal_settings.ini ./configuration/portal_settings.ini
- ./open_xdmod/modules/xdmod/integration_tests/runtests.sh --junit-output-dir `pwd`/shippable/testresults/
- ./open_xdmod/modules/xdmod/regression_tests/post_ingest_test.sh --junit-output-dir `pwd`/shippable/testresults/
- ./open_xdmod/modules/xdmod/component_tests/runtests.sh --log-junit `pwd`/shippable/testresults/xdmod-component.xml
- ./open_xdmod/modules/xdmod/automated_tests/runtests.sh --headless --log-junit `pwd`/shippable/testresults
- ./open_xdmod/modules/xdmod/integration_tests/scripts/samlSetup.sh
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
[{
"disk_gb": "20",
"ephemeral_gb": "0",
"event_type": "compute.instance.power_on.start",
"generated": "2018-05-19T18:09:40.154919",
"host": "srv-p24-35.cbls.ccr.buffalo.edu",
"instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"instance_type": "c1.m4",
"instance_type_id": "9",
"launched_at": "2018-05-19T14:47:59",
"memory_mb": "4096",
"message_id": "b9c11cb3-b9a2-44af-a69d-cba8986ac67e",
"project_id": "4aeb007a4f9020333a1a1be224bef276",
"project_name": "zealous",
"raw": {},
"request_id": "req-bd0507cf-69db-4edb-be09-5b386f86da92",
"resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"root_gb": "20",
"service": "compute",
"state": "stopped",
"tenant_id": "4aeb007a4f9020333a1a1be224bef276",
"user_id": "0ce3dbe7d215b55feb049c557f52dc84",
"user_name": "setusca",
"vcpus": "1"
},
{
"disk_gb": "20",
"ephemeral_gb": "0",
"event_type": "compute.instance.power_on.end",
"generated": "2018-05-19T18:09:41.061655",
"host": "srv-p24-35.cbls.ccr.buffalo.edu",
"instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"instance_type": "c1.m4",
"instance_type_id": "9",
"launched_at": "2018-05-19T14:47:59",
"memory_mb": "4096",
"message_id": "aa5b4ed2-b65c-4dfd-b2c2-4c876805e7fa",
"project_id": "4aeb007a4f9020333a1a1be224bef276",
"project_name": "zealous",
"raw": {},
"request_id": "req-bd0507cf-69db-4edb-be09-5b386f86da92",
"resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"root_gb": "20",
"service": "compute",
"state": "active",
"tenant_id": "4aeb007a4f9020333a1a1be224bef276",
"user_id": "0ce3dbe7d215b55feb049c557f52dc84",
"user_name": "setusca",
"vcpus": "1"
},
{
"disk_gb": "20",
"ephemeral_gb": "0",
"event_type": "compute.instance.power_off.start",
"generated": "2018-05-19T16:47:16.798308",
"host": "srv-p24-35.cbls.ccr.buffalo.edu",
"instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"instance_type": "c1.m4",
"instance_type_id": "9",
"launched_at": "2018-05-19T14:47:59",
"memory_mb": "4096",
"message_id": "53a271f0-b30b-4f0d-9cbf-b6c9733777e4",
"project_id": "4aeb007a4f9020333a1a1be224bef276",
"project_name": "zealous",
"raw": {},
"request_id": "req-6fcc1c24-3303-448d-8c72-2789ca1b74f3",
"resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"root_gb": "20",
"service": "compute",
"state": "active",
"tenant_id": "4aeb007a4f9020333a1a1be224bef276",
"user_id": "0ce3dbe7d215b55feb049c557f52dc84",
"user_name": "setusca",
"vcpus": "1"
},
{
"disk_gb": "20",
"ephemeral_gb": "0",
"event_type": "compute.instance.power_off.end",
"generated": "2018-05-19T16:47:16.905308",
"host": "srv-p24-35.cbls.ccr.buffalo.edu",
"instance_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"instance_type": "c1.m4",
"instance_type_id": "9",
"launched_at": "2018-05-19T14:47:59",
"memory_mb": "4096",
"message_id": "65f0085b-3781-47e4-98df-fcdc5e8ece44",
"project_id": "4aeb007a4f9020333a1a1be224bef276",
"project_name": "zealous",
"raw": {},
"request_id": "req-6fcc1c24-3303-448d-8c72-2789ca1b74f3",
"resource_id": "85c3287c-c9a7-47d6-9019-52bb41235795",
"root_gb": "20",
"service": "compute",
"state": "stopped",
"tenant_id": "4aeb007a4f9020333a1a1be224bef276",
"user_id": "0ce3dbe7d215b55feb049c557f52dc84",
"user_name": "setusca",
"vcpus": "1"
},
{
"disk_gb": "20",
"ephemeral_gb": "0",
"event_type": "compute.instance.power_off.start",
"generated": "2018-05-19T23:00:54.193570",
"host": "srv-p24-34.cbls.ccr.buffalo.edu",
"instance_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd",
"instance_type": "c4.m16",
"instance_type_id": "2",
"launched_at": "2018-05-19T17:38:51",
"memory_mb": "16384",
"message_id": "22249a01-917c-47ed-988a-ccd94fbfb389",
"project_id": "4aeb007a4f9020333a1a1be224bef276",
"project_name": "zealous",
"raw": {},
"request_id": "req-020331fc-0d2d-461c-ab18-06c635bd4e9b",
"resource_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd",
"root_gb": "20",
"service": "compute",
"state": "active",
"tenant_id": "4aeb007a4f9020333a1a1be224bef276",
"user_id": "3abcb51ff942a45b52ac90915ef4c7fb",
"user_name": "yerwa",
"vcpus": "4"
},
{
"disk_gb": "20",
"ephemeral_gb": "0",
"event_type": "compute.instance.power_off.end",
"generated": "2018-05-19T23:00:56.194680",
"host": "srv-p24-34.cbls.ccr.buffalo.edu",
"instance_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd",
"instance_type": "c4.m16",
"instance_type_id": "2",
"launched_at": "2018-05-19T17:38:54",
"memory_mb": "16384",
"message_id": "4d0c12a9-9e66-4c08-98dc-1982443bcb20",
"project_id": "4aeb007a4f9020333a1a1be224bef276",
"project_name": "zealous",
"raw": {},
"request_id": "req-020331fc-0d2d-461c-ab18-06c635bd4e9b",
"resource_id": "565e8a1e-a83e-4104-a32c-85c79b7737fd",
"root_gb": "20",
"service": "compute",
"state": "active",
"tenant_id": "4aeb007a4f9020333a1a1be224bef276",
"user_id": "3abcb51ff942a45b52ac90915ef4c7fb",
"user_name": "yerwa",
"vcpus": "4"
}
]