diff --git a/maintenance/includes/LoggedUpdateMaintenance.php b/maintenance/includes/LoggedUpdateMaintenance.php
index 27bc114ddc0c0..d84c82ff7cea0 100644
--- a/maintenance/includes/LoggedUpdateMaintenance.php
+++ b/maintenance/includes/LoggedUpdateMaintenance.php
@@ -48,9 +48,18 @@ public function execute() {
 			return true;
 		}

+		// Fandom-start PLATFORM-10268
+		// Log the execution time of each migration script, so migration bottlenecks are easy to find
+		$start = microtime( true );
+		$this->output( "Running '$key'\n" );
 		if ( !$this->doDBUpdates() ) {
+			$elapsed = round( microtime( true ) - $start, 3 );
+			$this->output( "'$key' failed after {$elapsed}s\n" );
 			return false;
 		}
+		$elapsed = round( microtime( true ) - $start, 3 );
+		$this->output( "'$key' finished after {$elapsed}s\n" );
+		// Fandom-end

 		$db->newInsertQueryBuilder()
 			->insertInto( 'updatelog' )
diff --git a/maintenance/migrateLinksTable.php b/maintenance/migrateLinksTable.php
index 73dbf1825a59e..20a66b97371fb 100644
--- a/maintenance/migrateLinksTable.php
+++ b/maintenance/migrateLinksTable.php
@@ -75,6 +75,20 @@ protected function doDBUpdates() {
 		}
 		$highestPageId = $highestPageId[0];
 		$pageId = 0;
+
+		// pagelinks is migrated with set-based SQL (fill linktarget first, then point
+		// pagelinks at it) instead of the per-page batches used for the other tables.
+		if ( $table === 'pagelinks' ) {
+			$createdLinkTargetRows = $this->fillLinkTargetTable();
+			$this->output( "In total created $createdLinkTargetRows linktarget rows\n" );
+
+			$updatedPageLinksRows = $this->handlePagelinksUpdate();
+			$this->output( "In total updated $updatedPageLinksRows pagelinks rows\n" );
+
+			$updated = $updatedPageLinksRows + $createdLinkTargetRows;
+			$this->output( "Completed normalization of $table, $updated rows updated.\n" );
+			return true;
+		}
 		while ( $pageId <= $highestPageId ) {
 			// Given the indexes and the structure of links tables,
 			// we need to split the update into batches of pages.
@@ -88,6 +102,77 @@ protected function doDBUpdates() {
 		return true;
 	}

+	/**
+	 * Create a linktarget row for every distinct (pl_namespace, pl_title) pair in
+	 * pagelinks that does not have one yet, inserting one batch per statement.
+	 *
+	 * @return int Total number of linktarget rows created
+	 */
+	private function fillLinkTargetTable(): int {
+		$dbw = $this->getPrimaryDB();
+		// A non-positive size would produce "LIMIT 0", affect nothing and never end the loop
+		$batchSize = max( 1, (int)$this->getBatchSize() );
+		// tableName() applies the configured table prefix and quoting
+		$linktarget = $dbw->tableName( 'linktarget' );
+		$pagelinks = $dbw->tableName( 'pagelinks' );
+		$query = "INSERT INTO $linktarget (lt_namespace, lt_title)
+			SELECT pl_namespace, pl_title FROM $pagelinks
+			WHERE NOT EXISTS (SELECT 1 FROM $linktarget WHERE pl_namespace = lt_namespace AND pl_title = lt_title)
+			GROUP BY pl_namespace, pl_title
+			LIMIT $batchSize";
+
+		return $this->runPagelinksBatches( $query, $batchSize, 'Created', 'linktarget', __METHOD__ );
+	}
+
+	/**
+	 * Set pl_target_id on every pagelinks row that does not have it yet, one batch per statement.
+	 *
+	 * @return int Total number of pagelinks rows updated
+	 */
+	private function handlePagelinksUpdate(): int {
+		$dbw = $this->getPrimaryDB();
+		// A non-positive size would produce "LIMIT 0", affect nothing and never end the loop
+		$batchSize = max( 1, (int)$this->getBatchSize() );
+		// tableName() applies the configured table prefix and quoting
+		$linktarget = $dbw->tableName( 'linktarget' );
+		$pagelinks = $dbw->tableName( 'pagelinks' );
+		$target = "SELECT lt_id FROM $linktarget WHERE pl_namespace = lt_namespace AND pl_title = lt_title";
+		// Rows without a matching linktarget row are skipped: they would otherwise be
+		// written as NULL, stay selected on every pass and could end the loop too early.
+		$query = "UPDATE $pagelinks
+			SET pl_target_id = ($target)
+			WHERE (pl_target_id IS NULL OR pl_target_id = 0) AND EXISTS ($target)
+			LIMIT $batchSize";
+
+		return $this->runPagelinksBatches( $query, $batchSize, 'Updated', 'pagelinks', __METHOD__ );
+	}
+
+	/**
+	 * Run a LIMITed write query until one run affects fewer rows than the limit.
+	 *
+	 * @param string $query SQL statement whose LIMIT equals $batchSize
+	 * @param int $batchSize Positive row limit used in $query
+	 * @param string $verb Verb for the progress output, e.g. "Created"
+	 * @param string $table Table name for the progress output
+	 * @param string $fname Caller name passed to the database layer
+	 * @return int Total number of affected rows
+	 */
+	private function runPagelinksBatches(
+		string $query, int $batchSize, string $verb, string $table, string $fname
+	): int {
+		$dbw = $this->getPrimaryDB();
+		$total = 0;
+		do {
+			$dbw->query( $query, $fname );
+			$affectedRows = $dbw->affectedRows();
+			$total += $affectedRows;
+			$this->output( "$verb $affectedRows $table rows\n" );
+			$this->waitForReplication();
+		} while ( $affectedRows >= $batchSize );
+
+		return $total;
+	}
+
 	private function handlePageBatch( $lowPageId, $mapping, $table ) {
 		$batchSize = $this->getBatchSize();
 		$targetColumn = $mapping[$table]['target_id'];
diff --git a/maintenance/migrateRevisionCommentTemp.php b/maintenance/migrateRevisionCommentTemp.php
index 25e004de41196..6c09b1f8ef20a 100644
--- a/maintenance/migrateRevisionCommentTemp.php
+++ b/maintenance/migrateRevisionCommentTemp.php
@@ -48,8 +48,6 @@ protected function getUpdateKey() {
 	}

 	protected function doDBUpdates() {
-		$batchSize = $this->getBatchSize();
-
 		$dbw = $this->getDB( DB_PRIMARY );
 		if ( !$dbw->fieldExists( 'revision', 'rev_comment_id', __METHOD__ ) ) {
 			$this->output( "Run update.php to create rev_comment_id.\n" );
@@ -61,48 +59,50 @@ protected function doDBUpdates() {
 		}

 		$this->output( "Merging the revision_comment_temp table into the revision table...\n" );
-		$conds = [];
 		$updated = 0;
 		$sleep = (int)$this->getOption( 'sleep', 0 );
+		$highestRevId = (int)$dbw->newSelectQueryBuilder()
+			->select( 'rev_id' )
+			->from( 'revision' )
+			->limit( 1 )
+			->caller( __METHOD__ )
+			->orderBy( 'rev_id', 'DESC' )
+			->fetchField();
+		$this->output( "Max rev_id $highestRevId.\n" );
+		// The inherited default batch size is 200; use 1000 to speed the migration up
+		// unless --batch-size was passed. There is waitForReplication() after each batch anyway.
+		$batchSize = $this->hasOption( 'batch-size' ) ? max( 1, (int)$this->getBatchSize() ) : 1000;
+		// tableName() applies the configured table prefix and quoting
+		$revision = $dbw->tableName( 'revision' );
+		$commentTemp = $dbw->tableName( 'revision_comment_temp' );
+		$lowId = 0;
+		$highId = $batchSize;
 		while ( true ) {
-			$res = $dbw->newSelectQueryBuilder()
-				->select( [ 'rev_id', 'revcomment_comment_id' ] )
-				->from( 'revision' )
-				->join( 'revision_comment_temp', null, 'rev_id=revcomment_rev' )
-				->where( [ 'rev_comment_id' => 0 ] )
-				->andWhere( $conds )
-				->limit( $batchSize )
-				->orderBy( 'rev_id' )
-				->caller( __METHOD__ )
-				->fetchResultSet();
-
-			$numRows = $res->numRows();
-
-			$last = null;
-			foreach ( $res as $row ) {
-				$last = $row->rev_id;
-				$dbw->newUpdateQueryBuilder()
-					->update( 'revision' )
-					->set( [ 'rev_comment_id' => $row->revcomment_comment_id ] )
-					->where( [ 'rev_id' => $row->rev_id ] )
-					->caller( __METHOD__ )->execute();
-				$updated += $dbw->affectedRows();
-			}
+			// Only rows that are not migrated yet (rev_comment_id = 0) are touched, like the
+			// original script did. COALESCE keeps the current value when the revision has no
+			// row in revision_comment_temp; the original JOIN skipped such revisions.
+			//
+			// Not sure whether we should try to fix the data first
+			// RevisionSelectQueryBuilder::joinComment suggests that all revisions should have rev_comment_id set
+			$query = "UPDATE $revision
+				SET rev_comment_id = COALESCE((SELECT revcomment_comment_id FROM $commentTemp WHERE rev_id=revcomment_rev), rev_comment_id)
+				WHERE rev_id > $lowId AND rev_id <= $highId AND rev_comment_id = 0";
+			$dbw->query( $query, __METHOD__ );
+			$affected = $dbw->affectedRows();
+			$updated += $affected;
+			$this->output( "Updated $affected revision rows from $lowId to $highId\n" );
+			$this->waitForReplication();

-			if ( $numRows < $batchSize ) {
-				// We must have reached the end
+			if ( $highId >= $highestRevId ) {
+				// We reached the end
 				break;
 			}
-
-			// @phan-suppress-next-line PhanTypeSuspiciousStringExpression last is not-null when used
-			$this->output( "... rev_id=$last, updated $updated\n" );
-			$conds = [ $dbw->expr( 'rev_id', '>', $last ) ];
-
-			// Sleep between batches for replication to catch up
-			$this->waitForReplication();
+			$lowId = $highId;
+			$highId = $lowId + $batchSize;
+			// Optional extra pause between batches, controlled by --sleep
 			if ( $sleep > 0 ) {
 				sleep( $sleep );
 			}
 		}
 		$this->output(
 			"Completed merge of revision_comment_temp into the revision table, "