Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(PLATFORM-10268): optimize migrateLinksTable.php for pagelinks #112

Merged
merged 3 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions maintenance/includes/LoggedUpdateMaintenance.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,18 @@ public function execute() {
return true;
}

// Fandom-start PLATFORM-10268
// Log the execution time of each migration script so that migration bottlenecks are easy to find.
$start = microtime( true );
$this->output( "Running '$key'\n" );
if ( !$this->doDBUpdates() ) {
$elapsed = microtime( true ) - $start;
$this->output( "'$key' failed after {$elapsed}s\n" );
return false;
}
$elapsed = microtime( true ) - $start;
$this->output( "'$key' finished after {$elapsed}s\n" );
// Fandom-end

$db->newInsertQueryBuilder()
->insertInto( 'updatelog' )
Expand Down
53 changes: 53 additions & 0 deletions maintenance/migrateLinksTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ protected function doDBUpdates() {
}
$highestPageId = $highestPageId[0];
$pageId = 0;

if ( $table === 'pagelinks' ) {
$createdLinkTargetRows = $this->fillLinkTargetTable();
$this->output( "In total created $createdLinkTargetRows linktarget rows\n" );

$updatedPageLinksRows = $this->handlePagelinksUpdate();
$this->output( "In total updated $updatedPageLinksRows pagelinks rows\n" );

$updated = $updatedPageLinksRows + $createdLinkTargetRows;
$this->output( "Completed normalization of $table, $updated rows updated.\n" );
return true;
}
while ( $pageId <= $highestPageId ) {
// Given the indexes and the structure of links tables,
// we need to split the update into batches of pages.
Expand All @@ -88,6 +100,47 @@ protected function doDBUpdates() {
return true;
}

/**
 * Insert into linktarget every distinct (pl_namespace, pl_title) pair found in
 * pagelinks that does not have a linktarget row yet.
 *
 * The work is done in batches of getBatchSize() rows. After every batch the
 * script waits for replication, and it stops as soon as a batch inserts fewer
 * rows than the batch size.
 *
 * @return int Total number of linktarget rows inserted across all batches
 */
private function fillLinkTargetTable(): int {
	$batchSize = $this->getBatchSize();
	$dbw = $this->getPrimaryDB();

	// The SQL below is handed to query() as a plain string, so table names are
	// resolved through the database handle instead of being hard-coded; this keeps
	// the statement valid on installations that use a table prefix.
	$linktarget = $dbw->tableName( 'linktarget' );
	$pagelinks = $dbw->tableName( 'pagelinks' );

	// The statement is identical for every batch: rows inserted by earlier batches
	// are excluded by the NOT EXISTS clause, so re-running it picks up the next set.
	$query = "INSERT INTO $linktarget(lt_namespace, lt_title)
		SELECT pl_namespace, pl_title FROM $pagelinks
		WHERE NOT exists (SELECT * FROM $linktarget WHERE pl_namespace = lt_namespace AND pl_title = lt_title)
		GROUP BY pl_namespace, pl_title
		LIMIT $batchSize";

	$createdRows = 0;
	do {
		$dbw->query( $query, __METHOD__ );
		$affectedRows = $dbw->affectedRows();
		$createdRows += $affectedRows;
		$this->output( "Created $affectedRows linktarget rows\n" );
		$this->waitForReplication();
		// A short batch means there was nothing left to insert.
	} while ( $affectedRows >= $batchSize );

	return $createdRows;
}

/**
 * Fill pl_target_id on pagelinks rows where it is still NULL or 0, using the
 * lt_id of the linktarget row with the same namespace and title.
 *
 * The work is done in batches of getBatchSize() rows. After every batch the
 * script waits for replication, and it stops as soon as a batch changes fewer
 * rows than the batch size.
 *
 * @return int Total number of pagelinks rows updated across all batches
 */
private function handlePagelinksUpdate(): int {
	$batchSize = $this->getBatchSize();
	$dbw = $this->getPrimaryDB();

	// The SQL below is handed to query() as a plain string, so table names are
	// resolved through the database handle instead of being hard-coded; this keeps
	// the statement valid on installations that use a table prefix.
	$linktarget = $dbw->tableName( 'linktarget' );
	$pagelinks = $dbw->tableName( 'pagelinks' );

	// NOTE(review): a pagelinks row with no matching linktarget row gets NULL from
	// the subquery and keeps matching the WHERE clause. Such rows could make a batch
	// report fewer than $batchSize changed rows and end the loop early - confirm
	// that fillLinkTargetTable() has always completed before this method runs.
	$query = "UPDATE $pagelinks
		SET pl_target_id = (SELECT lt_id FROM $linktarget WHERE pl_namespace = lt_namespace AND pl_title = lt_title)
		WHERE pl_target_id IS NULL OR pl_target_id = 0
		LIMIT $batchSize";

	$updatedRows = 0;
	do {
		$dbw->query( $query, __METHOD__ );
		$affectedRows = $dbw->affectedRows();
		$updatedRows += $affectedRows;
		$this->output( "Updated $affectedRows pagelinks rows\n" );
		$this->waitForReplication();
		// A short batch means no more rows were left to update.
	} while ( $affectedRows >= $batchSize );

	return $updatedRows;
}

private function handlePageBatch( $lowPageId, $mapping, $table ) {
$batchSize = $this->getBatchSize();
$targetColumn = $mapping[$table]['target_id'];
Expand Down
64 changes: 25 additions & 39 deletions maintenance/migrateRevisionCommentTemp.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ protected function getUpdateKey() {
}

protected function doDBUpdates() {
$batchSize = $this->getBatchSize();

$dbw = $this->getDB( DB_PRIMARY );
if ( !$dbw->fieldExists( 'revision', 'rev_comment_id', __METHOD__ ) ) {
$this->output( "Run update.php to create rev_comment_id.\n" );
Expand All @@ -61,48 +59,36 @@ protected function doDBUpdates() {
}

$this->output( "Merging the revision_comment_temp table into the revision table...\n" );
$conds = [];
$updated = 0;
$sleep = (int)$this->getOption( 'sleep', 0 );
$highestRevId = (int)$dbw->newSelectQueryBuilder()
->select( 'rev_id' )
->from( 'revision' )
->limit( 1 )
->caller( __METHOD__ )
->orderBy( 'rev_id', 'DESC' )
->fetchField();
$this->output( "Max rev_id $highestRevId.\n" );
// The default batch size from "$this->getBatchSize()" is 200; use 1000 to speed up the migration.
// "$this->waitForReplication()" is called after each batch anyway.
$batchSize = 1000;
$lowId = -1;
$highId = $batchSize;
while ( true ) {
$res = $dbw->newSelectQueryBuilder()
->select( [ 'rev_id', 'revcomment_comment_id' ] )
->from( 'revision' )
->join( 'revision_comment_temp', null, 'rev_id=revcomment_rev' )
->where( [ 'rev_comment_id' => 0 ] )
->andWhere( $conds )
->limit( $batchSize )
->orderBy( 'rev_id' )
->caller( __METHOD__ )
->fetchResultSet();

$numRows = $res->numRows();

$last = null;
foreach ( $res as $row ) {
$last = $row->rev_id;
$dbw->newUpdateQueryBuilder()
->update( 'revision' )
->set( [ 'rev_comment_id' => $row->revcomment_comment_id ] )
->where( [ 'rev_id' => $row->rev_id ] )
->caller( __METHOD__ )->execute();
$updated += $dbw->affectedRows();
}
$query = "UPDATE revision
SET rev_comment_id = (SELECT revcomment_comment_id FROM revision_comment_temp WHERE rev_id=revcomment_rev)
WHERE rev_id > $lowId AND rev_id <= $highId";
$dbw->query( $query, __METHOD__ );
$affected = $dbw->affectedRows();
$updated += $affected;
$this->output( "Updated $affected revision rows from $lowId to $highId\n" );
$this->waitForReplication();

if ( $numRows < $batchSize ) {
// We must have reached the end
if ( $highId > $highestRevId ) {
// We reached the end
break;
}

// @phan-suppress-next-line PhanTypeSuspiciousStringExpression last is not-null when used
$this->output( "... rev_id=$last, updated $updated\n" );
$conds = [ $dbw->expr( 'rev_id', '>', $last ) ];

// Sleep between batches for replication to catch up
$this->waitForReplication();
if ( $sleep > 0 ) {
sleep( $sleep );
}
$lowId = $highId;
$highId = $lowId + $batchSize;
}
$this->output(
"Completed merge of revision_comment_temp into the revision table, "
Expand Down