Skip to content

Commit

Permalink
fix: fixes the array for the parents
Browse files Browse the repository at this point in the history
  • Loading branch information
sriramkanakam87 committed Nov 21, 2024
1 parent 885c2db commit 1ba14ec
Showing 1 changed file with 115 additions and 99 deletions.
214 changes: 115 additions & 99 deletions app/Console/Commands/DedupeFixCollectionSourceLinks.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,123 +35,139 @@ public function handle()
54 => 'https://go.drugbank.com/drugs/',
58 => 'https://www.way2drug.com/phyto4health/compound_card.php?compound_id=',
];
$batchCount = 1;

// Process all molecules in chunks to avoid memory exhaustion
DB::table('collection_molecule')
->select('id', 'collection_id', 'molecule_id', 'url', 'reference')
->orderBy('id')
->chunk($batchSize, function ($collection_molecules) use (&$data, $db_links, &$batchCount) {
$this->info('started batch ');
if ($batchCount >= 29) {
foreach ($collection_molecules as $collection_molecule) {
$url = null;
$reference = null;

// Get the source URL and reference for the molecule
$entries = DB::select("SELECT link, reference_id FROM entries WHERE collection_id = {$collection_molecule->collection_id} and molecule_id = {$collection_molecule->molecule_id};");

foreach ($entries as $index => $entry) {

if ($index == 0) {
switch ($collection_molecule->collection_id) {
case 30:
case 42:
case 43:
case 54:
case 58:
$url = $db_links[$collection_molecule->collection_id].$entry->reference_id;
break;
default:
$url = $entry->link;
break;
}
$reference = $entry->reference_id;
} else {
switch ($collection_molecule->collection_id) {
case 30:
case 42:
case 43:
case 54:
case 58:
$url .= '|'.$db_links[$collection_molecule->collection_id].$entry->reference_id;
break;
default:
$url .= '|'.$entry->link;
break;
}
$reference .= '|'.$entry->reference_id;
}
}

// Prepare data for batch update
array_push($data, [
'collection_id' => $collection_molecule->collection_id,
'molecule_id' => $collection_molecule->molecule_id,
'url' => $url,
'reference' => $reference,
]);
}

// Update the database with the collected url and reference values in batch
if (! empty($data)) {
$this->info('Updating batch '.$batchCount);
$this->updateBatch($data);
$data = []; // Reset the data array for the next batch
}
}
$batchCount = $batchCount + 1;
});

// Ensure any remaining data is updated after the last chunk
if (! empty($data)) {
$this->updateBatch($data);
}
// $batchCount = 1;

// // Process all molecules in chunks to avoid memory exhaustion
// DB::table('collection_molecule')
// ->select('id', 'collection_id', 'molecule_id', 'url', 'reference')
// ->orderBy('id')
// ->chunk($batchSize, function ($collection_molecules) use (&$data, $db_links, &$batchCount) {
// $this->info('started batch ');
// if ($batchCount >= 29) {
// foreach ($collection_molecules as $collection_molecule) {
// $url = null;
// $reference = null;

// // Get the source URL and reference for the molecule
// $entries = DB::select("SELECT link, reference_id FROM entries WHERE collection_id = {$collection_molecule->collection_id} and molecule_id = {$collection_molecule->molecule_id};");

// foreach ($entries as $index => $entry) {

// if ($index == 0) {
// switch ($collection_molecule->collection_id) {
// case 30:
// case 42:
// case 43:
// case 54:
// case 58:
// $url = $db_links[$collection_molecule->collection_id].$entry->reference_id;
// break;
// default:
// $url = $entry->link;
// break;
// }
// $reference = $entry->reference_id;
// } else {
// switch ($collection_molecule->collection_id) {
// case 30:
// case 42:
// case 43:
// case 54:
// case 58:
// $url .= '|'.$db_links[$collection_molecule->collection_id].$entry->reference_id;
// break;
// default:
// $url .= '|'.$entry->link;
// break;
// }
// $reference .= '|'.$entry->reference_id;
// }
// }

// // Prepare data for batch update
// array_push($data, [
// 'collection_id' => $collection_molecule->collection_id,
// 'molecule_id' => $collection_molecule->molecule_id,
// 'url' => $url,
// 'reference' => $reference,
// ]);
// }

// // Update the database with the calculated scores in batch
// if (! empty($data)) {
// $this->info('Updating batch '.$batchCount);
// $this->updateBatch($data);
// $data = []; // Reset the data array for the next batch
// }
// }
// $batchCount = $batchCount + 1;
// });

// // Ensure any remaining data is updated after the last chunk
// if (! empty($data)) {
// $this->updateBatch($data);
// }

// Process parent molecules
$data = [];
$batchCount = 1;
$total_parent_molecules = DB::select('SELECT count(*) FROM molecules WHERE is_parent = true;')[0]->count;
DB::table('molecules')
->select('id')
->where('is_parent', true)
->orderBy('id')
->chunk($batchSize, function ($parent_molecules) use (&$data, &$batchCount) {
$this->info('started parent batch ');
$ids_string = implode(',', $parent_molecules->pluck('id')->toArray());

$patents_pivot_rows = DB::select("SELECT collection_id, molecule_id, url, reference FROM collection_molecule WHERE molecule_id in ({$ids_string});");

foreach ($patents_pivot_rows as $parent_pivot_row) {
$url = null;
$reference = null;
$children_ids = collect(DB::select("SELECT id FROM molecules WHERE parent_id = {$parent_pivot_row->molecule_id};"));
$children_ids_string = implode(',', $children_ids->pluck('id')->toArray());
$children_pivot_rows = DB::select("SELECT collection_id, molecule_id, url, reference FROM collection_molecule WHERE collection_id = {$parent_pivot_row->collection_id} and molecule_id in ({$children_ids_string});");

foreach ($children_pivot_rows as $children_pivot_row) {
if (! $parent_pivot_row->url) {
$url = $children_pivot_row->url;
$reference = $children_pivot_row->reference;
} else {
$url .= '|'.$children_pivot_row->url;
$reference .= '|'.$children_pivot_row->reference;
->chunk($batchSize, function ($parent_molecules) use (&$data, &$batchCount, $total_parent_molecules, $batchSize) {
$this->info('started parent batch '.$batchCount.' of '.ceil($total_parent_molecules / $batchSize));
// $ids_string = implode(',', $parent_molecules->pluck('id')->toArray());
if ($batchCount >= 2) {
$patents_pivot_rows = DB::table('collection_molecule')
->selectRaw('collection_id, molecule_id, url, reference')
->whereIntegerInRaw('molecule_id', $parent_molecules->pluck('id')->toArray())
->get();
$total_parent_molecules = count($patents_pivot_rows);

// $molecule_number = 1;
$progressBar = $this->output->createProgressBar($total_parent_molecules);
foreach ($patents_pivot_rows as $parent_pivot_row) {
$url = null;
$reference = null;
$children_ids = collect(DB::select("SELECT id FROM molecules WHERE parent_id = {$parent_pivot_row->molecule_id};"));
// $children_ids_string = implode(',', $children_ids->pluck('id')->toArray());

$children_pivot_rows = DB::table('collection_molecule')
->selectRaw('collection_id, molecule_id, url, reference')
->whereRaw('collection_id=?', [$parent_pivot_row->collection_id])
->whereIntegerInRaw('molecule_id', $children_ids->pluck('id')->toArray())
->get();

foreach ($children_pivot_rows as $children_pivot_row) {
if (! $parent_pivot_row->url) {
$url = $children_pivot_row->url;
$reference = $children_pivot_row->reference;
} else {
$url .= '|'.$children_pivot_row->url;
$reference .= '|'.$children_pivot_row->reference;
}
}

// Push each parent's data for update
array_push($data, [
'collection_id' => $parent_pivot_row->collection_id,
'molecule_id' => $parent_pivot_row->molecule_id,
'url' => $url,
'reference' => $reference,
]);
$progressBar->advance();
// $this->info($molecule_number .' of '.$total_parent_molecules);
// $molecule_number++;
}
}

// Update the database with the collected url and reference values in batch
if (! empty($data)) {
$this->info('Updating parent batch '.$batchCount);
$this->updateBatch($data);
$data = []; // Reset the data array for the next batch
// Update the database with the collected url and reference values in batch
if (! empty($data)) {
$this->info('Updating parent batch '.$batchCount);
$this->updateBatch($data);
$progressBar->finish();
$data = []; // Reset the data array for the next batch
}
}

$batchCount = $batchCount + 1;
Expand Down

0 comments on commit 1ba14ec

Please sign in to comment.