Skip to content

Commit

Permalink
fix: corrected the queries, handled datatypes and skipped collection…
Browse files Browse the repository at this point in the history
… wise stats for unnecessary columns
  • Loading branch information
sriramkanakam87 committed Dec 9, 2024
1 parent e7e2624 commit 00b2d9a
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 53 deletions.
124 changes: 87 additions & 37 deletions app/Console/Commands/GenerateDensityCharts.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class GenerateDensityCharts extends Command

protected $description = 'Generate density charts data for molecular tables and columns';

private $density_bins = 50;
private $density_bins = 30;

private $columnsToSkip = [
'properties' => [
Expand All @@ -25,10 +25,25 @@ class GenerateDensityCharts extends Command
'created_at',
'updated_at',
],
'molecules' => [
'id',
'name_trust_level',
'parent_id',
'variants_count',
'ticker',
'organism_count',
'geo_count',
'citation_count',
'collection_count',
'synonym_count',
'created_at',
'updated_at',
],
];

private $defaultTables = [
'properties',
'molecules',
];

public function handle()
Expand All @@ -40,7 +55,6 @@ public function handle()
if (! Schema::hasIndex('collection_molecule', 'fk_collection_molecule_collection_id')) {
$table->index('collection_id', 'fk_collection_molecule_collection_id');
}

});

$tableColumnMap = $this->parseArguments();
Expand All @@ -63,8 +77,8 @@ public function handle()

private function determineType($dbType)
{
$numericTypes = ['integer', 'float', 'decimal', 'double precision', 'numeric'];
$stringTypes = ['text', 'jsonb', 'character varying', 'varchar'];
$numericTypes = ['int2', 'int4', 'int8', 'float4', 'float8', 'numeric', 'decimal'];
$stringTypes = ['text', 'jsonb', 'character varying', 'varchar', 'char', 'bpchar', 'uuid'];

if (in_array($dbType, $numericTypes)) {
return 'range';
Expand Down Expand Up @@ -194,15 +208,30 @@ private function calculateDensity(string $table, string $column, string $dataTyp
});

// For overall stats
$stats = DB::table($table)
->selectRaw("
MIN($column) as min_val,
MAX($column) as max_val,
AVG($column) as mean,
COUNT($column) as count,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY $column) as median
")
->first();
if ($table != 'molecules') {
$stats = DB::select("
SELECT MIN($column) as min_val,
MAX($column) as max_val,
AVG($column) as mean,
COUNT($column) as count,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY $column) as median
FROM $table p
LEFT JOIN molecules m ON p.molecule_id = m.id
WHERE m.active = TRUE
AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE);
")[0];
} else {
$stats = DB::select("
SELECT MIN($column) as min_val,
MAX($column) as max_val,
AVG($column) as mean,
COUNT($column) as count,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY $column) as median
FROM $table m
WHERE m.active = TRUE
AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE);
")[0];
}

$statsForColumn = [
'type' => $dataType,
Expand All @@ -211,13 +240,16 @@ private function calculateDensity(string $table, string $column, string $dataTyp
];

// For collection-wise stats
if ($table !== 'molecules') {
$collectionsStats = DB::table('collections as c')
->join('collection_molecule as cm', 'c.id', '=', 'cm.collection_id')
->join($table.' as p', 'cm.molecule_id', '=', 'p.molecule_id')
->whereNotNull("p.$column")
->groupBy('c.id', 'c.title')
->selectRaw("
if (in_array($column, ['annotation_level', 'np_likeness'])) {
if ($table !== 'molecules') {
$collectionsStats = DB::table('collections as c')
->join('collection_molecule as cm', 'c.id', '=', 'cm.collection_id')
->join($table.' as p', 'cm.molecule_id', '=', 'p.molecule_id')
->join('molecules as m', 'p.molecule_id', '=', 'm.id')
->whereRaw('m.active = TRUE AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE)')
->whereNotNull("p.$column")
->groupBy('c.id', 'c.title')
->selectRaw("
c.id,
c.title,
MIN(p.$column) as min_val,
Expand All @@ -226,14 +258,15 @@ private function calculateDensity(string $table, string $column, string $dataTyp
COUNT(p.$column) as count,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY p.$column) as median
")
->get();
} else {
$collectionsStats = DB::table('collections as c')
->join('collection_molecule as cm', 'c.id', '=', 'cm.collection_id')
->join($table.' as m', 'cm.molecule_id', '=', 'm.id')
->whereNotNull("m.$column")
->groupBy('c.id', 'c.title')
->selectRaw("
->get();
} else {
$collectionsStats = DB::table('collections as c')
->join('collection_molecule as cm', 'c.id', '=', 'cm.collection_id')
->join($table.' as m', 'cm.molecule_id', '=', 'm.id')
->whereRaw('m.active = TRUE AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE)')
->whereNotNull("m.$column")
->groupBy('c.id', 'c.title')
->selectRaw("
c.id as id,
c.title as title,
MIN(m.$column) as min_val,
Expand All @@ -242,12 +275,13 @@ private function calculateDensity(string $table, string $column, string $dataTyp
COUNT(m.$column) as count,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY m.$column) as median
")
->get();
}
->get();
}

foreach ($collectionsStats as $collectionStats) {
$statsForColumn['collections'][$collectionStats->title] = $this->getDensityStats($collectionStats, $table, $column);
// $this->info("For column: $column Calculated density for collection: {$collectionStats->title}");
foreach ($collectionsStats as $collectionStats) {
$statsForColumn['collections'][$collectionStats->title] = $this->getDensityStats($collectionStats, $table, $column);
// $this->info("For column: $column Calculated density for collection: {$collectionStats->title}");
}
}

$this->info("Calculated density for column: $column");
Expand Down Expand Up @@ -292,16 +326,32 @@ private function getDensityStats($stats, $table, $column)
$binStart = $min + ($i * $binWidth);
$binEnd = $binStart + $binWidth;

$count = DB::table($table)
->selectRaw('count(*) as count')
->whereBetween($column, [$binStart, $binEnd])
->pluck('count')->first();
if ($table != 'molecules') {
$count = DB::select("
SELECT count(*) as count
FROM $table p
JOIN molecules m ON p.molecule_id = m.id
WHERE (CAST(p.$column AS DECIMAL(10,2)) BETWEEN $binStart AND $binEnd)
AND m.active = TRUE
AND NOT (m.is_parent = TRUE AND m.has_variants = TRUE);
")[0]->count;
} else {
$count = DB::select("
SELECT count(*) as count
FROM $table m
WHERE (CAST($column AS DECIMAL(10,2)) BETWEEN $binStart AND $binEnd)
AND active = TRUE
AND NOT (is_parent = TRUE AND has_variants = TRUE);
")[0]->count;
}

// if($count > 0){
$bins[] = [
'x' => ($binStart + $binEnd) / 2,
'y' => $count,
'range' => [$binStart, $binEnd],
];
// }
}

// Normalize
Expand Down
13 changes: 13 additions & 0 deletions app/Livewire/Stats.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php

namespace App\Livewire;

use Livewire\Component;

/**
 * Livewire component backed by the `livewire.stats` view.
 */
class Stats extends Component
{
    /**
     * Resolve the view that represents this component.
     */
    public function render()
    {
        $template = 'livewire.stats';

        return view($template);
    }
}
13 changes: 0 additions & 13 deletions app/Rest/Resources/MoleculeResource.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
namespace App\Rest\Resources;

use App\Rest\Resource as RestResource;
use Lomkit\Rest\Http\Requests\RestRequest;
use Lomkit\Rest\Relations\HasOne;

class MoleculeResource extends RestResource
Expand All @@ -17,8 +16,6 @@ class MoleculeResource extends RestResource

/**
* The exposed fields that could be provided
*
* @param RestRequest $request
*/
public function fields(\Lomkit\Rest\Http\Requests\RestRequest $request): array
{
Expand Down Expand Up @@ -50,8 +47,6 @@ public function fields(\Lomkit\Rest\Http\Requests\RestRequest $request): array

/**
* The exposed relations that could be provided
*
* @param RestRequest $request
*/
public function relations(\Lomkit\Rest\Http\Requests\RestRequest $request): array
{
Expand All @@ -62,8 +57,6 @@ public function relations(\Lomkit\Rest\Http\Requests\RestRequest $request): arra

/**
* The exposed scopes that could be provided
*
* @param RestRequest $request
*/
public function scopes(\Lomkit\Rest\Http\Requests\RestRequest $request): array
{
Expand All @@ -72,8 +65,6 @@ public function scopes(\Lomkit\Rest\Http\Requests\RestRequest $request): array

/**
* The exposed limits that could be provided
*
* @param RestRequest $request
*/
public function limits(\Lomkit\Rest\Http\Requests\RestRequest $request): array
{
Expand All @@ -86,8 +77,6 @@ public function limits(\Lomkit\Rest\Http\Requests\RestRequest $request): array

/**
* The actions that should be linked
*
* @param RestRequest $request
*/
public function actions(\Lomkit\Rest\Http\Requests\RestRequest $request): array
{
Expand All @@ -96,8 +85,6 @@ public function actions(\Lomkit\Rest\Http\Requests\RestRequest $request): array

/**
* The instructions that should be linked
*
* @param RestRequest $request
*/
public function instructions(\Lomkit\Rest\Http\Requests\RestRequest $request): array
{
Expand Down
2 changes: 0 additions & 2 deletions resources/js/app.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import "./bootstrap";
import * as d3 from 'd3';

window.d3 = d3;
import OCL from "openchemlib/full";
window.OCL = OCL;
2 changes: 1 addition & 1 deletion resources/views/livewire/density-plot.blade.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
const data = @js($data['properties']['np_likeness']);
const datasets = [{
label: 'Overall',
label: 'COCONUT',
data: data.overall.density_data.map(point => ({
x: point.x,
y: point.y
Expand Down

0 comments on commit 00b2d9a

Please sign in to comment.