Skip to content

Commit

Permalink
More tests and some trimming of the hierarchy in anoph
Browse files Browse the repository at this point in the history
  • Loading branch information
jonbrenas committed Dec 13, 2024
1 parent 75b4cc3 commit 6b9767d
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 11 deletions.
6 changes: 4 additions & 2 deletions malariagen_data/anoph/dipclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@
cnv_params,
)
from .snp_frq import AnophelesSnpFrequencyAnalysis
from .cnv_data import AnophelesCnvData
from .cnv_frq import AnophelesCnvFrequencyAnalysis

# Query expression that keeps rows whose ``effect`` value is one of the listed
# effect labels (non-synonymous coding change, start lost, stop lost, stop
# gained).
# NOTE(review): presumably applied to a SNP-effects DataFrame to select
# amino-acid-changing variants; the call site is not visible here - confirm.
AA_CHANGE_QUERY = (
"effect in ['NON_SYNONYMOUS_CODING', 'START_LOST', 'STOP_LOST', 'STOP_GAINED']"
)


class AnophelesDipClustAnalysis(AnophelesSnpFrequencyAnalysis, AnophelesCnvData):
class AnophelesDipClustAnalysis(
AnophelesCnvFrequencyAnalysis, AnophelesSnpFrequencyAnalysis
):
def __init__(
self,
**kwargs,
Expand Down
9 changes: 0 additions & 9 deletions malariagen_data/anopheles.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,6 @@
import plotly.graph_objects as go # type: ignore
from numpydoc_decorator import doc # type: ignore

from malariagen_data.anoph.snp_frq import (
AnophelesSnpFrequencyAnalysis,
)

from .anoph.cnv_frq import AnophelesCnvFrequencyAnalysis

from .anoph import (
aim_params,
Expand All @@ -31,7 +26,6 @@
)
from .anoph.aim_data import AnophelesAimData
from .anoph.base import AnophelesBase
from .anoph.cnv_data import AnophelesCnvData
from .anoph.genome_features import AnophelesGenomeFeaturesData
from .anoph.genome_sequence import AnophelesGenomeSequenceData
from .anoph.hap_data import AnophelesHapData, hap_params
Expand Down Expand Up @@ -87,8 +81,6 @@ class AnophelesDataResource(
AnophelesH12Analysis,
AnophelesG123Analysis,
AnophelesFstAnalysis,
AnophelesCnvFrequencyAnalysis,
AnophelesSnpFrequencyAnalysis,
AnophelesHapFrequencyAnalysis,
AnophelesDistanceAnalysis,
AnophelesPca,
Expand All @@ -97,7 +89,6 @@ class AnophelesDataResource(
AnophelesAimData,
AnophelesHapData,
AnophelesSnpData,
AnophelesCnvData,
AnophelesSampleMetadata,
AnophelesGenomeFeaturesData,
AnophelesGenomeSequenceData,
Expand Down
102 changes: 102 additions & 0 deletions tests/anoph/test_dipclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
from malariagen_data.anoph.dipclust import AnophelesDipClustAnalysis


def random_transcripts_contig(*, api, contig, n):
    """Return ``n`` randomly sampled mRNA transcript IDs found on ``contig``.

    Parameters
    ----------
    api
        Object exposing ``genome_features(attributes=...)`` that returns a
        DataFrame with at least ``type``, ``contig`` and ``ID`` columns.
    contig
        Contig name used to filter the genome features.
    n
        Number of transcript IDs to draw (without replacement).

    Returns
    -------
    list
        The sampled transcript IDs.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when ``n`` exceeds the number of
        available transcript IDs on the contig.
    """
    features = api.genome_features(attributes=["ID", "Parent"])

    # Keep only mRNA records located on the requested contig.
    mrna_on_contig = features.query(f"type == 'mRNA' and contig == '{contig}'")

    # Rows lacking an ID cannot be referenced as a transcript, so drop them.
    candidate_ids = mrna_on_contig["ID"].dropna().to_list()

    return random.sample(candidate_ids, n)


@pytest.fixture
def ag3_sim_api(ag3_sim_fixture):
return AnophelesDipClustAnalysis(
Expand Down Expand Up @@ -98,3 +106,97 @@ def test_plot_diplotype_clustering(

# Run checks.
api.plot_diplotype_clustering(**dipclust_params)


@pytest.mark.parametrize("distance_metric", ["cityblock", "euclidean"])
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced(
    fixture, api: AnophelesDipClustAnalysis, distance_metric
):
    """Smoke-test ``plot_diplotype_clustering_advanced`` on a random region.

    The sample set, linkage method and sample query are drawn at random;
    the distance metric comes from the parametrization. The test makes no
    assertions on the output and passes if the call does not raise.
    """
    # Pools of values to draw from.
    available_sample_sets = api.sample_sets()["sample_set"].to_list()
    linkage_options = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    query_options = (None, "sex_call == 'F'")

    # Draw the random parameters (region first, then set, linkage, query).
    region = fixture.random_region_str(region_size=5000)
    chosen_sample_set = random.choice(available_sample_sets)
    chosen_linkage = random.choice(linkage_options)
    chosen_query = random.choice(query_options)

    # Run the plotting call without displaying the figure.
    api.plot_diplotype_clustering_advanced(
        region=region,
        sample_sets=[chosen_sample_set],
        linkage_method=chosen_linkage,
        distance_metric=distance_metric,
        sample_query=chosen_query,
        show=False,
    )


@pytest.mark.parametrize("n", [1, 2])
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced_with_transcript(
    fixture, api: AnophelesDipClustAnalysis, n
):
    """Smoke-test ``plot_diplotype_clustering_advanced`` with SNP transcripts.

    A random contig is used as the region and ``n`` random transcripts from
    that contig are passed as ``snp_transcripts``. The test makes no
    assertions on the output and passes if the call does not raise.
    """
    # Choose the contig and the transcripts located on it.
    contig = fixture.random_contig()
    snp_transcripts = random_transcripts_contig(api=api, contig=contig, n=n)

    # Pools of values to draw from.
    available_sample_sets = api.sample_sets()["sample_set"].to_list()
    linkage_options = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    query_options = (None, "sex_call == 'F'")

    # Draw the random parameters (set, then linkage, then query).
    chosen_sample_set = random.choice(available_sample_sets)
    chosen_linkage = random.choice(linkage_options)
    chosen_query = random.choice(query_options)

    # Run the plotting call without displaying the figure.
    api.plot_diplotype_clustering_advanced(
        region=contig,
        snp_transcripts=snp_transcripts,
        sample_sets=[chosen_sample_set],
        linkage_method=chosen_linkage,
        distance_metric="cityblock",
        sample_query=chosen_query,
        show=False,
    )


@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced_with_cnv_region(
    fixture, api: AnophelesDipClustAnalysis
):
    """Smoke-test ``plot_diplotype_clustering_advanced`` with a CNV region.

    The same random region string is passed as both ``region`` and
    ``cnv_region``. The test makes no assertions on the output and passes
    if the call does not raise.
    """
    # One random region is shared by the SNP and CNV arguments.
    shared_region = fixture.random_region_str(region_size=5000)

    # Pools of values to draw from.
    available_sample_sets = api.sample_sets()["sample_set"].to_list()
    linkage_options = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    query_options = (None, "sex_call == 'F'")

    # Draw the random parameters (set, then linkage, then query).
    chosen_sample_set = random.choice(available_sample_sets)
    chosen_linkage = random.choice(linkage_options)
    chosen_query = random.choice(query_options)

    # Run the plotting call without displaying the figure.
    api.plot_diplotype_clustering_advanced(
        region=shared_region,
        cnv_region=shared_region,
        sample_sets=[chosen_sample_set],
        linkage_method=chosen_linkage,
        distance_metric="cityblock",
        sample_query=chosen_query,
        show=False,
    )

0 comments on commit 6b9767d

Please sign in to comment.