Skip to content

Commit

Permalink
Merge branch 'master' into GH375_allow_region_instead_of_contig_param
Browse files Browse the repository at this point in the history
  • Loading branch information
leehart committed Dec 6, 2024
2 parents 1409756 + 70ad53d commit 3825a92
Show file tree
Hide file tree
Showing 23 changed files with 894 additions and 689 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,17 @@ jobs:
- name: Checkout source
uses: actions/checkout@v4

- name: Install poetry
run: pipx install poetry==1.8.3

- name: Setup python
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'poetry'
cache: 'pip'

- name: Install dependencies
run: poetry install
- name: Install package
run: pip install .[dev]

- name: Run tests with coverage
run: poetry run pytest --durations=20 -v --cov malariagen_data/anoph --cov-report=xml tests/anoph
- name: Run unit tests with coverage
run: pytest -v tests --ignore tests/integration --cov malariagen_data/anoph --cov-report=xml

- name: Upload coverage report
uses: codecov/codecov-action@v3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: legacy_tests
name: integration_tests
on:
push:
branches:
Expand All @@ -7,7 +7,7 @@ on:
branches:
- master
jobs:
legacy_tests:
integration_tests:
strategy:
fail-fast: true
matrix:
Expand All @@ -23,17 +23,14 @@ jobs:
- name: Checkout source
uses: actions/checkout@v4

- name: Install poetry
run: pipx install poetry==1.8.3

- name: Setup python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: "poetry"
cache: "pip"

- name: Install dependencies
run: poetry install
- name: Install package
run: pip install .[dev]

- id: 'auth'
name: 'Set up Google Cloud authentication'
Expand All @@ -53,14 +50,14 @@ jobs:
uses: actions/cache/restore@v3
with:
path: gcs_cache
key: gcs_cache_tests_20240922
key: gcs_cache_integration_tests_20240922

- name: Run full test suite
run: poetry run pytest --durations=20 --ignore=tests/anoph -v tests
- name: Run integration tests
run: pytest --durations=20 -v tests/integration

- name: Save GCS cache
uses: actions/cache/save@v3
if: always()
with:
path: gcs_cache
key: gcs_cache_tests_20240922
key: gcs_cache_integration_tests_20240922
18 changes: 13 additions & 5 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,21 @@ jobs:
fail-fast: true
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- name: Checkout source
uses: actions/checkout@v4

- name: Setup python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- uses: pre-commit/action@v3.0.0
- name: Install mypy
run: pip install mypy
cache: 'pip'

- name: Run pre-commit checks
uses: pre-commit/action@v3.0.0

- name: Install package
run: pip install .[dev]

- name: Run mypy
run: |
mypy malariagen_data tests --ignore-missing-imports
11 changes: 4 additions & 7 deletions .github/workflows/notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,14 @@ jobs:
- name: Checkout source
uses: actions/checkout@v4

- name: Install poetry
run: pipx install poetry==1.8.3

- name: Setup python
uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "poetry"
cache: "pip"

- name: Install dependencies
run: poetry install
- name: Install package
run: pip install .[dev]

- id: 'auth'
name: 'Set up Google Cloud authentication'
Expand All @@ -52,7 +49,7 @@ jobs:
key: gcs_cache_notebooks_20240922

- name: Run notebooks
run: poetry run jupyter nbconvert --execute notebooks/*.ipynb --inplace
run: jupyter nbconvert --execute notebooks/*.ipynb --inplace

- name: Save GCS cache
uses: actions/cache/save@v3
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ jobs:
- name: Install package
run: pip install "${{ matrix.numpy-version }}" .[dev]

- name: Run fast unit tests
run: pytest -v tests/anoph --typeguard-packages=malariagen_data,malariagen_data.anoph
- name: Run unit tests
run: pytest -v tests --ignore tests/integration --typeguard-packages=malariagen_data,malariagen_data.anoph
4 changes: 3 additions & 1 deletion malariagen_data/anoph/cnv_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,7 @@ def plot_cnv_hmm_heatmap_track(
width: gplt_params.width = gplt_params.width_default,
row_height: gplt_params.row_height = 7,
height: Optional[gplt_params.height] = None,
palette: Optional[gplt_params.colors] = cnv_params.colorscale_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
) -> gplt_params.optional_figure:
Expand Down Expand Up @@ -929,7 +930,6 @@ def plot_cnv_hmm_heatmap_track(
)

debug("set up palette and color mapping")
palette = cnv_params.colorscale_default
color_mapper = bkmod.LinearColorMapper(low=-1.5, high=4.5, palette=palette)

debug("plot the HMM copy number data as an image")
Expand Down Expand Up @@ -999,6 +999,7 @@ def plot_cnv_hmm_heatmap(
width: gplt_params.width = gplt_params.width_default,
row_height: gplt_params.row_height = 7,
track_height: Optional[gplt_params.track_height] = None,
palette: Optional[gplt_params.colors] = cnv_params.colorscale_default,
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
gene_labels: Optional[gplt_params.gene_labels] = None,
Expand All @@ -1020,6 +1021,7 @@ def plot_cnv_hmm_heatmap(
width=width,
row_height=row_height,
height=track_height,
palette=palette,
show=False,
)
fig1.xaxis.visible = False
Expand Down
9 changes: 5 additions & 4 deletions malariagen_data/anoph/genome_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,11 +415,12 @@ def plot_genes(

# Increase the figure height by a certain factor, to accommodate labels.
height_increase_factor = 1.3
assert fig.height is not None
fig.height = int(fig.height * height_increase_factor)

# Get the original y_range.
# Note: fig.y_range is not subscriptable.
orig_y_range = fig.y_range.start, fig.y_range.end
orig_y_range = fig.y_range.start, fig.y_range.end # type: ignore

# Determine the midpoint of the original range, to rescale outward from there.
orig_mid_y_range = (orig_y_range[0] + orig_y_range[1]) / 2
Expand All @@ -432,7 +433,7 @@ def plot_genes(
new_y_end = orig_mid_y_range + new_y_range_extent_half

# Set the new y_range.
fig.y_range = bokeh.models.Range1d(new_y_start, new_y_end)
fig.y_range = bokeh.models.Range1d(new_y_start, new_y_end) # type: ignore

debug("determine midpoint of each gene rectangle")
data["mid_x"] = (data["start"] + data["end"]) / 2
Expand Down Expand Up @@ -474,7 +475,7 @@ def plot_genes(
data_as_cds = bokeh.models.ColumnDataSource(data)

# Create a LabelSet for the gene pointers.
gene_pointers_ls = bokeh.models.LabelSet(
gene_pointers_ls = bokeh.models.LabelSet( # type: ignore
source=data_as_cds,
x="mid_x",
y="pointer_y",
Expand All @@ -486,7 +487,7 @@ def plot_genes(
)

# Create a LabelSet for the gene labels.
gene_labels_ls = bokeh.models.LabelSet(
gene_labels_ls = bokeh.models.LabelSet( # type: ignore
source=data_as_cds,
x="mid_x",
y="label_y",
Expand Down
2 changes: 1 addition & 1 deletion malariagen_data/anoph/gplt_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,6 @@
]

gene_labelset: TypeAlias = Annotated[
bokeh.models.LabelSet,
bokeh.models.LabelSet, # type: ignore
"A LabelSet to use in the plot.",
]
2 changes: 1 addition & 1 deletion malariagen_data/anoph/h12.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ def plot_h12_gwss_multi_panel(
figs.append(fig2)

# Combine plots into a single figure.
fig = bokeh.layouts.gridplot(
fig = bokeh.layouts.gridplot( # type: ignore
figs,
ncols=1,
toolbar_location="above",
Expand Down
8 changes: 4 additions & 4 deletions malariagen_data/anoph/hap_frq.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def haplotypes_frequencies(
hap_dict.update(f)
freq_cols["frq_" + coh] = list(hap_dict.values())

df_freqs = pd.DataFrame(freq_cols, index=hap_dict.keys())
df_freqs = pd.DataFrame(freq_cols, index=list(hap_dict.keys()))

# Compute max_af.
df_max_af = pd.DataFrame({"max_af": df_freqs.max(axis=1)})
Expand Down Expand Up @@ -230,14 +230,14 @@ def haplotypes_frequencies_advanced(
freq_cols["frq_" + cohort_key_str] = list(hap_freq.values())
nobs_cols["nobs_" + cohort_key_str] = list(hap_nob.values())

df_freqs = pd.DataFrame(freq_cols, index=hap_freq.keys())
df_freqs = pd.DataFrame(freq_cols, index=list(hap_freq.keys()))

# Compute max_af.
df_max_af = pd.DataFrame({"max_af": df_freqs.max(axis=1)})

df_counts = pd.DataFrame(count_cols, index=hap_count.keys())
df_counts = pd.DataFrame(count_cols, index=list(hap_count.keys()))

df_nobs = pd.DataFrame(nobs_cols, index=hap_nob.keys())
df_nobs = pd.DataFrame(nobs_cols, index=list(hap_nob.keys()))

# Build the final dataframe.
df_haps = pd.concat([df_freqs, df_counts, df_nobs, df_max_af], axis=1)
Expand Down
2 changes: 1 addition & 1 deletion malariagen_data/anoph/sample_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,7 +1270,7 @@ def cohorts(
df_cohorts = pd.read_csv(f, sep=",", na_values="")

# Ensure all column names are lower case.
df_cohorts.columns = [c.lower() for c in df_cohorts.columns]
df_cohorts.columns = [c.lower() for c in df_cohorts.columns] # type: ignore

return df_cohorts

Expand Down
11 changes: 7 additions & 4 deletions malariagen_data/anoph/snp_frq.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ def snp_allele_frequencies_advanced(

# Apply variant query.
if variant_query is not None:
loc_variants = df_variants.eval(variant_query).values
loc_variants = np.asarray(df_variants.eval(variant_query))

# Check for no SNPs remaining after applying variant query.
if np.count_nonzero(loc_variants) == 0:
Expand Down Expand Up @@ -1192,8 +1192,8 @@ def plot_frequencies_interactive_map(

# Set up interactive controls.
variants = ds["variant_label"].values
taxa = ds["cohort_taxon"].to_pandas().dropna().unique()
periods = ds["cohort_period"].to_pandas().dropna().unique()
taxa = ds["cohort_taxon"].to_pandas().dropna().unique() # type: ignore
periods = ds["cohort_period"].to_pandas().dropna().unique() # type: ignore
controls = ipywidgets.interactive(
self.plot_frequencies_map_markers,
m=ipywidgets.fixed(freq_map),
Expand Down Expand Up @@ -1281,7 +1281,10 @@ def snp_genotype_allele_counts(
loc_sites = df_snps[f"pass_{site_mask}"]
df_snps = df_snps.loc[loc_sites]

return df_snps.query(snp_query)
if snp_query is not None:
df_snps = df_snps.query(snp_query)

return df_snps


@numba.jit(nopython=True)
Expand Down
1 change: 1 addition & 0 deletions malariagen_data/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ def da_compress(

# Load the indexer temporarily for chunk size computations.
if indexer_computed is None:
assert isinstance(indexer, da.Array)
indexer_computed = indexer.compute()

# Ensure indexer and data are chunked in the same way.
Expand Down
Loading

0 comments on commit 3825a92

Please sign in to comment.