Skip to content

Commit ce01a15

Browse files
committed
wip
1 parent 901cebd commit ce01a15

File tree

2 files changed

+7
-13
lines changed

2 files changed

+7
-13
lines changed

spd/clustering/merge_history.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import io
22
import json
3-
import sys
43
import zipfile
54
from dataclasses import dataclass
65
from pathlib import Path
@@ -280,13 +279,12 @@ def normalized(self) -> tuple[MergesArray, dict[str, Any]]:
280279
dtype=np.int16,
281280
)
282281
except Exception as e:
283-
print(
282+
err_msg = (
284283
f"failed to create merge array, probably due to issues with getting shape.\n"
285284
f"{self = }\n"
286-
f"{self.data = }\n",
287-
file=sys.stderr,
285+
f"{self.data = }\n"
288286
)
289-
raise e
287+
raise RuntimeError(err_msg) from e
290288

291289
overlap_stats: Float[np.ndarray, " n_ens"] = np.full(
292290
self.n_ensemble,
@@ -342,6 +340,7 @@ def normalized(self) -> tuple[MergesArray, dict[str, Any]]:
342340
history_metadatas.append(None)
343341

344342
return (
343+
# TODO: dataclass this
345344
merges_array,
346345
dict(
347346
component_labels=unique_labels,

spd/clustering/pipeline/s3_normalize_histories.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,11 @@ def normalize_and_save(
1515
) -> MergesArray:
1616
"""Main function to load merge histories and compute distances"""
1717
# load
18-
data = [MergeHistory.read(p) for p in history_paths]
19-
ensemble = MergeHistoryEnsemble(data=data)
18+
data: list[MergeHistory] = [MergeHistory.read(p) for p in history_paths]
19+
ensemble: MergeHistoryEnsemble = MergeHistoryEnsemble(data=data)
2020

2121
# normalize
22+
normalized_merge_array: MergesArray
2223
normalized_merge_array, normalized_merge_meta = ensemble.normalized()
2324

2425
# save
@@ -30,10 +31,4 @@ def normalize_and_save(
3031
np.savez_compressed(enseble_merge_arr_path, merges=normalized_merge_array)
3132
logger.info(f"merge array saved to {enseble_merge_arr_path}")
3233

33-
# TODO: double check we're already saving everything we need outside of the zanj file
34-
35-
# path_hist_ensemble: Path = output_dir / "ensemble_raw.zanj"
36-
# ZANJ().save(ensemble, path_hist_ensemble)
37-
# logger.info(f"Ensemble saved to {path_hist_ensemble}")
38-
3934
return normalized_merge_array

0 commit comments

Comments
 (0)