fix: update test bench cli arguments for clustering examples #205

Merged · 5 commits · Feb 20, 2023
36 changes: 18 additions & 18 deletions dance/modules/spatial/spatial_domain/spagcn.py
@@ -203,7 +203,7 @@ def target_distribution(self, q):
p = p / torch.sum(p, dim=1, keepdim=True)
return p

- def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_interval=50, weight_decay=5e-4,
+ def fit(self, X, adj, lr=0.001, epochs=5000, update_interval=3, trajectory_interval=50, weight_decay=5e-4,
opt="sgd", init="louvain", n_neighbors=10, res=0.4, n_clusters=10, init_spa=True, tol=1e-3):
"""Fit function for model training.

@@ -215,7 +215,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_i
Adjacent matrix.
lr : float
Learning rate.
- max_epochs : int
+ epochs : int
Maximum number of epochs.
update_interval : int
Interval for update.
@@ -288,7 +288,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_i
adj = adj.to(device)

self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(X, adj)
p = self.target_distribution(q).data
@@ -317,7 +317,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_i
X = X.cpu()
adj = adj.cpu()

- def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=5000, update_interval=1, weight_decay=5e-4, opt="sgd"):
+ def fit_with_init(self, X, adj, init_y, lr=0.001, epochs=5000, update_interval=1, weight_decay=5e-4, opt="sgd"):
"""Initializing cluster centers with kmeans."""
logger.info("Initializing cluster centers with kmeans.")
if opt == "sgd":
@@ -340,7 +340,7 @@ def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=5000, update_interv
adj = adj.to(device)

self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(torch.FloatTensor(X), torch.FloatTensor(adj))
p = self.target_distribution(q).data
@@ -399,7 +399,7 @@ def target_distribution(self, q):
p = p / torch.sum(p, dim=1, keepdim=True)
return p

- def fit(self, X, adj, lr=0.001, max_epochs=10, update_interval=5, weight_decay=5e-4, opt="sgd", init="louvain",
+ def fit(self, X, adj, lr=0.001, epochs=10, update_interval=5, weight_decay=5e-4, opt="sgd", init="louvain",
n_neighbors=10, res=0.4):
self.trajectory = []
logger.info("Initializing cluster centers with kmeans.")
@@ -431,7 +431,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=10, update_interval=5, weight_decay=5

self.mu.data.copy_(torch.Tensor(cluster_centers))
self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(X, adj)
p = self.target_distribution(q).data
@@ -444,7 +444,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=10, update_interval=5, weight_decay=5
optimizer.step()
self.trajectory.append(torch.argmax(q, dim=1).data.cpu().numpy())

- def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=10, update_interval=1, weight_decay=5e-4, opt="sgd"):
+ def fit_with_init(self, X, adj, init_y, lr=0.001, epochs=10, update_interval=1, weight_decay=5e-4, opt="sgd"):
logger.info("Initializing cluster centers with kmeans.")
if opt == "sgd":
optimizer = optim.SGD(self.parameters(), lr=lr, momentum=0.9)
@@ -459,7 +459,7 @@ def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=10, update_interval
cluster_centers = np.asarray(Mergefeature.groupby("Group").mean())
self.mu.data.copy_(torch.Tensor(cluster_centers))
self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(torch.FloatTensor(X), torch.FloatTensor(adj))
p = self.target_distribution(q).data
@@ -546,20 +546,20 @@ def set_l(self, l):
"""
self.l = l

- def search_set_res(self, x, l, target_num, start=0.4, step=0.1, tol=5e-3, lr=0.05, max_epochs=10, max_run=10):
+ def search_set_res(self, x, l, target_num, start=0.4, step=0.1, tol=5e-3, lr=0.05, epochs=10, max_run=10):
"""Search for optimal resolution parameter."""
res = start
logger.info(f"Start at {res = :.4f}, {step = :.4f}")
clf = SpaGCN(l)
- y_pred = clf.fit_predict(x, init_spa=True, init="louvain", res=res, tol=tol, lr=lr, max_epochs=max_epochs)
+ y_pred = clf.fit_predict(x, init_spa=True, init="louvain", res=res, tol=tol, lr=lr, epochs=epochs)
old_num = len(set(y_pred))
logger.info(f"Res = {res:.4f}, Num of clusters = {old_num}")
run = 0
while old_num != target_num:
old_sign = 1 if (old_num < target_num) else -1
clf = SpaGCN(l)
y_pred = clf.fit_predict(x, init_spa=True, init="louvain", res=res + step * old_sign, tol=tol, lr=lr,
- max_epochs=max_epochs)
+ epochs=epochs)
new_num = len(set(y_pred))
logger.info(f"Res = {res + step * old_sign:.3e}, Num of clusters = {new_num}")
if new_num == target_num:
@@ -586,7 +586,7 @@ def calc_adj_exp(self, adj: np.ndarray) -> np.ndarray:
adj_exp = np.exp(-1 * (adj**2) / (2 * (self.l**2)))
return adj_exp

- def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=0, opt="admin", init_spa=True,
+ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, epochs=2000, weight_decay=0, opt="admin", init_spa=True,
init="louvain", n_neighbors=10, n_clusters=None, res=0.4, tol=1e-3):
"""Fit function for model training.

@@ -600,7 +600,7 @@ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=
The number of component used in PCA.
lr : float
Learning rate.
- max_epochs : int
+ epochs : int
Maximum number of epochs.
weight_decay : float
Weight decay.
@@ -624,7 +624,7 @@ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=
self.num_pcs = num_pcs
self.res = res
self.lr = lr
- self.max_epochs = max_epochs
+ self.epochs = epochs
self.weight_decay = weight_decay
self.opt = opt
self.init_spa = init_spa
@@ -638,9 +638,9 @@ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=

self.model = SimpleGCDEC(embed.shape[1], embed.shape[1])
adj_exp = self.calc_adj_exp(adj)
- self.model.fit(embed, adj_exp, lr=self.lr, max_epochs=self.max_epochs, weight_decay=self.weight_decay,
- opt=self.opt, init_spa=self.init_spa, init=self.init, n_neighbors=self.n_neighbors,
- n_clusters=self.n_clusters, res=self.res, tol=self.tol)
+ self.model.fit(embed, adj_exp, lr=self.lr, epochs=self.epochs, weight_decay=self.weight_decay, opt=self.opt,
+ init_spa=self.init_spa, init=self.init, n_neighbors=self.n_neighbors, n_clusters=self.n_clusters,
+ res=self.res, tol=self.tol)

def predict_proba(self, x):
"""Prediction function.
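For reference, a minimal caller-side sketch of the rename from `max_epochs` to `epochs` (hypothetical values; `x` and `adj` stand for the preprocessed expression matrix and spatial distance matrix that the dance examples construct before fitting):

```python
from dance.modules.spatial.spatial_domain.spagcn import SpaGCN

model = SpaGCN(0.5)  # smoothing parameter l, e.g. the value returned by model.search_l
# Before this change the keyword was max_epochs; it is now epochs.
pred = model.fit_predict((x, adj), init_spa=True, init="louvain",
                         tol=5e-3, lr=0.05, epochs=200, res=0.4)
```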
8 changes: 4 additions & 4 deletions examples/spatial/spatial_domain/spagcn.py
@@ -18,7 +18,7 @@
parser.add_argument("--end", type=float, default=1000, help="ending value for searching l.")
parser.add_argument("--tol", type=float, default=5e-3, help="tolerant value for searching l.")
parser.add_argument("--max_run", type=int, default=200, help="max runs.")
parser.add_argument("--max_epochs", type=int, default=200, help="max epochs.")
parser.add_argument("--epochs", type=int, default=200, help="Number of epochs.")
parser.add_argument("--n_clusters", type=int, default=7, help="the number of clusters")
parser.add_argument("--step", type=float, default=0.1, help="")
parser.add_argument("--lr", type=float, default=0.05, help="learning rate")
@@ -39,10 +39,10 @@
l = model.search_l(args.p, adj, start=args.start, end=args.end, tol=args.tol, max_run=args.max_run)
model.set_l(l)
res = model.search_set_res((x, adj), l=l, target_num=args.n_clusters, start=0.4, step=args.step, tol=args.tol,
- lr=args.lr, max_epochs=args.max_epochs, max_run=args.max_run)
+ lr=args.lr, epochs=args.epochs, max_run=args.max_run)

- pred = model.fit_predict((x, adj), init_spa=True, init="louvain", tol=args.tol, lr=args.lr,
- max_epochs=args.max_epochs, res=res)
+ pred = model.fit_predict((x, adj), init_spa=True, init="louvain", tol=args.tol, lr=args.lr, epochs=args.epochs,
+ res=res)
score = model.default_score_func(y, pred)
print(f"ARI: {score:.4f}")

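The example's CLI changes accordingly; a minimal sketch covering only the renamed flag (the real script defines many more arguments):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200, help="Number of epochs.")
args = parser.parse_args(["--epochs", "2"])  # the old --max_epochs spelling is no longer defined
print(args.epochs)  # -> 2
```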
1 change: 1 addition & 0 deletions requirements.txt
@@ -5,6 +5,7 @@ networkx==3.0
numba==0.56.2
numpy==1.22.4
opencv-python==4.7.0.68
+ openpyxl==3.1.1
pandas==1.5.3
pyro-ppl==1.8.4
python-igraph==0.10.1
1 change: 1 addition & 0 deletions setup.cfg
@@ -38,6 +38,7 @@ install_requires =
numba
numpy
opencv-python
+ openpyxl
pandas
pyro-ppl
python-igraph
48 changes: 24 additions & 24 deletions tests/test_bench.py
@@ -29,10 +29,10 @@
"cell_type_annotation-singlecellnet-spleen": "--species mouse --tissue Spleen --train_dataset 1970 --test_dataset 1759",
"cell_type_annotation-svm-spleen": "--species mouse --tissue Spleen --train_dataset 1970 --test_dataset 1759",
"clustering-graphsc-10X_PBMC": "--dataset 10X_PBMC --epochs 2",
"clustering-scdcc-10X_PBMC": "--data_file 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5 --maxiter 2 --pretrain_epochs 2",
"clustering-scdeepcluster-10X_PBMC": "--data_file 10X_PBMC --pretrain_epochs 2",
"clustering-scdsc-10X_PBMC": "--name 10X_PBMC --method cos --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4 --n_epochs 2 --pretrain_epochs 2",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 2 --epochs 2 --data_file 10X_PBMC --W_a 0.01 --W_x 3 --W_c 0.1 --dropout 0.5",
"clustering-scdcc-10X_PBMC": "--dataset 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5 --epochs 2 --pretrain_epochs 2",
"clustering-scdeepcluster-10X_PBMC": "--dataset 10X_PBMC --pretrain_epochs 2",
"clustering-scdsc-10X_PBMC": "--dataset 10X_PBMC --method cosine --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4 --epochs 2 --pretrain_epochs 2",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 2 --epochs 2 --dataset 10X_PBMC --w_a 0.01 --w_x 3 --w_c 0.1 --dropout 0.5",
"imputation-deepimpute-brain": "--train_dataset mouse_brain_data --filetype h5 --hidden_dim 200 --dropout 0.4 --n_epochs 2 --gpu 0",
"imputation-graphsci-brain": "--train_dataset mouse_brain_data --gpu 0 --n_epochs 2",
"imputation-scgnn-brain": "--train_dataset mouse_brain_data --Regu_epochs 2 --EM_epochs 2 --cluster_epochs 2 --GAEepochs 2 --gpu 0",
@@ -80,22 +80,22 @@
"clustering-graphsc-mouse_ES_cell": "--dataset mouse_ES_cell",
"clustering-graphsc-mouse_bladder_cell": "--dataset mouse_bladder_cell",
"clustering-graphsc-worm_neuron_cell": "--dataset worm_neuron_cell",
"clustering-scdcc-10X_PBMC": "--data_file 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5",
"clustering-scdcc-mouse_ES_cell": "--data_file mouse_ES_cell --label_cells_files label_mouse_ES_cell.txt --gamma 1 --ml_weight 0.8 --cl_weight 0.8",
"clustering-scdcc-mouse_bladder_cell": "--data_file mouse_bladder_cell --label_cells_files label_mouse_bladder_cell.txt --gamma 1.5 --pretrain_epochs 100 --sigma 3",
"clustering-scdcc-worm_neuron_cell": "--data_file worm_neuron_cell --label_cells_files label_worm_neuron_cell.txt --gamma 1 --pretrain_epochs 300",
"clustering-scdeepcluster-10X_PBMC": "--data_file 10X_PBMC",
"clustering-scdeepcluster-mouse_ES_cell": "--data_file mouse_ES_cell",
"clustering-scdeepcluster-mouse_bladder_cell": "--data_file mouse_bladder_cell --pretrain_epochs 300 --sigma 2.75",
"clustering-scdeepcluster-worm_neuron_cell": "--data_file worm_neuron_cell --pretrain_epochs 300",
"clustering-scdsc-10X_PBMC": "--name 10X_PBMC --method cos --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4",
"clustering-scdsc-mouse_ES_cell": "--name mouse_ES_cell --method heat --topk 50 --v 7 --binary_crossentropy_loss 0.1 --ce_loss 0.01 --re_loss 1.5 --zinb_loss 0.5 --sigma 0.1",
"clustering-scdsc-mouse_bladder_cell": "--name mouse_bladder_cell --method p --topk 50 --v 7 --binary_crossentropy_loss 2.5 --ce_loss 0.1 --re_loss 0.5 --zinb_loss 1.5 --sigma 0.6",
"clustering-scdsc-worm_neuron_cell": "--name worm_neuron_cell --method p --topk 20 --v 7 --binary_crossentropy_loss 2 --ce_loss 2 --re_loss 3 --zinb_loss 0.1 --sigma 0.4",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 100 --data_file 10X_PBMC --W_a 0.01 --W_x 3 --W_c 0.1 --dropout 0.5",
"clustering-sctag-mouse_ES_cell": "--data_file mouse_ES_cell --W_a 0.01 --W_x 2 --W_c 0.25 --k 1",
"clustering-sctag-mouse_bladder_cell": "--pretrain_epochs 100 --data_file mouse_bladder_cell --W_a 0.01 --W_x 0.75 --W_c 1",
"clustering-sctag-worm_neuron_cell": "--pretrain_epochs 100 --data_file worm_neuron_cell --W_a 0.1 --W_x 2.5 --W_c 3",
"clustering-scdcc-10X_PBMC": "--dataset 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5",
"clustering-scdcc-mouse_ES_cell": "--dataset mouse_ES_cell --label_cells_files label_mouse_ES_cell.txt --gamma 1 --ml_weight 0.8 --cl_weight 0.8",
"clustering-scdcc-mouse_bladder_cell": "--dataset mouse_bladder_cell --label_cells_files label_mouse_bladder_cell.txt --gamma 1.5 --pretrain_epochs 100 --sigma 3",
"clustering-scdcc-worm_neuron_cell": "--dataset worm_neuron_cell --label_cells_files label_worm_neuron_cell.txt --gamma 1 --pretrain_epochs 300",
"clustering-scdeepcluster-10X_PBMC": "--dataset 10X_PBMC",
"clustering-scdeepcluster-mouse_ES_cell": "--dataset mouse_ES_cell",
"clustering-scdeepcluster-mouse_bladder_cell": "--dataset mouse_bladder_cell --pretrain_epochs 300 --sigma 2.75",
"clustering-scdeepcluster-worm_neuron_cell": "--dataset worm_neuron_cell --pretrain_epochs 300",
"clustering-scdsc-10X_PBMC": "--dataset 10X_PBMC --method cosine --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4",
"clustering-scdsc-mouse_ES_cell": "--dataset mouse_ES_cell --method cosine --topk 50 --v 7 --binary_crossentropy_loss 0.1 --ce_loss 0.01 --re_loss 1.5 --zinb_loss 0.5 --sigma 0.1",
"clustering-scdsc-mouse_bladder_cell": "--dataset mouse_bladder_cell --method correlation --topk 50 --v 7 --binary_crossentropy_loss 2.5 --ce_loss 0.1 --re_loss 0.5 --zinb_loss 1.5 --sigma 0.6",
"clustering-scdsc-worm_neuron_cell": "--dataset worm_neuron_cell --method correlation --topk 20 --v 7 --binary_crossentropy_loss 2 --ce_loss 2 --re_loss 3 --zinb_loss 0.1 --sigma 0.4",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 100 --dataset 10X_PBMC --w_a 0.01 --w_x 3 --w_c 0.1 --dropout 0.5",
"clustering-sctag-mouse_ES_cell": "--dataset mouse_ES_cell --w_a 0.01 --w_x 2 --w_c 0.25 --k 1",
"clustering-sctag-mouse_bladder_cell": "--pretrain_epochs 100 --dataset mouse_bladder_cell --w_a 0.01 --w_x 0.75 --w_c 1",
"clustering-sctag-worm_neuron_cell": "--pretrain_epochs 100 --dataset worm_neuron_cell --w_a 0.1 --w_x 2.5 --w_c 3",
"imputation-deepimpute-brain": "--train_dataset mouse_brain_data --filetype h5 --hidden_dim 200 --dropout 0.4",
"imputation-deepimpute-embryo": "--train_dataset mouse_embryo_data --filetype gz --hidden_dim 200 --dropout 0.4",
"imputation-graphsci-brain": "--train_dataset mouse_brain_data --gpu 0",
@@ -104,13 +104,13 @@
"imputation-scgnn-embryo": "--train_dataset mouse_embryo_data --gpu 0",
# Multi modality
"joint_embedding-dcca-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-dcca-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-dcca-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-jae-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-jae-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-jae-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmogcn-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-scmogcn-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmogcn-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmvae-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-scmvae-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmvae-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"match_modality-cmae-gex_adt": "--subtask openproblems_bmmc_cite_phase2_rna --device cuda",
"match_modality-cmae-gex_atac": "--subtask openproblems_bmmc_multiome_phase2_rna --device cuda",
"match_modality-scmm-gex_adt": "--subtask openproblems_bmmc_cite_phase2_rna --device cuda",
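For intuition, a hedged sketch of how an option table like the one above could be consumed by a test bench: split each option string and forward it to the matching example script. This is an illustration only, not the repository's actual test_bench implementation; the script path layout and key format are assumptions.

```python
import shlex
import subprocess
import sys

# One entry copied from the table above; keys are assumed to follow "<task>-<method>-<dataset>".
OPTIONS = {
    "clustering-scdeepcluster-10X_PBMC": "--dataset 10X_PBMC --pretrain_epochs 2",
}

def run_example(key: str, options: str) -> None:
    task, method, _dataset = key.split("-", 2)
    script = f"examples/single_modality/{task}/{method}.py"  # assumed repo layout
    # Forward the CLI arguments exactly as listed in the option string.
    subprocess.run([sys.executable, script, *shlex.split(options)], check=True)

for key, opts in OPTIONS.items():
    run_example(key, opts)
```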