fix: update test bench cli arguments for clustering examples #205

Merged · 5 commits · Feb 20, 2023
36 changes: 18 additions & 18 deletions dance/modules/spatial/spatial_domain/spagcn.py
@@ -203,7 +203,7 @@ def target_distribution(self, q):
p = p / torch.sum(p, dim=1, keepdim=True)
return p

- def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_interval=50, weight_decay=5e-4,
+ def fit(self, X, adj, lr=0.001, epochs=5000, update_interval=3, trajectory_interval=50, weight_decay=5e-4,
opt="sgd", init="louvain", n_neighbors=10, res=0.4, n_clusters=10, init_spa=True, tol=1e-3):
"""Fit function for model training.

@@ -215,7 +215,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_i
Adjacent matrix.
lr : float
Learning rate.
- max_epochs : int
+ epochs : int
Maximum number of epochs.
update_interval : int
Interval for update.
@@ -288,7 +288,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_i
adj = adj.to(device)

self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(X, adj)
p = self.target_distribution(q).data
@@ -317,7 +317,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=5000, update_interval=3, trajectory_i
X = X.cpu()
adj = adj.cpu()

- def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=5000, update_interval=1, weight_decay=5e-4, opt="sgd"):
+ def fit_with_init(self, X, adj, init_y, lr=0.001, epochs=5000, update_interval=1, weight_decay=5e-4, opt="sgd"):
"""Initializing cluster centers with kmeans."""
logger.info("Initializing cluster centers with kmeans.")
if opt == "sgd":
@@ -340,7 +340,7 @@ def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=5000, update_interv
adj = adj.to(device)

self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(torch.FloatTensor(X), torch.FloatTensor(adj))
p = self.target_distribution(q).data
@@ -399,7 +399,7 @@ def target_distribution(self, q):
p = p / torch.sum(p, dim=1, keepdim=True)
return p

- def fit(self, X, adj, lr=0.001, max_epochs=10, update_interval=5, weight_decay=5e-4, opt="sgd", init="louvain",
+ def fit(self, X, adj, lr=0.001, epochs=10, update_interval=5, weight_decay=5e-4, opt="sgd", init="louvain",
n_neighbors=10, res=0.4):
self.trajectory = []
logger.info("Initializing cluster centers with kmeans.")
@@ -431,7 +431,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=10, update_interval=5, weight_decay=5

self.mu.data.copy_(torch.Tensor(cluster_centers))
self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(X, adj)
p = self.target_distribution(q).data
@@ -444,7 +444,7 @@ def fit(self, X, adj, lr=0.001, max_epochs=10, update_interval=5, weight_decay=5
optimizer.step()
self.trajectory.append(torch.argmax(q, dim=1).data.cpu().numpy())

- def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=10, update_interval=1, weight_decay=5e-4, opt="sgd"):
+ def fit_with_init(self, X, adj, init_y, lr=0.001, epochs=10, update_interval=1, weight_decay=5e-4, opt="sgd"):
logger.info("Initializing cluster centers with kmeans.")
if opt == "sgd":
optimizer = optim.SGD(self.parameters(), lr=lr, momentum=0.9)
@@ -459,7 +459,7 @@ def fit_with_init(self, X, adj, init_y, lr=0.001, max_epochs=10, update_interval
cluster_centers = np.asarray(Mergefeature.groupby("Group").mean())
self.mu.data.copy_(torch.Tensor(cluster_centers))
self.train()
- for epoch in range(max_epochs):
+ for epoch in range(epochs):
if epoch % update_interval == 0:
_, q = self.forward(torch.FloatTensor(X), torch.FloatTensor(adj))
p = self.target_distribution(q).data
@@ -546,20 +546,20 @@ def set_l(self, l):
"""
self.l = l

- def search_set_res(self, x, l, target_num, start=0.4, step=0.1, tol=5e-3, lr=0.05, max_epochs=10, max_run=10):
+ def search_set_res(self, x, l, target_num, start=0.4, step=0.1, tol=5e-3, lr=0.05, epochs=10, max_run=10):
"""Search for optimal resolution parameter."""
res = start
logger.info(f"Start at {res = :.4f}, {step = :.4f}")
clf = SpaGCN(l)
- y_pred = clf.fit_predict(x, init_spa=True, init="louvain", res=res, tol=tol, lr=lr, max_epochs=max_epochs)
+ y_pred = clf.fit_predict(x, init_spa=True, init="louvain", res=res, tol=tol, lr=lr, epochs=epochs)
old_num = len(set(y_pred))
logger.info(f"Res = {res:.4f}, Num of clusters = {old_num}")
run = 0
while old_num != target_num:
old_sign = 1 if (old_num < target_num) else -1
clf = SpaGCN(l)
y_pred = clf.fit_predict(x, init_spa=True, init="louvain", res=res + step * old_sign, tol=tol, lr=lr,
- max_epochs=max_epochs)
+ epochs=epochs)
new_num = len(set(y_pred))
logger.info(f"Res = {res + step * old_sign:.3e}, Num of clusters = {new_num}")
if new_num == target_num:
@@ -586,7 +586,7 @@ def calc_adj_exp(self, adj: np.ndarray) -> np.ndarray:
adj_exp = np.exp(-1 * (adj**2) / (2 * (self.l**2)))
return adj_exp

- def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=0, opt="admin", init_spa=True,
+ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, epochs=2000, weight_decay=0, opt="admin", init_spa=True,
init="louvain", n_neighbors=10, n_clusters=None, res=0.4, tol=1e-3):
"""Fit function for model training.

@@ -600,7 +600,7 @@ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=
The number of component used in PCA.
lr : float
Learning rate.
- max_epochs : int
+ epochs : int
Maximum number of epochs.
weight_decay : float
Weight decay.
@@ -624,7 +624,7 @@ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=
self.num_pcs = num_pcs
self.res = res
self.lr = lr
- self.max_epochs = max_epochs
+ self.epochs = epochs
self.weight_decay = weight_decay
self.opt = opt
self.init_spa = init_spa
@@ -638,9 +638,9 @@ def fit(self, x, y=None, *, num_pcs=50, lr=0.005, max_epochs=2000, weight_decay=

self.model = SimpleGCDEC(embed.shape[1], embed.shape[1])
adj_exp = self.calc_adj_exp(adj)
- self.model.fit(embed, adj_exp, lr=self.lr, max_epochs=self.max_epochs, weight_decay=self.weight_decay,
- opt=self.opt, init_spa=self.init_spa, init=self.init, n_neighbors=self.n_neighbors,
- n_clusters=self.n_clusters, res=self.res, tol=self.tol)
+ self.model.fit(embed, adj_exp, lr=self.lr, epochs=self.epochs, weight_decay=self.weight_decay, opt=self.opt,
+ init_spa=self.init_spa, init=self.init, n_neighbors=self.n_neighbors, n_clusters=self.n_clusters,
+ res=self.res, tol=self.tol)

def predict_proba(self, x):
"""Prediction function.
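For reference, a minimal caller-side sketch of the rename from `max_epochs` to `epochs` (hypothetical values; `x` and `adj` stand for the preprocessed expression matrix and spatial distance matrix that the dance examples construct before fitting):

```python
from dance.modules.spatial.spatial_domain.spagcn import SpaGCN

model = SpaGCN(0.5)  # smoothing parameter l, e.g. the value returned by model.search_l
# Before this change the keyword was max_epochs; it is now epochs.
pred = model.fit_predict((x, adj), init_spa=True, init="louvain",
                         tol=5e-3, lr=0.05, epochs=200, res=0.4)
```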
8 changes: 4 additions & 4 deletions examples/spatial/spatial_domain/spagcn.py
@@ -18,7 +18,7 @@
parser.add_argument("--end", type=float, default=1000, help="ending value for searching l.")
parser.add_argument("--tol", type=float, default=5e-3, help="tolerant value for searching l.")
parser.add_argument("--max_run", type=int, default=200, help="max runs.")
parser.add_argument("--max_epochs", type=int, default=200, help="max epochs.")
parser.add_argument("--epochs", type=int, default=200, help="Number of epochs.")
parser.add_argument("--n_clusters", type=int, default=7, help="the number of clusters")
parser.add_argument("--step", type=float, default=0.1, help="")
parser.add_argument("--lr", type=float, default=0.05, help="learning rate")
@@ -39,10 +39,10 @@
l = model.search_l(args.p, adj, start=args.start, end=args.end, tol=args.tol, max_run=args.max_run)
model.set_l(l)
res = model.search_set_res((x, adj), l=l, target_num=args.n_clusters, start=0.4, step=args.step, tol=args.tol,
- lr=args.lr, max_epochs=args.max_epochs, max_run=args.max_run)
+ lr=args.lr, epochs=args.epochs, max_run=args.max_run)

- pred = model.fit_predict((x, adj), init_spa=True, init="louvain", tol=args.tol, lr=args.lr,
- max_epochs=args.max_epochs, res=res)
+ pred = model.fit_predict((x, adj), init_spa=True, init="louvain", tol=args.tol, lr=args.lr, epochs=args.epochs,
+ res=res)
score = model.default_score_func(y, pred)
print(f"ARI: {score:.4f}")

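The example's CLI changes accordingly; a minimal sketch covering only the renamed flag (the real script defines many more arguments):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--epochs", type=int, default=200, help="Number of epochs.")
args = parser.parse_args(["--epochs", "2"])  # the old --max_epochs spelling is no longer defined
print(args.epochs)  # -> 2
```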
1 change: 1 addition & 0 deletions requirements.txt
@@ -5,6 +5,7 @@ networkx==3.0
numba==0.56.2
numpy==1.22.4
opencv-python==4.7.0.68
+ openpyxl==3.1.1
pandas==1.5.3
pyro-ppl==1.8.4
python-igraph==0.10.1
1 change: 1 addition & 0 deletions setup.cfg
@@ -38,6 +38,7 @@ install_requires =
numba
numpy
opencv-python
+ openpyxl
pandas
pyro-ppl
python-igraph
48 changes: 24 additions & 24 deletions tests/test_bench.py
@@ -29,10 +29,10 @@
"cell_type_annotation-singlecellnet-spleen": "--species mouse --tissue Spleen --train_dataset 1970 --test_dataset 1759",
"cell_type_annotation-svm-spleen": "--species mouse --tissue Spleen --train_dataset 1970 --test_dataset 1759",
"clustering-graphsc-10X_PBMC": "--dataset 10X_PBMC --epochs 2",
"clustering-scdcc-10X_PBMC": "--data_file 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5 --maxiter 2 --pretrain_epochs 2",
"clustering-scdeepcluster-10X_PBMC": "--data_file 10X_PBMC --pretrain_epochs 2",
"clustering-scdsc-10X_PBMC": "--name 10X_PBMC --method cos --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4 --n_epochs 2 --pretrain_epochs 2",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 2 --epochs 2 --data_file 10X_PBMC --W_a 0.01 --W_x 3 --W_c 0.1 --dropout 0.5",
"clustering-scdcc-10X_PBMC": "--dataset 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5 --epochs 2 --pretrain_epochs 2",
"clustering-scdeepcluster-10X_PBMC": "--dataset 10X_PBMC --pretrain_epochs 2",
"clustering-scdsc-10X_PBMC": "--dataset 10X_PBMC --method cosine --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4 --epochs 2 --pretrain_epochs 2",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 2 --epochs 2 --dataset 10X_PBMC --w_a 0.01 --w_x 3 --w_c 0.1 --dropout 0.5",
"imputation-deepimpute-brain": "--train_dataset mouse_brain_data --filetype h5 --hidden_dim 200 --dropout 0.4 --n_epochs 2 --gpu 0",
"imputation-graphsci-brain": "--train_dataset mouse_brain_data --gpu 0 --n_epochs 2",
"imputation-scgnn-brain": "--train_dataset mouse_brain_data --Regu_epochs 2 --EM_epochs 2 --cluster_epochs 2 --GAEepochs 2 --gpu 0",
@@ -80,22 +80,22 @@
"clustering-graphsc-mouse_ES_cell": "--dataset mouse_ES_cell",
"clustering-graphsc-mouse_bladder_cell": "--dataset mouse_bladder_cell",
"clustering-graphsc-worm_neuron_cell": "--dataset worm_neuron_cell",
"clustering-scdcc-10X_PBMC": "--data_file 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5",
"clustering-scdcc-mouse_ES_cell": "--data_file mouse_ES_cell --label_cells_files label_mouse_ES_cell.txt --gamma 1 --ml_weight 0.8 --cl_weight 0.8",
"clustering-scdcc-mouse_bladder_cell": "--data_file mouse_bladder_cell --label_cells_files label_mouse_bladder_cell.txt --gamma 1.5 --pretrain_epochs 100 --sigma 3",
"clustering-scdcc-worm_neuron_cell": "--data_file worm_neuron_cell --label_cells_files label_worm_neuron_cell.txt --gamma 1 --pretrain_epochs 300",
"clustering-scdeepcluster-10X_PBMC": "--data_file 10X_PBMC",
"clustering-scdeepcluster-mouse_ES_cell": "--data_file mouse_ES_cell",
"clustering-scdeepcluster-mouse_bladder_cell": "--data_file mouse_bladder_cell --pretrain_epochs 300 --sigma 2.75",
"clustering-scdeepcluster-worm_neuron_cell": "--data_file worm_neuron_cell --pretrain_epochs 300",
"clustering-scdsc-10X_PBMC": "--name 10X_PBMC --method cos --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4",
"clustering-scdsc-mouse_ES_cell": "--name mouse_ES_cell --method heat --topk 50 --v 7 --binary_crossentropy_loss 0.1 --ce_loss 0.01 --re_loss 1.5 --zinb_loss 0.5 --sigma 0.1",
"clustering-scdsc-mouse_bladder_cell": "--name mouse_bladder_cell --method p --topk 50 --v 7 --binary_crossentropy_loss 2.5 --ce_loss 0.1 --re_loss 0.5 --zinb_loss 1.5 --sigma 0.6",
"clustering-scdsc-worm_neuron_cell": "--name worm_neuron_cell --method p --topk 20 --v 7 --binary_crossentropy_loss 2 --ce_loss 2 --re_loss 3 --zinb_loss 0.1 --sigma 0.4",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 100 --data_file 10X_PBMC --W_a 0.01 --W_x 3 --W_c 0.1 --dropout 0.5",
"clustering-sctag-mouse_ES_cell": "--data_file mouse_ES_cell --W_a 0.01 --W_x 2 --W_c 0.25 --k 1",
"clustering-sctag-mouse_bladder_cell": "--pretrain_epochs 100 --data_file mouse_bladder_cell --W_a 0.01 --W_x 0.75 --W_c 1",
"clustering-sctag-worm_neuron_cell": "--pretrain_epochs 100 --data_file worm_neuron_cell --W_a 0.1 --W_x 2.5 --W_c 3",
"clustering-scdcc-10X_PBMC": "--dataset 10X_PBMC --label_cells_files label_10X_PBMC.txt --gamma 1.5",
"clustering-scdcc-mouse_ES_cell": "--dataset mouse_ES_cell --label_cells_files label_mouse_ES_cell.txt --gamma 1 --ml_weight 0.8 --cl_weight 0.8",
"clustering-scdcc-mouse_bladder_cell": "--dataset mouse_bladder_cell --label_cells_files label_mouse_bladder_cell.txt --gamma 1.5 --pretrain_epochs 100 --sigma 3",
"clustering-scdcc-worm_neuron_cell": "--dataset worm_neuron_cell --label_cells_files label_worm_neuron_cell.txt --gamma 1 --pretrain_epochs 300",
"clustering-scdeepcluster-10X_PBMC": "--dataset 10X_PBMC",
"clustering-scdeepcluster-mouse_ES_cell": "--dataset mouse_ES_cell",
"clustering-scdeepcluster-mouse_bladder_cell": "--dataset mouse_bladder_cell --pretrain_epochs 300 --sigma 2.75",
"clustering-scdeepcluster-worm_neuron_cell": "--dataset worm_neuron_cell --pretrain_epochs 300",
"clustering-scdsc-10X_PBMC": "--dataset 10X_PBMC --method cosine --topk 30 --v 7 --binary_crossentropy_loss 0.75 --ce_loss 0.5 --re_loss 0.1 --zinb_loss 2.5 --sigma 0.4",
"clustering-scdsc-mouse_ES_cell": "--dataset mouse_ES_cell --method cosine --topk 50 --v 7 --binary_crossentropy_loss 0.1 --ce_loss 0.01 --re_loss 1.5 --zinb_loss 0.5 --sigma 0.1",
"clustering-scdsc-mouse_bladder_cell": "--dataset mouse_bladder_cell --method correlation --topk 50 --v 7 --binary_crossentropy_loss 2.5 --ce_loss 0.1 --re_loss 0.5 --zinb_loss 1.5 --sigma 0.6",
"clustering-scdsc-worm_neuron_cell": "--dataset worm_neuron_cell --method correlation --topk 20 --v 7 --binary_crossentropy_loss 2 --ce_loss 2 --re_loss 3 --zinb_loss 0.1 --sigma 0.4",
"clustering-sctag-10X_PBMC": "--pretrain_epochs 100 --dataset 10X_PBMC --w_a 0.01 --w_x 3 --w_c 0.1 --dropout 0.5",
"clustering-sctag-mouse_ES_cell": "--dataset mouse_ES_cell --w_a 0.01 --w_x 2 --w_c 0.25 --k 1",
"clustering-sctag-mouse_bladder_cell": "--pretrain_epochs 100 --dataset mouse_bladder_cell --w_a 0.01 --w_x 0.75 --w_c 1",
"clustering-sctag-worm_neuron_cell": "--pretrain_epochs 100 --dataset worm_neuron_cell --w_a 0.1 --w_x 2.5 --w_c 3",
"imputation-deepimpute-brain": "--train_dataset mouse_brain_data --filetype h5 --hidden_dim 200 --dropout 0.4",
"imputation-deepimpute-embryo": "--train_dataset mouse_embryo_data --filetype gz --hidden_dim 200 --dropout 0.4",
"imputation-graphsci-brain": "--train_dataset mouse_brain_data --gpu 0",
@@ -104,13 +104,13 @@
"imputation-scgnn-embryo": "--train_dataset mouse_embryo_data --gpu 0",
# Multi modality
"joint_embedding-dcca-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-dcca-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-dcca-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-jae-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-jae-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-jae-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmogcn-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-scmogcn-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmogcn-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmvae-gex_adt": "--subtask openproblems_bmmc_cite_phase2 --device cuda",
"joint_embedding-scmvae-gex_adt": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"joint_embedding-scmvae-gex_atac": "--subtask openproblems_bmmc_multiome_phase2 --device cuda",
"match_modality-cmae-gex_adt": "--subtask openproblems_bmmc_cite_phase2_rna --device cuda",
"match_modality-cmae-gex_atac": "--subtask openproblems_bmmc_multiome_phase2_rna --device cuda",
"match_modality-scmm-gex_adt": "--subtask openproblems_bmmc_cite_phase2_rna --device cuda",
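For intuition, a hedged sketch of how an option table like the one above could be consumed by a test bench: split each option string and forward it to the matching example script. This is an illustration only, not the repository's actual test_bench implementation; the script path layout and key format are assumptions.

```python
import shlex
import subprocess
import sys

# One entry copied from the table above; keys are assumed to follow "<task>-<method>-<dataset>".
OPTIONS = {
    "clustering-scdeepcluster-10X_PBMC": "--dataset 10X_PBMC --pretrain_epochs 2",
}

def run_example(key: str, options: str) -> None:
    task, method, _dataset = key.split("-", 2)
    script = f"examples/single_modality/{task}/{method}.py"  # assumed repo layout
    # Forward the CLI arguments exactly as listed in the option string.
    subprocess.run([sys.executable, script, *shlex.split(options)], check=True)

for key, opts in OPTIONS.items():
    run_example(key, opts)
```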