Skip to content

Commit

Permalink
refine figures
Browse files Browse the repository at this point in the history
  • Loading branch information
yuehhua committed Jun 26, 2022
1 parent ec3aa81 commit 5fc9cbb
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 28 deletions.
10 changes: 5 additions & 5 deletions notebook/GMM_tf_and_targ_bonemarrow.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Logging

using CDGRN
using CDGRNs
using SnowyOwl
using DataFrames
using JLD2
Expand All @@ -11,9 +11,9 @@ Gadfly.set_default_plot_size(8inch, 6inch)

## Load data

dir = joinpath(CDGRN.PROJECT_PATH, "results", "bonemarrow")
dir = joinpath(CDGRNs.PROJECT_PATH, "results", "bonemarrow")
prof = load_profile(dir)
tf_set = CDGRN.load_tfs(joinpath(dir, "tf_set.jld2"))
tf_set = CDGRNs.load_tfs(joinpath(dir, "tf_set.jld2"))
tfs = select_genes!(copy(prof), tf_set)

select_high_likelihood!(prof)
Expand All @@ -32,7 +32,7 @@ function log_likelihood_plot(df, tf_name, gene_name, mix_logpdf;
coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
xlabel = "log spliced RNA of TF gene, $(tf_name)"
ylabel = "log unspliced RNA of target gene, $(gene_name)"
filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log likelihood plot.svg")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log likelihood plot.svg")
plot(l1, l2, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch)
end

Expand All @@ -42,7 +42,7 @@ function cluster_plot(df, tf_name, gene_name; xmax=ceil(maximum(df.logX)), xmin=
coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
xlabel = "log spliced RNA of TF gene, $(tf_name)"
ylabel = "log unspliced RNA of target gene, $(gene_name)"
filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log cluster plot.svg")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log cluster plot.svg")
plot(l, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch)
end

Expand Down
10 changes: 5 additions & 5 deletions notebook/GMM_tf_and_targ_dentategyrus.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Logging

using CDGRN
using CDGRNs
using SnowyOwl
using DataFrames
using JLD2
Expand All @@ -11,9 +11,9 @@ Gadfly.set_default_plot_size(8inch, 6inch)

## Load data

dir = joinpath(CDGRN.PROJECT_PATH, "results", "dentategyrus")
dir = joinpath(CDGRNs.PROJECT_PATH, "results", "dentategyrus")
prof = load_profile(dir)
tf_set = CDGRN.load_tfs(joinpath(dir, "tf_set.jld2"))
tf_set = CDGRNs.load_tfs(joinpath(dir, "tf_set.jld2"))
tfs = select_genes!(copy(prof), tf_set)

select_high_likelihood!(prof)
Expand All @@ -32,7 +32,7 @@ function log_likelihood_plot(df, tf_name, gene_name, mix_logpdf;
coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
xlabel = "log spliced RNA of TF gene, $(tf_name)"
ylabel = "log unspliced RNA of target gene, $(gene_name)"
filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log likelihood plot.svg")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log likelihood plot.svg")
plot(l1, l2, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch)
end

Expand All @@ -42,7 +42,7 @@ function cluster_plot(df, tf_name, gene_name; xmax=ceil(maximum(df.logX)), xmin=
coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
xlabel = "log spliced RNA of TF gene, $(tf_name)"
ylabel = "log unspliced RNA of target gene, $(gene_name)"
filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log cluster plot.svg")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log cluster plot.svg")
plot(l, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch)
end

Expand Down
10 changes: 5 additions & 5 deletions notebook/GMM_tf_and_targ_gastrulation_erythroid.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Logging

using CDGRN
using CDGRNs
using SnowyOwl
using DataFrames
using JLD2
Expand All @@ -11,9 +11,9 @@ Gadfly.set_default_plot_size(8inch, 6inch)

## Load data

dir = joinpath(CDGRN.PROJECT_PATH, "results", "gastrulation_erythroid")
dir = joinpath(CDGRNs.PROJECT_PATH, "results", "gastrulation_erythroid")
prof = load_profile(dir)
tf_set = CDGRN.load_tfs(joinpath(dir, "tf_set.jld2"))
tf_set = CDGRNs.load_tfs(joinpath(dir, "tf_set.jld2"))
tfs = select_genes!(copy(prof), tf_set)

select_high_likelihood!(prof)
Expand All @@ -32,7 +32,7 @@ function log_likelihood_plot(df, tf_name, gene_name, mix_logpdf;
coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
xlabel = "log spliced RNA of TF gene, $(tf_name)"
ylabel = "log unspliced RNA of target gene, $(gene_name)"
filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log likelihood plot.svg")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log likelihood plot.svg")
plot(l1, l2, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch)
end

Expand All @@ -42,7 +42,7 @@ function cluster_plot(df, tf_name, gene_name; xmax=ceil(maximum(df.logX)), xmin=
coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
xlabel = "log spliced RNA of TF gene, $(tf_name)"
ylabel = "log unspliced RNA of target gene, $(gene_name)"
filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log cluster plot.svg")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log cluster plot.svg")
plot(l, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch)
end

Expand Down
67 changes: 56 additions & 11 deletions notebook/UMAP_bonemarrow.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
using CDGRNs
using DataFrames
using CSV
using JLD2
using FileIO
using UMAP
using Plots
using StatsPlots
Expand All @@ -28,20 +25,68 @@ filename = joinpath(dir, "GMM-model-selection-result.jld2")
cor_pairs, nonsingle_pairs = regulation_correlation(filename)
true_regulations, true_reg_pairs = remove_spurious_pairs(cor_pairs, nonsingle_pairs)

k = 9
tree, cell_clusters = build_tree(prof, true_reg_pairs, col_palette=:default)
extract_context!(cell_clusters, tree, k)

df = get_regulation_expr(prof, tfs, true_regulations)
embedding = umap(Array(df[:, 3:end])', 2, n_neighbors=20, min_dist=0.5)
df.UMAP1 = embedding[1, :]
df.UMAP2 = embedding[2, :]
df.k9 = cell_clusters.k9

plot_configs = (markersize=2, markerstrokewidth=0, dpi=300, size=(1000, 600), thickness_scaling=2, widen=false)

plot_configs = (markersize=2, markerstrokewidth=0, dpi=300)
p = @df df scatter(:UMAP1, :UMAP2; group=:cell, xlabel="UMAP1", ylabel="UMAP2", color_palette=:glasbey_hv_n256,
legend=:outertopright, markersize=2, markerstrokewidth=0,
dpi=300, size=(1000, 600), thickness_scaling=2, widen=false)
savefig(p, joinpath(fig_dir, "UMAP", "umap-cell type.svg"))
savefig(p, joinpath(fig_dir, "UMAP", "umap-cell type.png"))

@df df scatter(:UMAP1, :UMAP2; group=:cell, xlabel="UMAP1", ylabel="UMAP2", color_palette=:glasbey_hv_n256, legend=:outertopright, plot_configs...)
savefig(joinpath(fig_dir, "UMAP", "umap-cell type.svg"))
savefig(joinpath(fig_dir, "UMAP", "umap-cell type.png"))
p = @df df scatter(:UMAP1, :UMAP2; group=:k9, xlabel="UMAP1", ylabel="UMAP2", color_palette=:glasbey_hv_n256,
legend=:outerright, markersize=2, markerstrokewidth=0,
dpi=300, size=(800, 600), thickness_scaling=2, widen=false)
savefig(p, joinpath(fig_dir, "UMAP", "umap-context.svg"))
savefig(p, joinpath(fig_dir, "UMAP", "umap-context.png"))

@df df scatter(:UMAP1, :UMAP2; zcolor=:time, c=:coolwarm, xlabel="UMAP1", ylabel="UMAP2",
p = @df df scatter(:UMAP1, :UMAP2; zcolor=:time, c=:coolwarm, xlabel="UMAP1", ylabel="UMAP2",
legend=false, cb=:outerright, plot_configs...)
savefig(joinpath(fig_dir, "UMAP", "umap-latent time.svg"))
savefig(joinpath(fig_dir, "UMAP", "umap-latent time.png"))
savefig(p, joinpath(fig_dir, "UMAP", "umap-latent time.svg"))
savefig(p, joinpath(fig_dir, "UMAP", "umap-latent time.png"))


# Train CDGRNs over contexts

cortable = train_cdgrns(tfs, prof, true_regulations,
cell_clusters.k9, [2, 3, 6, 7, 8],
joinpath(dir, "cdgrn_k9"))


# Network entropy

cdgrn_stats = DataFrame(cntx=String[], V=Int[], E=Int[], entropy=Float64[])
for i in [2, 7, 6, 3]
E = nrow(cortable[i])
V = length(unique(vcat(cortable[i].tf, cortable[i].target)))
cortable[i].dist = cor2dist.(cortable[i].ρ)
entr = network_entropy(to_graph(cortable[i]))
push!(cdgrn_stats, ("$i", V, E, entr))
end
cdgrn_stats[!, :order] = [1, 2, 3, 4]

p = @df cdgrn_stats plot(:order, [:V, :E],
label=["Number of node" "Number of edge"],
xlabel="Context", ylabel="Network size", legend=:topright,
thickness_scaling=2, widen=false, dpi=300, size=(800, 600)
)
xticks!([1:4;], cdgrn_stats[!,:cntx])
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "CDGRN", "network_size k9.png")
savefig(p, filepath)

CSV.write(joinpath(dir, "umap.tsv"), df, delim='\t')
p = @df cdgrn_stats plot(:order, :entropy, label="Network entropy",
xlabel="Context", ylabel="Network entropy", legend=:topright,
thickness_scaling=2, widen=false, dpi=300, size=(800, 600)
)
xticks!([1:4;], cdgrn_stats[!,:cntx])
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "CDGRN", "network_entropy k9.png")
savefig(p, filepath)
4 changes: 2 additions & 2 deletions notebook/train_dentategyrus_cdgrn k6.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ p = @df cdgrn_stats plot(:order, [:V, :E],
xlabel="Context", ylabel="Network size"
)
xticks!([1:4;], cdgrn_stats[!,:cntx])
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_size k9.png")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_size k6.png")
savefig(p, filepath)

p = @df cdgrn_stats plot(:order, :entropy,
xlabel="Context", ylabel="Network entropy", legend=false)
xticks!([1:4;], cdgrn_stats[!,:cntx])
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_entropy k9.png")
filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_entropy k6.png")
savefig(p, filepath)


Expand Down

0 comments on commit 5fc9cbb

Please sign in to comment.