diff --git a/notebook/GMM_tf_and_targ_bonemarrow.jl b/notebook/GMM_tf_and_targ_bonemarrow.jl index 448c406..c715249 100644 --- a/notebook/GMM_tf_and_targ_bonemarrow.jl +++ b/notebook/GMM_tf_and_targ_bonemarrow.jl @@ -1,6 +1,6 @@ using Logging -using CDGRN +using CDGRNs using SnowyOwl using DataFrames using JLD2 @@ -11,9 +11,9 @@ Gadfly.set_default_plot_size(8inch, 6inch) ## Load data -dir = joinpath(CDGRN.PROJECT_PATH, "results", "bonemarrow") +dir = joinpath(CDGRNs.PROJECT_PATH, "results", "bonemarrow") prof = load_profile(dir) -tf_set = CDGRN.load_tfs(joinpath(dir, "tf_set.jld2")) +tf_set = CDGRNs.load_tfs(joinpath(dir, "tf_set.jld2")) tfs = select_genes!(copy(prof), tf_set) select_high_likelihood!(prof) @@ -32,7 +32,7 @@ function log_likelihood_plot(df, tf_name, gene_name, mix_logpdf; coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) xlabel = "log spliced RNA of TF gene, $(tf_name)" ylabel = "log unspliced RNA of target gene, $(gene_name)" - filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log likelihood plot.svg") + filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log likelihood plot.svg") plot(l1, l2, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch) end @@ -42,7 +42,7 @@ function cluster_plot(df, tf_name, gene_name; xmax=ceil(maximum(df.logX)), xmin= coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) xlabel = "log spliced RNA of TF gene, $(tf_name)" ylabel = "log unspliced RNA of target gene, $(gene_name)" - filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log cluster plot.svg") + filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "$(tf_name)-$(gene_name) log cluster plot.svg") plot(l, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch) end diff --git a/notebook/GMM_tf_and_targ_dentategyrus.jl b/notebook/GMM_tf_and_targ_dentategyrus.jl index 599eceb..f528e51 100644 --- a/notebook/GMM_tf_and_targ_dentategyrus.jl +++ b/notebook/GMM_tf_and_targ_dentategyrus.jl @@ -1,6 +1,6 @@ using Logging -using CDGRN +using CDGRNs using SnowyOwl using DataFrames using JLD2 @@ -11,9 +11,9 @@ Gadfly.set_default_plot_size(8inch, 6inch) ## Load data -dir = joinpath(CDGRN.PROJECT_PATH, "results", "dentategyrus") +dir = joinpath(CDGRNs.PROJECT_PATH, "results", "dentategyrus") prof = load_profile(dir) -tf_set = CDGRN.load_tfs(joinpath(dir, "tf_set.jld2")) +tf_set = CDGRNs.load_tfs(joinpath(dir, "tf_set.jld2")) tfs = select_genes!(copy(prof), tf_set) select_high_likelihood!(prof) @@ -32,7 +32,7 @@ function log_likelihood_plot(df, tf_name, gene_name, mix_logpdf; coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) xlabel = "log spliced RNA of TF gene, $(tf_name)" ylabel = "log unspliced RNA of target gene, $(gene_name)" - filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log likelihood plot.svg") + filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log likelihood plot.svg") plot(l1, l2, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch) end @@ -42,7 +42,7 @@ function cluster_plot(df, tf_name, gene_name; xmax=ceil(maximum(df.logX)), xmin= coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) xlabel = "log spliced RNA of TF gene, $(tf_name)" ylabel = "log unspliced RNA of target gene, $(gene_name)" - filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log cluster plot.svg") + filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "$(tf_name)-$(gene_name) log cluster plot.svg") plot(l, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch) end diff --git a/notebook/GMM_tf_and_targ_gastrulation_erythroid.jl b/notebook/GMM_tf_and_targ_gastrulation_erythroid.jl index 33bde62..f4e2f44 100644 --- a/notebook/GMM_tf_and_targ_gastrulation_erythroid.jl +++ b/notebook/GMM_tf_and_targ_gastrulation_erythroid.jl @@ -1,6 +1,6 @@ using Logging -using CDGRN +using CDGRNs using SnowyOwl using DataFrames using JLD2 @@ -11,9 +11,9 @@ Gadfly.set_default_plot_size(8inch, 6inch) ## Load data -dir = joinpath(CDGRN.PROJECT_PATH, "results", "gastrulation_erythroid") +dir = joinpath(CDGRNs.PROJECT_PATH, "results", "gastrulation_erythroid") prof = load_profile(dir) -tf_set = CDGRN.load_tfs(joinpath(dir, "tf_set.jld2")) +tf_set = CDGRNs.load_tfs(joinpath(dir, "tf_set.jld2")) tfs = select_genes!(copy(prof), tf_set) select_high_likelihood!(prof) @@ -32,7 +32,7 @@ function log_likelihood_plot(df, tf_name, gene_name, mix_logpdf; coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) xlabel = "log spliced RNA of TF gene, $(tf_name)" ylabel = "log unspliced RNA of target gene, $(gene_name)" - filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log likelihood plot.svg") + filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log likelihood plot.svg") plot(l1, l2, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch) end @@ -42,7 +42,7 @@ function cluster_plot(df, tf_name, gene_name; xmax=ceil(maximum(df.logX)), xmin= coord = Coord.cartesian(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax) xlabel = "log spliced RNA of TF gene, $(tf_name)" ylabel = "log unspliced RNA of target gene, $(gene_name)" - filepath = joinpath(CDGRN.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log cluster plot.svg") + filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "gastrulation_erythroid", "$(tf_name)-$(gene_name) log cluster plot.svg") plot(l, coord, Guide.xlabel(xlabel), Guide.ylabel(ylabel)) |> SVG(filepath, 10inch, 6inch) end diff --git a/notebook/UMAP_bonemarrow.jl b/notebook/UMAP_bonemarrow.jl index d165262..edd4f26 100644 --- a/notebook/UMAP_bonemarrow.jl +++ b/notebook/UMAP_bonemarrow.jl @@ -1,8 +1,5 @@ using CDGRNs using DataFrames -using CSV -using JLD2 -using FileIO using UMAP using Plots using StatsPlots @@ -28,20 +25,68 @@ filename = joinpath(dir, "GMM-model-selection-result.jld2") cor_pairs, nonsingle_pairs = regulation_correlation(filename) true_regulations, true_reg_pairs = remove_spurious_pairs(cor_pairs, nonsingle_pairs) +k = 9 +tree, cell_clusters = build_tree(prof, true_reg_pairs, col_palette=:default) +extract_context!(cell_clusters, tree, k) + df = get_regulation_expr(prof, tfs, true_regulations) embedding = umap(Array(df[:, 3:end])', 2, n_neighbors=20, min_dist=0.5) df.UMAP1 = embedding[1, :] df.UMAP2 = embedding[2, :] +df.k9 = cell_clusters.k9 + +plot_configs = (markersize=2, markerstrokewidth=0, dpi=300, size=(1000, 600), thickness_scaling=2, widen=false) -plot_configs = (markersize=2, markerstrokewidth=0, dpi=300) +p = @df df scatter(:UMAP1, :UMAP2; group=:cell, xlabel="UMAP1", ylabel="UMAP2", color_palette=:glasbey_hv_n256, + legend=:outertopright, markersize=2, markerstrokewidth=0, + dpi=300, size=(1000, 600), thickness_scaling=2, widen=false) +savefig(p, joinpath(fig_dir, "UMAP", "umap-cell type.svg")) +savefig(p, joinpath(fig_dir, "UMAP", "umap-cell type.png")) -@df df scatter(:UMAP1, :UMAP2; group=:cell, xlabel="UMAP1", ylabel="UMAP2", color_palette=:glasbey_hv_n256, legend=:outertopright, plot_configs...) -savefig(joinpath(fig_dir, "UMAP", "umap-cell type.svg")) -savefig(joinpath(fig_dir, "UMAP", "umap-cell type.png")) +p = @df df scatter(:UMAP1, :UMAP2; group=:k9, xlabel="UMAP1", ylabel="UMAP2", color_palette=:glasbey_hv_n256, + legend=:outerright, markersize=2, markerstrokewidth=0, + dpi=300, size=(800, 600), thickness_scaling=2, widen=false) +savefig(p, joinpath(fig_dir, "UMAP", "umap-context.svg")) +savefig(p, joinpath(fig_dir, "UMAP", "umap-context.png")) -@df df scatter(:UMAP1, :UMAP2; zcolor=:time, c=:coolwarm, xlabel="UMAP1", ylabel="UMAP2", +p = @df df scatter(:UMAP1, :UMAP2; zcolor=:time, c=:coolwarm, xlabel="UMAP1", ylabel="UMAP2", legend=false, cb=:outerright, plot_configs...) -savefig(joinpath(fig_dir, "UMAP", "umap-latent time.svg")) -savefig(joinpath(fig_dir, "UMAP", "umap-latent time.png")) +savefig(p, joinpath(fig_dir, "UMAP", "umap-latent time.svg")) +savefig(p, joinpath(fig_dir, "UMAP", "umap-latent time.png")) + + +# Train CDGRNs over contexts + +cortable = train_cdgrns(tfs, prof, true_regulations, + cell_clusters.k9, [2, 3, 6, 7, 8], + joinpath(dir, "cdgrn_k9")) + + +# Network entropy + +cdgrn_stats = DataFrame(cntx=String[], V=Int[], E=Int[], entropy=Float64[]) +for i in [2, 7, 6, 3] + E = nrow(cortable[i]) + V = length(unique(vcat(cortable[i].tf, cortable[i].target))) + cortable[i].dist = cor2dist.(cortable[i].ρ) + entr = network_entropy(to_graph(cortable[i])) + push!(cdgrn_stats, ("$i", V, E, entr)) +end +cdgrn_stats[!, :order] = [1, 2, 3, 4] + +p = @df cdgrn_stats plot(:order, [:V, :E], + label=["Number of node" "Number of edge"], + xlabel="Context", ylabel="Network size", legend=:topright, + thickness_scaling=2, widen=false, dpi=300, size=(800, 600) +) +xticks!([1:4;], cdgrn_stats[!,:cntx]) +filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "CDGRN", "network_size k9.png") +savefig(p, filepath) -CSV.write(joinpath(dir, "umap.tsv"), df, delim='\t') +p = @df cdgrn_stats plot(:order, :entropy, label="Network entropy", + xlabel="Context", ylabel="Network entropy", legend=:topright, + thickness_scaling=2, widen=false, dpi=300, size=(800, 600) +) +xticks!([1:4;], cdgrn_stats[!,:cntx]) +filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "bonemarrow", "CDGRN", "network_entropy k9.png") +savefig(p, filepath) diff --git a/notebook/train_dentategyrus_cdgrn k6.jl b/notebook/train_dentategyrus_cdgrn k6.jl index dc7cc93..2139728 100644 --- a/notebook/train_dentategyrus_cdgrn k6.jl +++ b/notebook/train_dentategyrus_cdgrn k6.jl @@ -66,13 +66,13 @@ p = @df cdgrn_stats plot(:order, [:V, :E], xlabel="Context", ylabel="Network size" ) xticks!([1:4;], cdgrn_stats[!,:cntx]) -filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_size k9.png") +filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_size k6.png") savefig(p, filepath) p = @df cdgrn_stats plot(:order, :entropy, xlabel="Context", ylabel="Network entropy", legend=false) xticks!([1:4;], cdgrn_stats[!,:cntx]) -filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_entropy k9.png") +filepath = joinpath(CDGRNs.PROJECT_PATH, "pics", "dentategyrus", "CDGRN", "network_entropy k6.png") savefig(p, filepath)