Merge pull request #776 from LCSB-BioCore/mk-make-the-cut

make the semantics cut
LCSB-BioCore · May 8, 2023 · 74ab0d0 · 74ab0d0
2 parents 6272230 + 9173d69
commit 74ab0d0
Show file tree

Hide file tree

Showing 44 changed files with 342 additions and 354 deletions.
diff --git a/docs/src/concepts/3_custom_models.md b/docs/src/concepts/3_custom_models.md
@@ -50,10 +50,10 @@ First, define the reactions and metabolites:
 
 ```julia
 COBREXA.reaction_count(m::CircularModel) = m.size
-COBREXA.n_metabolites(m::CircularModel) = m.size
+COBREXA.metabolite_count(m::CircularModel) = m.size
 
 COBREXA.reaction_ids(m::CircularModel) = ["rxn$i" for i in 1:reaction_count(m)]
-COBREXA.metabolites(m::CircularModel) = ["met$i" for i in 1:n_metabolites(m)]
+COBREXA.metabolite_ids(m::CircularModel) = ["met$i" for i in 1:metabolite_count(m)]
 ```
 
 It is useful to re-use the already defined functions, as that improves the code

diff --git a/docs/src/concepts/4_wrappers.md b/docs/src/concepts/4_wrappers.md
@@ -108,7 +108,7 @@ modifying the reaction list, stoichiometry, and bounds:
 COBREXA.unwrap_model(x::LeakyModel) = x.mdl
 COBREXA.reaction_count(x::LeakyModel) = reaction_count(x.mdl) + 1
 COBREXA.reaction_ids(x::LeakyModel) = [reaction_ids(x.mdl); "The Leak"]
-COBREXA.stoichiometry(x::LeakyModel) = [stoichiometry(x.mdl) [m in x.leaking_metabolites ? -1.0 : 0.0 for m = metabolites(x.mdl)]]
+COBREXA.stoichiometry(x::LeakyModel) = [stoichiometry(x.mdl) [m in x.leaking_metabolites ? -1.0 : 0.0 for m = metabolite_ids(x.mdl)]]
 function COBREXA.variable_bounds(x::LeakyModel)
     (l, u) = variable_bounds(x.mdl)
     return ([l; x.leak_rate], [u; x.leak_rate])

diff --git a/docs/src/examples/02_convert_save.jl b/docs/src/examples/02_convert_save.jl
@@ -41,7 +41,7 @@ open(f -> serialize(f, sm), "myModel.stdmodel", "w")
 # The models can then be loaded back using `deserialize`:
 
 sm2 = deserialize("myModel.stdmodel")
-issetequal(metabolites(sm), metabolites(sm2))
+issetequal(metabolite_ids(sm), metabolite_ids(sm2))
 
 # This form of loading operation is usually pretty quick:
 t = @elapsed deserialize("myModel.stdmodel")

diff --git a/docs/src/examples/04_standardmodel.jl b/docs/src/examples/04_standardmodel.jl
@@ -76,7 +76,7 @@ model.genes[random_gene_id]
 # The same idea holds for both metabolites (stored as `Metabolite`s) and
 # reactions (stored as `Reaction`s). This is demonstrated below.
 
-random_metabolite_id = metabolites(model)[rand(1:n_metabolites(model))]
+random_metabolite_id = metabolite_ids(model)[rand(1:metabolite_count(model))]
 model.metabolites[random_metabolite_id]
 #
 random_reaction_id = variable_ids(model)[rand(1:variable_count(model))]

diff --git a/src/analysis/modifications/community.jl b/src/analysis/modifications/community.jl
@@ -29,7 +29,7 @@ modify_abundances(new_abundances::Vector{Float64}) =
 
         n_vars = variable_count(model)
         n_env_vars = length(model.environmental_links)
-        n_cons = length(opt_model[:mb])
+        n_cons = length(opt_model[:metabolite_eqs])
         n_objs = model isa CommunityModel ? 0 : length(model.inner.members)
 
         row_offset =
@@ -38,7 +38,7 @@ modify_abundances(new_abundances::Vector{Float64}) =
         # fix abundance coefficients of species exchanges
         for (i, j, v) in zip(findnz(env_rows)...)
             ii = i + row_offset
-            set_normalized_coefficient(opt_model[:mb][ii], opt_model[:x][j], v)
+            set_normalized_coefficient(opt_model[:metabolite_eqs][ii], opt_model[:x][j], v)
         end
 
         column_offset =
@@ -48,6 +48,10 @@ modify_abundances(new_abundances::Vector{Float64}) =
         for (i, j, v) in zip(findnz(env_link)...)
             jj = j + column_offset
             ii = i + row_offset
-            set_normalized_coefficient(opt_model[:mb][ii], opt_model[:x][jj], -v)
+            set_normalized_coefficient(
+                opt_model[:metabolite_eqs][ii],
+                opt_model[:x][jj],
+                -v,
+            )
         end
     end
diff --git a/src/analysis/reconstruction/gapfill_minimum_reactions.jl b/src/analysis/reconstruction/gapfill_minimum_reactions.jl
@@ -54,7 +54,7 @@ function gapfill_minimum_reactions(
     precache!(model)
 
     # constraints from universal reactions that can fill gaps
-    univs = _universal_stoichiometry(universal_reactions, metabolites(model))
+    univs = _universal_stoichiometry(universal_reactions, metabolite_ids(model))
 
     # add space for additional metabolites and glue with the universal reaction
     # stoichiometry
@@ -89,7 +89,7 @@ function gapfill_minimum_reactions(
     @constraint(
         opt_model,
         extended_stoichiometry * [x; ux] .==
-        [balance(model); zeros(length(univs.new_mids))]
+        [metabolite_bounds(model); zeros(length(univs.new_mids))]
     )
 
     # objective bounds

diff --git a/src/analysis/sampling/affine_hit_and_run.jl b/src/analysis/sampling/affine_hit_and_run.jl
@@ -27,7 +27,7 @@ warmup_points = warmup_from_variability(model, GLPK.Optimizer)
 samples = affine_hit_and_run(model, warmup_points, sample_iters = 101:105)
 
 # convert the result to flux (for models where the distinction matters):
-fluxes = reaction_variables_matrix(model)' * samples
+fluxes = reaction_variables_matrix(model) * samples
 ```
 """
 function affine_hit_and_run(

diff --git a/src/analysis/variability_analysis.jl b/src/analysis/variability_analysis.jl
@@ -77,7 +77,7 @@ function variability_analysis(
     variability_analysis(
         model,
         optimizer;
-        directions = s.mapping_matrix(model)[:, indexes],
+        directions = (s.mapping_matrix(model)')[:, indexes],
         kwargs...,
     )
 end

diff --git a/src/io/h5.jl b/src/io/h5.jl
@@ -23,12 +23,12 @@ make new HDF5 models.
 function save_h5_model(model::AbstractMetabolicModel, file_name::String)::HDF5Model
     rxns = variable_ids(model)
     rxnp = sortperm(rxns)
-    mets = metabolites(model)
+    mets = metabolite_ids(model)
     metp = sortperm(mets)
     h5open(file_name, "w") do f
         write(f, "metabolites", mets[metp])
         write(f, "reactions", rxns[rxnp])
-        h5_write_sparse(create_group(f, "balance"), balance(model)[metp])
+        h5_write_sparse(create_group(f, "balance"), metabolite_bounds(model)[metp])
         h5_write_sparse(create_group(f, "objective"), objective(model)[rxnp])
         h5_write_sparse(create_group(f, "stoichiometry"), stoichiometry(model)[metp, rxnp])
         let (lbs, ubs) = variable_bounds(model)

diff --git a/src/io/show/AbstractMetabolicModel.jl b/src/io/show/AbstractMetabolicModel.jl
@@ -6,6 +6,6 @@ Pretty printing of everything metabolic-modelish.
 function Base.show(io::Base.IO, ::MIME"text/plain", m::AbstractMetabolicModel)
     print(
         io,
-        "$(typeof(m))(#= $(reaction_count(m)) reactions, $(n_metabolites(m)) metabolites =#)",
+        "$(typeof(m))(#= $(reaction_count(m)) reactions, $(metabolite_count(m)) metabolites =#)",
     )
 end
diff --git a/src/reconstruction/MatrixCoupling.jl b/src/reconstruction/MatrixCoupling.jl
@@ -363,15 +363,21 @@ end
 end
 
 @_remove_fn metabolite MatrixCoupling String inplace plural begin
-    remove_metabolites!(model, Int.(indexin(metabolite_ids, metabolites(model))))
+    remove_metabolites!(
+        model,
+        Int.(indexin(metabolite_ids, Accessors.metabolite_ids(model))),
+    )
 end
 
 @_remove_fn metabolite MatrixCoupling String begin
     remove_metabolites(model, [metabolite_id])
 end
 
 @_remove_fn metabolite MatrixCoupling String plural begin
-    remove_metabolites(model, Int.(indexin(metabolite_ids, metabolites(model))))
+    remove_metabolites(
+        model,
+        Int.(indexin(metabolite_ids, Accessors.metabolite_ids(model))),
+    )
 end
 
 """

diff --git a/src/reconstruction/MatrixModel.jl b/src/reconstruction/MatrixModel.jl
@@ -12,7 +12,7 @@ function add_reactions!(model::MatrixModel, rxns::Vector{Reaction})
     ubs = zeros(length(rxns))
     for (j, rxn) in enumerate(rxns)
         req = rxn.metabolites
-        for (i, v) in zip(indexin(keys(req), metabolites(model)), values(req))
+        for (i, v) in zip(indexin(keys(req), metabolite_ids(model)), values(req))
             push!(J, j)
             push!(I, i)
             push!(V, v)
@@ -21,7 +21,7 @@ function add_reactions!(model::MatrixModel, rxns::Vector{Reaction})
         lbs[j] = rxn.lower_bound
         ubs[j] = rxn.upper_bound
     end
-    Sadd = sparse(I, J, V, n_metabolites(model), length(rxns))
+    Sadd = sparse(I, J, V, metabolite_count(model), length(rxns))
     model.S = [model.S Sadd]
     model.c = dropzeros([model.c; zeros(length(rxns))]) # does not add an objective info from rxns
     model.xu = ubs
@@ -370,7 +370,7 @@ end
             any(in.(findnz(model.S[:, ridx])[1], Ref(metabolite_idxs)))
         ],
     )
-    mask = .!in.(1:n_metabolites(model), Ref(metabolite_idxs))
+    mask = .!in.(1:metabolite_count(model), Ref(metabolite_idxs))
     model.S = model.S[mask, :]
     model.b = model.b[mask]
     model.mets = model.mets[mask]
@@ -392,15 +392,21 @@ end
 end
 
 @_remove_fn metabolite MatrixModel String inplace plural begin
-    remove_metabolites!(model, Int.(indexin(metabolite_ids, metabolites(model))))
+    remove_metabolites!(
+        model,
+        Int.(indexin(metabolite_ids, Accessors.metabolite_ids(model))),
+    )
 end
 
 @_remove_fn metabolite MatrixModel String begin
     remove_metabolites(model, [metabolite_id])
 end
 
 @_remove_fn metabolite MatrixModel String plural begin
-    remove_metabolites(model, Int.(indexin(metabolite_ids, metabolites(model))))
+    remove_metabolites(
+        model,
+        Int.(indexin(metabolite_ids, Accessors.metabolite_ids(model))),
+    )
 end
 
 """

diff --git a/src/solver.jl b/src/solver.jl
@@ -35,6 +35,12 @@ function make_optimization_model(
 
     optimization_model = Model(optimizer)
 
+    function label(semname, suffix, constraints)
+        l = Symbol(semname, :_, suffix)
+        optimization_model[l] = constraints
+        set_name.(constraints, "$l")
+    end
+
     # make the variables
     n = variable_count(model)
     @variable(optimization_model, x[1:n])
@@ -56,22 +62,30 @@ function make_optimization_model(
     end
 
     # go over the semantics and add bounds if there are any
+    # TODO for use in sampling and other things, it would be nice to have
+    # helper functions to make a complete matrix of equality and interval
+    # constraints.
     for (semname, sem) in Accessors.Internal.get_semantics()
         bounds = sem.bounds(model)
         if isnothing(bounds)
             continue
         elseif typeof(bounds) <: AbstractVector{Float64}
             # equality bounds
-            c = @constraint(optimization_model, sem.mapping_matrix(model) * x .== bounds)
-            set_name.(c, "$(semname)_eqs")
+            label(
+                semname,
+                :eqs,
+                @constraint(optimization_model, sem.mapping_matrix(model) * x .== bounds)
+            )
         elseif typeof(bounds) <: Tuple{<:AbstractVector{Float64},<:AbstractVector{Float64}}
             # lower/upper interval bounds
             slb, sub = bounds
             smtx = sem.mapping_matrix(model)
-            c = @constraint(optimization_model, slb .<= smtx * x)
-            set_name.(c, "$(semname)_lbs")
-            c = @constraint(optimization_model, smtx * x .<= sub)
-            set_name.(c, "$(semname)_ubs")
+            label(semname, :lbs, @constraint(optimization_model, slb .<= smtx * x))
+            label(semname, :ubs, @constraint(optimization_model, smtx * x .<= sub))
+            # TODO: this actually uses the semantic matrix transposed, but
+            # that's right. Fix: transpose all other semantics because having
+            # the stoichiometry in the "right" way is quite crucial for folks
+            # being able to reason about stuff.
         else
             # if the bounds returned something weird, complain loudly.
             throw(
@@ -89,9 +103,6 @@ function make_optimization_model(
         end
     end
 
-    # make stoichiometry balanced
-    @constraint(optimization_model, mb, stoichiometry(model) * x .== balance(model)) # mass balance
-
     # add coupling constraints
     C = coupling(model)
     if !isempty(C)
@@ -197,7 +208,7 @@ values_vec(:reaction, flux_balance_analysis(model, ...))
 """
 function values_vec(semantics::Symbol, res::ModelWithResult{<:Model})
     s = Accessors.Internal.semantics(semantics)
-    is_solved(res.result) ? s.mapping_matrix(res.model)' * value.(res.result[:x]) : nothing
+    is_solved(res.result) ? s.mapping_matrix(res.model) * value.(res.result[:x]) : nothing
 end
 
 """
@@ -244,7 +255,7 @@ values_dict(:reaction, flux_balance_analysis(model, ...))
 function values_dict(semantics::Symbol, res::ModelWithResult{<:Model})
     s = Accessors.Internal.semantics(semantics)
     is_solved(res.result) ?
-    Dict(s.ids(res.model) .=> s.mapping_matrix(res.model)' * value.(res.result[:x])) :
+    Dict(s.ids(res.model) .=> s.mapping_matrix(res.model) * value.(res.result[:x])) :
     nothing
 end
 

diff --git a/src/types/accessors/AbstractMetabolicModel.jl b/src/types/accessors/AbstractMetabolicModel.jl
@@ -23,7 +23,7 @@ genetic material and virtual cell volume, etc. To simplify the view of the model
 contents use [`reaction_variables`](@ref).
 """
 function variable_ids(a::AbstractMetabolicModel)::Vector{String}
-    missing_impl_error(variables, (a,))
+    missing_impl_error(variable_ids, (a,))
 end
 
 """
@@ -46,7 +46,7 @@ $(TYPEDSIGNATURES)
 Get the lower and upper solution bounds of a model.
 """
 function variable_bounds(a::AbstractMetabolicModel)::Tuple{Vector{Float64},Vector{Float64}}
-    missing_impl_error(bounds, (a,))
+    missing_impl_error(variable_bounds, (a,))
 end
 
 """
@@ -57,46 +57,6 @@ const bounds = variable_bounds
 """
 $(TYPEDSIGNATURES)
 
-Return a vector of metabolite identifiers in a model. The vector precisely
-corresponds to the rows in [`stoichiometry`](@ref) matrix.
-
-As with [`variables`](@ref)s, some metabolites in models may be virtual,
-representing purely technical equality constraints.
-"""
-function metabolites(a::AbstractMetabolicModel)::Vector{String}
-    missing_impl_error(metabolites, (a,))
-end
-
-"""
-$(TYPEDSIGNATURES)
-
-Get the number of metabolites in a model.
-"""
-function n_metabolites(a::AbstractMetabolicModel)::Int
-    length(metabolites(a))
-end
-
-"""
-$(TYPEDSIGNATURES)
-
-Get the sparse stoichiometry matrix of a model.
-"""
-function stoichiometry(a::AbstractMetabolicModel)::SparseMat
-    missing_impl_error(stoichiometry, (a,))
-end
-
-"""
-$(TYPEDSIGNATURES)
-
-Get the sparse balance vector of a model.
-"""
-function balance(a::AbstractMetabolicModel)::SparseVec
-    return spzeros(n_metabolites(a))
-end
-
-"""
-$(TYPEDSIGNATURES)
-
 Get the linear objective vector or the quadratic objective affine matrix of the
 model.
 
@@ -131,6 +91,26 @@ Shortcut for writing [`reaction_ids`](@ref).
 """
 const reactions = reaction_ids
 
+@make_variable_semantics(
+    :metabolite,
+    "metabolites",
+    """
+Metabolite values represent the over-time change of abundance of individual
+metabolites in the model. To reach a steady state, models typically constraint
+these to be zero.
+"""
+)
+
+"""
+A shortcut for [`metabolite_ids`](@ref).
+"""
+const metabolites = metabolite_ids
+
+"""
+The usual name of [`metabolite_variables_matrix`](@ref).
+"""
+const stoichiometry = metabolite_variables_matrix
+
 @make_variable_semantics(
     :enzyme,
     "enzyme supplies",
@@ -219,7 +199,7 @@ In SBML, these are usually called "gene products" but we write `genes` for
 simplicity.
 """
 function genes(a::AbstractMetabolicModel)::Vector{String}
-    return []
+    String[]
 end
 
 """
@@ -307,7 +287,7 @@ function reaction_stoichiometry(
     m::AbstractMetabolicModel,
     rid::String,
 )::Dict{String,Float64}
-    mets = metabolites(m)
+    mets = metabolite_ids(m)
     Dict(
         mets[k] => v for (k, v) in
         zip(findnz(stoichiometry(m)[:, first(indexin([rid], variable_ids(m)))])...)