"""Methods for the thermodynamic assessment of a metabolic network.
"""
import logging
from typing import List, Optional, Union
import cobra
import numpy as np
import pandas as pd
from cobra.flux_analysis.variability import flux_variability_analysis
from .commons import Q
from .constants import (
default_flux_bound,
default_max_non_intracellular_conc_mM,
default_theta_s,
default_theta_z,
non_intracellular_compartment_ids,
)
from .pmo import PmoProblem
from .utils import (
find_blocked_reactions_from_fva_solution,
get_internal_cycles,
tighten_model_bounds,
)
logger = logging.getLogger(__name__)
[docs]class StructuralAssessment:
"""This class is used to find thermodynamic inconsistencies in the definition of the
network that make the model infeasible. Inconsistencies arise when flux constraints
(steady state and irreversibilities) prevent any thermodynamically feasible non-zero
solution. These inconsistencies occur with any assignment of Gibbs free energies.
Parameters
----------
model : cobra.Model
The target model.
biomass_id : Optional[str], optional
ID of the biomass reactions. If None, the method will try to find it
automatically.
atpm_id : str, optional
ID of the ATP maintenance reaction.
"""
def __init__(
self,
model: cobra.Model,
biomass_id: Optional[str] = None,
atpm_id: str = "ATPM",
):
# Enumerate all the internal cycles of the network and select the ones that must
# be active in any non-zero solution.
cycles = get_internal_cycles(model, biomass_id, atpm_id)
forced_cycles = self._get_forced_internal_cycles(model, cycles)
# Convert the EFMs matrix to a list of cycles, represented by reaction IDs.
self._forced_internal_cycles = []
for cycle in forced_cycles.T:
cycle_reaction_ids = [
model.reactions[i].id for i, v in enumerate(cycle) if v != 0
]
self._forced_internal_cycles.append(cycle_reaction_ids)
@property
[docs] def forced_internal_cycles(self) -> List[List[str]]:
"""Gets a list of all the forced internal cycles of the network, i.e. the
internal cycles that must be active in any non-zero flux solution and
thus make the model thermodynamically inconsistent.
"""
return self._forced_internal_cycles
[docs] def summary(self):
"""Print a summary of the structural assessment."""
print("Strutural thermodynamic assessment summary:")
print("---------------------------------------------")
print("")
_ = pd.option_context(
"display.max_rows",
None,
"display.max_columns",
None,
"display.precision",
3,
)
if len(self.forced_internal_cycles) > 0:
print(
"> The following internal cycles are thermodynamically "
"unfeasible, but must be active in any non-zero flux "
"solution, meaning that the model is thermodynamically "
"inconsistent. For the cycles below, please:\n (1) Verify and "
"correct the direction of the irreversible reactions.\n (2) "
"Remove reactions in cycles that are connected to the rest "
"of the network through a single reaction (e.g. A -> B, "
"B -> A, where only A participates in other reactions).\n "
"Alternatively, you can use the StructuralAssessment."
"autoresolve() function to automatically curate the model."
)
for i, cycle in enumerate(self.forced_internal_cycles):
print(f'{i}. {", ".join(cycle)}')
else:
print("> No thermodynamic inconsistencies detected in the model.")
print("")
[docs] def autoresolve(
self, model: cobra.Model, default_bound: float = default_flux_bound
):
"""Attempt to automatically resolve thermodynamic inconsistencies. This method
removes all reactions involved in dead-end cycles (internal cycles that
exchange flux with the rest of the network through a single metabolite) and
makes all the reactions in the remaining cycles reversible. While this method
should always resolve all inconsistencies, it is still recommended to inspect
the inconsistencies and the curated model manually to verify that the model
behave as expected. The output of the curation can be seen by enabling logging
level INFO.
Parameters
----------
model : cobra.Model
The model in which the curation actions must be applied. This can be the
model on which the assessment was run or a similar model. This is useful for
cases where assessment is run on a condition specific model but actions
should be applied to a base model.
default_bound : float, optional
Default bound to use for unconstrained reactions.
"""
for cycle in self.forced_internal_cycles:
if self._is_semi_disconnected_cycle(model, cycle):
logger.info(
"Removing all reactions of the dead-end cycle:"
f' {", ".join(cycle)}.'
)
model.remove_reactions(cycle, remove_orphans=True)
else:
logger.info(
"Removing bounds for all reactions in the cycle: "
f'{", ".join(cycle)}.'
)
for rxn_id in cycle:
try:
reaction = model.reactions.get_by_id(rxn_id)
except Exception:
continue
reaction.lower_bound = -default_bound
reaction.upper_bound = default_bound
def _is_semi_disconnected_cycle(self, model: cobra.Model, cycle: np.array) -> bool:
"""Check whether a cycle is dead-end, i.e. it exchanges flux with the
rest of the network through a single metabolite only.
"""
with model as model:
# Pick a reaction and fix it to a flux value that is not at the
# boundaries.
try:
fixed_reaction = model.reactions.get_by_id(cycle[0])
except Exception:
# If the reaction does not exist, the cycle is broken and we don't need
# to do anything else.
return False
model.objective = {fixed_reaction: 1}
max_flux = model.slim_optimize()
model.objective = {fixed_reaction: -1}
min_flux = -model.slim_optimize()
fixed_flux = (max_flux + min_flux) / 2
fixed_reaction.lower_bound = fixed_flux
fixed_reaction.upper_bound = fixed_flux
# If all other reactions of the cycle are not fixed as well, then this is a
# dead-end cycle.
for rxn_id in cycle:
try:
other_reaction = model.reactions.get_by_id(rxn_id)
except Exception:
return False
model.objective = {other_reaction: 1}
max_flux = model.slim_optimize()
model.objective = {other_reaction: -1}
min_flux = -model.slim_optimize()
if max_flux - min_flux > 1e-8:
return False
return True
def _get_forced_internal_cycles(
self, model: cobra.Model, cycles: np.array
) -> np.array:
"""Select our of a list of cycles the ones that are forced."""
forced_cycle_idxs = []
for e in range(cycles.shape[1]):
if self._is_forced_cycle(model, cycles[:, e]):
forced_cycle_idxs.append(e)
return cycles[:, forced_cycle_idxs]
def _is_forced_cycle(
self,
model: cobra.Model,
cycle: np.array,
) -> bool:
"""Check if a cycle is forced (must be active in any non-zero flux solution)."""
min_flux = 1e-6
rxn_idxs = np.where(cycle != 0)[0].tolist()
# Check for any pair of reaction and for any sense of the cycle.
for i, _ in enumerate(rxn_idxs):
for j in range(i):
for sense in [-1, 1]:
with model as model:
rxn_i = model.reactions[rxn_idxs[i]]
rxn_j = model.reactions[rxn_idxs[j]]
direction_i = cycle[rxn_idxs[i]] * sense
direction_j = cycle[rxn_idxs[j]] * sense
# If the chosen directions are not within the flux bounds of the
# model we can skip the iteration.
if (
(direction_i > 0 and rxn_i.upper_bound <= 0)
or (direction_i < 0 and rxn_i.lower_bound >= 0)
or (direction_j < 0 and rxn_j.upper_bound <= 0)
or (direction_j > 0 and rxn_j.lower_bound >= 0)
):
continue
# Set the direction of the cycle for the first reaction.
if direction_i > 0:
rxn_i.lower_bound = max(rxn_i.lower_bound, min_flux)
else:
rxn_i.upper_bound = min(rxn_i.upper_bound, -min_flux)
# Set the opposite direction (with respect to the direction in
# the EFM) for the second reaction.
if direction_j < 0:
rxn_j.lower_bound = max(rxn_j.lower_bound, min_flux)
else:
rxn_j.upper_bound = min(rxn_j.upper_bound, -min_flux)
# Check whether the model is feasible. If yes, there are
# thermodynamically feasible solutions for the set of
# reactions involved in the EFM because the cycle is not
# mandatory.
if not np.isnan(model.slim_optimize()):
return False
# If we could not find any thermodynamically feasible combination of directions
# the cycle is forced.
return True
[docs]class QuantitativeAssessment:
"""Quantitative thermodynamic assessment of a metabolic network. This process
combines thermodynamic data and flux constraints to identify parts of the network
where the mechanism in the model is possibly incorrect.
Parameters
----------
pmo_problem : PmoProblem
A solved PMO problem.
z_score_threshold : float, optional
Threshold on the z-score to consider a predicted value an anomaly.
shadow_price_threshold : float, optional
Threshold on the shadow price to consider a reaction a strong thermodynamic
constrain.
max_extracellular_conc : Q, optional
Threshold on the concentration to consider a predicted concentration an anomaly.
"""
def __init__(
self,
pmo_problem: PmoProblem,
z_score_threshold: float = default_theta_z,
shadow_price_threshold: float = default_theta_s,
max_extracellular_conc: Q = None,
):
self._theta_z = z_score_threshold
self._theta_s = shadow_price_threshold
self._max_ex_conc = max_extracellular_conc or Q(
default_max_non_intracellular_conc_mM, "mM"
)
self._is_intracellular = pd.DataFrame(
[
m.compartment not in non_intracellular_compartment_ids
for m in pmo_problem.T.metabolites
]
)
# Restrict the PMO problem to the predicted directions. This way, the
# problem is not an MILP anymore and solvers should be able to return
# shadow prices.
orthant_pmo = pmo_problem.rebuild_for_directions(pmo_problem.d.value)
status = orthant_pmo.solve()
assert status == "optimal" or status == "optimal_inaccurate"
if status == "optimal_inaccurate":
logger.warning("Inaccurate solution found when computing shadow prices.")
# Assemble metabolite and reaction data in two data frames.
self._metabolites_df = pd.DataFrame(
{
"id": pmo_problem.T.metabolite_ids,
"conc": np.exp(pmo_problem.log_c).ravel() * 1000
if pmo_problem.log_c is not None
else np.nan,
"z_log_c": self._make_z_scores(pmo_problem, "log_c"),
}
)
# Verify whether the shadow prices were computed correctly.
if (
orthant_pmo.sign_constraints[0].dual_value is None
or orthant_pmo.sign_constraints[1].dual_value is None
):
logger.warning(
"Unable to compute shadow prices. Solve again with verbose=True to see the solver output."
)
sp_drg = np.zeros(len(pmo_problem.T.reaction_ids)) * np.nan
else:
sp_drg = np.maximum(
orthant_pmo.sign_constraints[0].dual_value,
orthant_pmo.sign_constraints[1].dual_value,
).ravel()
self._reactions_df = pd.DataFrame(
{
"id": pmo_problem.T.reaction_ids,
"v": pmo_problem.v[pmo_problem._rxn_idxs_F_to_T].ravel(),
"drg0": pmo_problem.drg0.ravel()
if pmo_problem.drg0 is not None
else np.nan,
"drg": pmo_problem.drg.ravel()
if pmo_problem.drg is not None
else np.nan,
"z_drg": self._make_z_scores(pmo_problem, "drg"),
"z_drg0": self._make_z_scores(pmo_problem, "drg0"),
"sp_drg": sp_drg,
}
)
@property
@property
[docs] def reactions_df(self) -> pd.DataFrame:
"""Gets a data frame with the reaction-related quantities."""
return self._reactions_df
@property
[docs] def theta_z(self) -> float:
"""Gets the mimimum absolute z-score used to classify a metabolite
concentration as anomaly."""
return self._theta_z
@property
[docs] def theta_s(self) -> float:
"""Gets the mimimum absolute shadow price to flag a constraint."""
return self._theta_s
@property
[docs] def max_ex_concentration(self) -> float:
"""Gets the minimum concentration used to classify a non-intracellular
metabolite concentration as anomaly."""
return self._max_ex_conc
[docs] def summary(self):
"""Print a summary of the quantitative assessment."""
print("Quantitative thermodynamic assessment summary:")
print("------------------------------------------------")
print("concentrations: mM, free energies: kJ/mol")
print("")
pd_context = pd.option_context(
"display.max_rows",
None,
"display.max_columns",
None,
"display.precision",
3,
)
# Search for anomalies based on concentration z-scores.
anomalies_m_z = self.metabolites_df[
self.metabolites_df["z_log_c"].abs() > self.theta_z
]
anomalies_m_z = anomalies_m_z.reindex(
anomalies_m_z["z_log_c"].abs().sort_values(ascending=False).index
)
if not anomalies_m_z.empty:
print(
f"> The following metabolites have been flagged as "
f"anomalies because their predicted concentration has an "
f"absolute z-score greater than {self.theta_z}:"
)
with pd_context:
print(anomalies_m_z)
else:
print("> No anomaly found in concentration z-scores.")
print("")
# Search for anomalies based on maximum concentration.
max_ex_concentration_mM = self.max_ex_concentration.m_as("mM")
anomalies_m_m = self.metabolites_df[
(self.metabolites_df["conc"] >= max_ex_concentration_mM)
& ~pd.DataFrame(self._is_intracellular)[0]
& (self.metabolites_df["z_log_c"].abs() > 0)
]
anomalies_m_m = anomalies_m_m.reindex(
anomalies_m_m["conc"].abs().sort_values(ascending=False).index
)
if not anomalies_m_m.empty:
print(
f"> The following non-intracellular metabolites have been "
f"flagged as anomalies because they have concentation "
f"greater than {max_ex_concentration_mM} mM:"
)
with pd_context:
print(anomalies_m_m)
else:
print("> No anomaly found in non-intracellular concentrations.")
print("")
# Search for anomalies based on reaction z-scores.
anomalies_r_z = self.reactions_df[
(self.reactions_df["z_drg"].abs() > self.theta_z)
| (self.reactions_df["z_drg0"].abs() > self.theta_z)
]
anomalies_r_z = anomalies_r_z.reindex(
anomalies_r_z[["z_drg", "z_drg0"]]
.abs()
.max(axis=1)
.sort_values(ascending=False)
.index
)
if not anomalies_r_z.empty:
print(
f"> The following reactions have been flagged as anomalies "
f"because their predicted free energy or standard free "
f"energy has an absolute z-score greater than {self.theta_z}:"
)
with pd_context:
print(anomalies_r_z)
else:
print("> No anomaly found in free energy z-scores.")
print("")
# Search for anomalies based on reaction shadow prices.
anomalies_r_s = self.reactions_df[(self.reactions_df["sp_drg"] > self.theta_s)]
anomalies_r_s = anomalies_r_s.reindex(
anomalies_r_s["sp_drg"].sort_values(ascending=False).index
)
if not anomalies_r_s.empty:
print(
f"> The following reactions are predicted to impose strong "
f"thermodynamic constraints on the network because their "
f"shadow price is greater than {self.theta_s}:"
)
with pd_context:
print(anomalies_r_s)
else:
print("> No significant thermodynamic constraints found.")
print("")
def _make_z_scores(
self, pmo_problem: PmoProblem, variable: str
) -> Union[np.array, float]:
"""Construct the z-scores for a specific type of variables."""
transform = None
if variable == "log_c":
if pmo_problem.B.to_log_conc_transform is not None:
transform = pmo_problem.B.to_log_conc_transform[0]
elif variable == "drg":
if pmo_problem.B.to_drg_transform is not None:
transform = pmo_problem.B.to_drg_transform[0]
elif variable == "drg0":
if pmo_problem.B.to_drg0_transform is not None:
transform = pmo_problem.B.to_drg0_transform[0]
else:
raise ValueError(f"Unsupported variable {variable}")
if transform is None:
return np.nan
else:
std_devs = np.sqrt(np.diag(transform @ transform.T))
scaling = np.zeros(std_devs.shape)
scaling[std_devs > 0] = np.reciprocal(std_devs[std_devs > 0])
scaling = np.diag(scaling)
return (scaling @ transform @ pmo_problem.m.value).ravel()
[docs]def prepare_for_pta(
model: cobra.Model,
biomass_id: Optional[str] = None,
atpm_id: str = "ATPM",
default_bound: float = default_flux_bound,
autoresolve_inconsistencies: bool = True,
remove_blocked_reactions: bool = True,
tighten_bounds: bool = True,
prevent_loops: bool = False,
):
"""Attempt to automatically prepare a model for use in PTA.
This method performs three actions:
* Runs structural assessment on the model and attempts to autoresolve possible
inconsistencies.
* Removes all blocked reactions from the model.
* Runs FVA to tighten the flux bounds of each reaction.
Parameters
----------
model : cobra.Model
The target model.
biomass_id : Optional[str], optional
ID of the biomass reactions. If None, the method will try to find it
automatically.
atpm_id : str, optional
ID of the ATP maintenance reaction.
default_bound : float, optional
Default bound to use for unconstrained reactions.
autoresolve_inconsistencies : bool, optional
True if the method should attempt to automatically resolve inconsistencies,
false otherwise. By default True.
remove_blocked_reactions : bool, optional
True if the method should remove blocked reactions, false otherwise. By default
True.
tighten_bounds : bool, optional
True if the method should restrict the flux bounds with FVA, false otherwise. By
default True.
"""
logger.info("Preparing model for PTA ...")
result = model.optimize()
assert result.status != "infeasible", "Model is infeasible."
# Run structural assessment and auto-resolve all inconsistencies.
if autoresolve_inconsistencies:
assessment = StructuralAssessment(model, biomass_id, atpm_id)
assessment.autoresolve(model, default_bound)
if remove_blocked_reactions or tighten_bounds:
fva_result = flux_variability_analysis(
model, loopless=prevent_loops, fraction_of_optimum=0.0
)
# Remove blocked reactions to make sure that all reactions can have a
# well-defined direction.
if remove_blocked_reactions:
model.remove_reactions(
find_blocked_reactions_from_fva_solution(model, fva_result=fva_result),
remove_orphans=True,
)
# Restrict reaction bounds using FVA.
if tighten_bounds:
tighten_model_bounds(model, prevent_loops=prevent_loops, fva_result=fva_result)
logger.info("Model preprocessing completed.")