Source code for pta.thermodynamic_space

"""Description of the space of thermodynamics-related quantities of a metabolic network.
"""

import logging
from typing import Dict, List, Optional, Set, Tuple, Union

import cobra
import numpy as np
import numpy.linalg as la
import scipy.linalg
from cobra.util.array import create_stoichiometric_matrix
from component_contribution.linalg import LINALG
from enkie import CompartmentParameters, Metabolite
from enkie.constants import R
from enkie.estimators import (EquilibratorGibbsEstimator,
                              GibbsEstimatorInterface)
from enkie.io.cobra import parse_metabolites
from enkie.utils import qrvector, qvector

from .commons import Q
from .concentrations_prior import ConcentrationsPrior
from .constants import default_min_eigenvalue_tds_basis
from .utils import (apply_transform, covariance_square_root,
                    get_candidate_thermodynamic_constraints, to_reactions_ids,
                    to_reactions_idxs)

logger = logging.getLogger(__name__)


[docs]class ThermodynamicSpace: """ Construction, description and manipulation of the thermodynamic space of a metabolic network. Parameters ---------- S_constraints : np.ndarray Stoichiometric matrix of the reactions covered by thermodynamic constraints. reaction_ids : List[str] Identifiers of the reactions covered by thermodynamic constraints. metabolites : List[Metabolite] List describing the metabolites in the network. parameters : CompartmentParameters, optional The physiological parameters (pH, ionic strength, ...) of each compartment. concentrations : ConcentrationsPrior, optional Prior distributions for the metabolite concentrations. estimator : GibbsEstimatorInterface, optional Object used to estimate Gibbs free energies. dfg0_estimate : Optional[Tuple[Q, Q]], optional Estimate of formation energies (mean and a square root of the covariance matrix) in case the user wants to specify them manually. This is only used if :code:`estimator` is :code:`None`. """ def __init__( self, S_constraints: np.ndarray, reaction_ids: List[str], metabolites: List[Metabolite], parameters: CompartmentParameters = None, concentrations: ConcentrationsPrior = None, estimator: GibbsEstimatorInterface = None, dfg0_estimate: Optional[Tuple[Q, Q]] = None, ): self._S_constraints = S_constraints self._reaction_ids = reaction_ids self._metabolites = metabolites self._parameters = parameters or CompartmentParameters.load("default") if estimator is None and dfg0_estimate is not None: self._dfg0_prime_mean, self._dfg0_prime_cov_sqrt = dfg0_estimate else: if dfg0_estimate is not None: logger.warning( "dfg0_estimate ignored, since an estimator has " "has been passed to ThermodynamicSpace." ) estimator = estimator or EquilibratorGibbsEstimator() self._dfg0_prime_mean, self._dfg0_prime_cov_sqrt = estimator.get_dfg0_prime( self.S_constraints, metabolites, self._parameters ) concentrations = concentrations or ConcentrationsPrior() self._concentrations = concentrations ( self._log_conc_mean, self._log_conc_cov, ) = self._make_concentrations_distribution(metabolites, concentrations) @property
[docs] def dfg0_prime_mean(self) -> Q: """Gets the mean of the corrected standard formation energies.""" return self._dfg0_prime_mean
@property
[docs] def dfg0_prime_cov(self) -> Q: """Gets the covariance of the corrected standard formation energies.""" return self._dfg0_prime_cov_sqrt @ self._dfg0_prime_cov_sqrt.T
@property
[docs] def dfg0_prime_cov_sqrt(self) -> Q: """Gets a square root of the covariance of the corrected standard formation energies. """ return self._dfg0_prime_cov_sqrt
@property
[docs] def drg0_prime_mean(self) -> Q: """Gets the mean of the corrected standard reaction energies.""" return self.S_constraints.T @ self.dfg0_prime_mean
@property
[docs] def drg0_prime_cov(self) -> Q: """Gets the covariance of the corrected standard reaction energies.""" return self.drg0_prime_cov_sqrt @ self.drg0_prime_cov_sqrt.T
@property
[docs] def drg0_prime_cov_sqrt(self) -> Q: """Gets a square root of the covariance of the corrected standard reaction energies. """ return self.S_constraints.T @ self.dfg0_prime_cov_sqrt
@property
[docs] def log_conc_mean(self): """Gets the mean of the log-concentrations.""" return self._log_conc_mean
@property
[docs] def log_conc_cov(self): """Gets the covariance of the log-concentrations.""" return self._log_conc_cov
@property
[docs] def dfg_prime_mean(self): """Gets the mean of the formation energies.""" return self.dfg0_prime_mean + self.log_conc_mean * R * self.parameters.T()
@property
[docs] def dfg_prime_cov(self): """Gets the covariance of the formation energies.""" return self.dfg0_prime_cov + self.log_conc_cov * (R * self.parameters.T()) ** 2
@property
[docs] def drg_prime_mean(self): """Gets the mean of the reaction energies.""" return self.S_constraints.T @ self.dfg_prime_mean
@property
[docs] def drg_prime_cov(self): """Gets the covariance of the reaction energies.""" return self.S_constraints.T @ self.dfg_prime_cov @ self.S_constraints
@property
[docs] def parameters(self) -> CompartmentParameters: """Gets the compartment parameters of the system.""" return self._parameters
@property
[docs] def metabolites(self) -> List[Metabolite]: """Gets the list of metabolites in the thermodynamic space.""" return self._metabolites
@property
[docs] def S_constraints(self) -> np.ndarray: """Gets stoichiometric matrix of the reactions with thermodynamic constraints.""" return self._S_constraints
@property
[docs] def metabolite_ids(self) -> List[str]: """Gets the IDs of the metabolites in the thermodynamic space.""" return [m.id for m in self.metabolites]
@property
[docs] def reaction_ids(self) -> List[str]: """Gets the IDs of the reactions with thermodynamic constraints.""" return self._reaction_ids
[docs] def subspace( self, reaction_ids: Set[str], metabolite_ids: Set[str] ) -> "ThermodynamicSpace": reaction_idxs = [ i for i, r in enumerate(self.reaction_ids) if r in reaction_ids ] metabolite_idxs = [ i for i, m in enumerate(self.metabolites) if m.id in metabolite_ids ] return ThermodynamicSpace( self.S_constraints[np.ix_(metabolite_idxs, reaction_idxs)].copy(), [self.reaction_ids[i] for i in reaction_idxs], [self.metabolites[i] for i in metabolite_idxs], self.parameters, self._concentrations, dfg0_estimate=( Q(self.dfg0_prime_mean.m[metabolite_idxs,:], "kJ/mol"), Q(LINALG.qr_rank_deficient(self.dfg0_prime_cov_sqrt.m[metabolite_idxs, :].T).T, "kJ/mol"),
), ) @staticmethod
[docs] def from_cobrapy_model( model: cobra.Model, metabolites_namespace: str = None, constrained_rxns: Union[List[int], List[str], cobra.DictList] = None, estimator: GibbsEstimatorInterface = None, parameters: CompartmentParameters = None, concentrations: ConcentrationsPrior = None, dfg0_estimate: Optional[Tuple[Q, Q]] = None, ) -> "ThermodynamicSpace": """Constructs a thermodynamic space from a cobrapy model. Parameters ---------- model : cobra.Model Cobra model describing the metabolic network. metabolites_namespace : str, optional Specifies the name to use when reading metabolite identifiers from the SBML model annotations. constrained_rxns : Union[List[int], List[str], cobra.DictList], optional The reactions that should be modeled with thermodynamic constraints. Usually this list should contain all reactions except biomass and boundary reactions. The list can contain either the reactions themselves, their indices or they identifiers. estimator : GibbsEstimatorInterface, optional Object used to estimate Gibbs free energies. parameters : CompartmentParameters, optional The physiological parameters (pH, ionic strength, ...) of each compartment. concentrations : ConcentrationsPrior, optional Prior distributions for the metabolite concentrations. Returns ------- ThermodynamicSpace The thermodynamic space for the specified model. """ if constrained_rxns is None: constrained_rxns = get_candidate_thermodynamic_constraints( model, metabolites_namespace, ) constrained_rxn_ids = to_reactions_ids(constrained_rxns, model) constrained_rxn_idxs = to_reactions_idxs(constrained_rxns, model) S_constraints = create_stoichiometric_matrix(model)[:, constrained_rxn_idxs] is_metabolite_needed = np.sum(np.abs(S_constraints), axis=1) > 0 needed_metabolites_ids = [ model.metabolites[i].id for i in np.where(is_metabolite_needed)[0] ] S_constraints = S_constraints[is_metabolite_needed, :] # Parse metabolites from the model. metabolites = parse_metabolites( model, needed_metabolites_ids, metabolites_namespace ) return ThermodynamicSpace( S_constraints, constrained_rxn_ids, list(metabolites.values()), parameters, concentrations, estimator, dfg0_estimate,
) def _make_concentrations_distribution( self, metabolites: List[Metabolite], concentrations: ConcentrationsPrior, ) -> Tuple[Q, Q]: """Construct the distribution over metabolite concentrations.""" log_conc_mean = qvector( [ Q(concentrations.get(m.miriam_id, m.compartment).log_mean) for m in metabolites ] ) log_conc_cov = Q( np.diag( np.square( qrvector( [ Q(concentrations.get(m.miriam_id, m.compartment).log_std) for m in metabolites ] ).m ) ) ) return log_conc_mean, log_conc_cov
[docs]class ThermodynamicSpaceBasis: """ Full-dimensional basis of the thermodynamic space. It is possible to make only selected variables explicit in the basis, reducing its dimensionality. Parameters ---------- thermodynamic_space : ThermodynamicSpace The target thermodynamic space. explicit_log_conc : bool, optional True if log-concentrations should be represented explicitly in the basis, false otherwise. By default True. explicit_drg0 : bool, optional True if standard reaction energies should be represented explicitly in the basis, false otherwise. By default True. explicit_drg : bool, optional True if reaction energies should be represented explicitly in the basis, false otherwise. By default True. min_eigenvalue : float, optional Minimum eigenvalue of for a vector to be part of the basis. """ def __init__( self, thermodynamic_space: "ThermodynamicSpace", explicit_log_conc: bool = True, explicit_drg0: bool = True, explicit_drg: bool = True, min_eigenvalue: float = default_min_eigenvalue_tds_basis, ): self._to_log_conc_transform: Optional[Tuple[np.ndarray, np.ndarray]] = None self._to_drg0_transform: Optional[Tuple[np.ndarray, np.ndarray]] = None self._to_drg_transform: Optional[Tuple[np.ndarray, np.ndarray]] = None self._observables_ranges: Dict[str, List[int]] = { "drg": [], "drg0": [], "log_conc": [], } self._compute_basis( thermodynamic_space, explicit_log_conc, explicit_drg0, explicit_drg, min_eigenvalue, ) @property
[docs] def to_log_conc_transform(self) -> Optional[Tuple[np.ndarray, np.ndarray]]: """Gets the transformation from this space to log-concentrations. :code:`None` if concentrations are not represented explicitly.""" return self._to_log_conc_transform
@property
[docs] def to_drg0_transform(self) -> Optional[Tuple[np.ndarray, np.ndarray]]: """Gets the transformation from this space to DrG'°. :code:`None` if standard reaction energies are not represented explicitly.""" return self._to_drg0_transform
@property
[docs] def to_drg_transform(self) -> Optional[Tuple[np.ndarray, np.ndarray]]: """Gets the transformation from this space to DrG'. :code:`None` if reaction energies are not represented explicitly.""" return self._to_drg_transform
@property
[docs] def to_observables_transform(self) -> Tuple[np.ndarray, np.ndarray]: """Gets the transformation from this space to the selected observables.""" return self._to_observables_transform
@property
[docs] def observables_ranges(self) -> Dict[str, List[int]]: """The ranges of the different variables in a vector of observables.""" return self._observables_ranges
@property
[docs] def sigmas(self) -> np.ndarray: """Gets a vector containing the standard deviation of each variable in the minimal basis.""" return self._sigmas
@property
[docs] def dimensionality(self) -> int: """Gets the dimensionality of the basis.""" return self._dimensionality
[docs] def to_log_conc(self, basis_vars: np.ndarray) -> np.ndarray: """Transform a vector or matrix in the basis to a vector or matrix of log-concentrations. Parameters ---------- basis_vars : np.ndarray The input vector or matrix. Returns ------- np.ndarray The transformed vector or matrix. :code:`None` if concentrations are not represented explicitly. """ if not self.to_log_conc_transform: return None else: return apply_transform(basis_vars, self.to_log_conc_transform)
[docs] def to_drg0(self, basis_vars: np.ndarray) -> np.ndarray: """Transform a vector or matrix in the basis to a vector or matrix of standard reaction energies. Parameters ---------- basis_vars : np.ndarray The input vector or matrix. Returns ------- np.ndarray The transformed vector or matrix. :code:`None` if standard reaction energies are not represented explicitly. """ if not self.to_drg0_transform: return None else: return apply_transform(basis_vars, self.to_drg0_transform)
[docs] def to_drg(self, basis_vars: np.ndarray) -> np.ndarray: """Transform a vector or matrix in the basis to a vector or matrix of reaction energies. Parameters ---------- basis_vars : np.ndarray The input vector or matrix. Returns ------- np.ndarray The transformed vector or matrix. :code:`None` if reaction energies are not represented explicitly. """ if not self.to_drg_transform: return None else: return apply_transform(basis_vars, self.to_drg_transform)
def _compute_basis( self, thermodynamic_space: "ThermodynamicSpace", explicit_log_conc, explicit_drg0, explicit_drg, tolerance, ): """Compute the basis of the thermodynamic space, only making explicit the specified variables. """ log_conc_mean = thermodynamic_space.log_conc_mean.m log_conc_cov = thermodynamic_space.log_conc_cov.m drg0_prime_mean = thermodynamic_space.drg0_prime_mean.m_as("kJ/mol") drg0_prime_cov_sqrt = thermodynamic_space.drg0_prime_cov_sqrt.m_as("kJ/mol") S_con_T = thermodynamic_space.S_constraints.T RT = (R * thermodynamic_space.parameters.T()).m_as("kJ/mol") n_mets = log_conc_mean.size n_rxns = drg0_prime_mean.size # Use the eigenvalue decomposition to find the minimal basis for the # log concentrations. log_conc_cov_sqrt = covariance_square_root(log_conc_cov, tolerance) # Combined mean and squared root of the covariance of log-concentrations # and DrG'°. log_conc_drg0_mean = np.block([[log_conc_mean], [drg0_prime_mean]]) log_conc_drg0_cov_sqrt = scipy.linalg.block_diag( log_conc_cov_sqrt, drg0_prime_cov_sqrt ) # Construct the transform from log-concentrations and DrG'° to the # variables that must be explicitly represented ("observables"). mapping_blocks = [] if explicit_log_conc: mapping_blocks.append([np.identity(n_mets), np.zeros((n_mets, n_rxns))]) if explicit_drg0: mapping_blocks.append([np.zeros((n_rxns, n_mets)), np.identity(n_rxns)]) if explicit_drg: mapping_blocks.append([RT * S_con_T, np.identity(n_rxns)]) to_observables = np.block(mapping_blocks) observables_mean = to_observables @ log_conc_drg0_mean observables_cov_sqrt = to_observables @ log_conc_drg0_cov_sqrt # Find a square root for the covariance matrix with minimum # dimensionality using the QR decomposition. observables_cov_sqrt = LINALG.qr_rank_deficient(observables_cov_sqrt.T).T observables_cov_sqrt[np.abs(observables_cov_sqrt) < 1e-5] = 0 self._to_observables_transform = (observables_cov_sqrt, observables_mean) self._dimensionality = observables_cov_sqrt.shape[1] self._sigmas = la.norm(observables_cov_sqrt, axis=0).T # Finally, identify the transforms from the basis to the single # observables. current_id = 0 if explicit_log_conc: self._observables_ranges["log_conc"] = list( range(current_id, current_id + n_mets) ) self._to_log_conc_transform = ( observables_cov_sqrt[self.observables_ranges["log_conc"], :], observables_mean[self.observables_ranges["log_conc"]], ) current_id += n_mets if explicit_drg0: self._observables_ranges["drg0"] = list( range(current_id, current_id + n_rxns) ) self._to_drg0_transform = ( observables_cov_sqrt[self.observables_ranges["drg0"], :], observables_mean[self.observables_ranges["drg0"]], ) current_id += n_rxns if explicit_drg: self._observables_ranges["drg"] = list( range(current_id, current_id + n_rxns) ) self._to_drg_transform = ( observables_cov_sqrt[self.observables_ranges["drg"], :], observables_mean[self.observables_ranges["drg"]],
)