Module fia_analysis
[hide private]
[frames] | no frames]

Source Code for Module fia_analysis

   1  #!/usr/bin/python 
   2  """ 
   3  \n FIA - Fluxomers Iterative Algorithm \n 
   4   
   5  SYNOPSIS: 
   6      fia_analysis {-i[=--fia] PROJFILE | -t[=--ftbl] PROJFILE} [-o[=--output_file] OUTPUTFILE] 
   7   
   8  USAGE: 
   9      fia_analysis performs the following two main steps: 
  10       
  11          1) Metabolic pathway analysis. 
  12           
  13              Here FIA transforms the text file representing the metabolic network into 
  14              mathematical matrices required later for the MFA optimization process.             
  15               
  16          2) Metabolic fluxes evaluation. 
  17   
  18              Here FIA tries to find the fluxes that best suit the given metabolic system. 
  19              The output log of this stage is a file called    <PROJFILE>.out, 
  20              or else if specified by the "-o" flag. 
  21   
  22      fia_analysis supports two types of input file formats: FIA and 13CFlux FTBL. 
  23      The two are very much alike, except for the fact that the FIA format supports only declaration of 
  24      uni-directional fluxes, and does not contain the FLUXES and INEQUALITIES sections. 
   25      For bi-directional fluxes, FIA simply defines two separated fluxes. 
  26      When analyzing 13CFlux FTBL files, fia_analysis looks for C=0 constraints in the FLUXES XCH section 
  27      in order to determine directionality of fluxes. 
  28   
  29  OPTIONS: 
  30      -i --fia 
  31          Specifies that the metabolic pathway input file <PROJFILE> is a FIA formated file.  
   32          All fluxes are assumed unidirectional (for bi-directional specify 2 separated fluxes).         
  33   
  34      -t --ftbl 
  35          Specifies that the metabolic pathway input file <PROJFILE> is a 13CFlux FTBL formated file.          
  36          Flux is assumed unidirectional unless C=0 constraint is applied to it in the FLUXES XCH section. 
  37          The FIA formated file will be saved under <PROJFILE>.fia . 
  38   
  39     -o --output_file 
  40          optional.  
  41          Specifies the name of the result log output file. The default file name is PROJFILE.out 
  42           
  43      at least one of the above must be supplied. 
  44           
  45  USAGE EXAMPLE: 
  46   
  47  fia_analysis -t temp.ftbl 
   48      Runs analysis & evaluation of the network defined in the FTBL formatted input file temp.ftbl . 
  49      same as: 
  50          fia_analysis --ftbl temp.ftbl  
  51   
  52  fia_analysis --fia temp.fia -a   |   
  53      Runs only network analysis on the given FIA formated input file. 
  54      Same as: 
  55          fia_analysis -i temp.fia -a 
  56          fia_analysis -i temp.fia --analyze_only 
  57          fia_analysis --fia temp.fia --analyze_only 
  58           
  59  fia_analysis temp.fia -i -e 
  60      Runs only the fluxes evaluation on the FIA formated input file temp.fia . 
  61      This assumes analysis has been done before on the temp.fia file. 
  62      Same as: 
  63          fia_analysis -i temp.fia --evaluate_only 
  64          fia_analysis --fia temp.fia -e 
  65          fia_analysis --fia temp.fia --evaluate_only 
  66  """ 
  67   
  68  import sys 
  69  import getopt 
  70  import pickle 
  71  import time 
  72  #import getopt 
  73   
  74  #import time 
  75  #from  cvxmod import * 
  76  #import cvxmod 
  77  #import cvxopt 
  78  from numpy import matrix 
  79  from scipy.linalg import inv, det, eig 
  80  from scipy import sparse  
  81  #from scipy import zeros 
  82  from scipy import * 
  83  import scipy 
  84  from scipy import optimize 
  85  import progress_bar 
  86  import warnings 
  87  #import progressbar 
  88   
# Silence scipy's sparse-efficiency warnings: the code below assigns into
# sparse matrices in patterns scipy flags as inefficient but that are
# intentional here.
warnings.simplefilter('ignore',scipy.sparse.SparseEfficiencyWarning)
#warnings.simplefilter('ignore')

from scipy.sparse.linalg import umfpack as um

#from cvx_opt_matapp_solve import *

# Shared UMFPACK sparse-LU factorization context, reused by the LU /
# transition matrix computations further down in this module.
umfpack = um.UmfpackContext()

#from solve_orr import *
#import mlabwrap

prog_width = 40       # width (in characters) of the console progress bar
display_prog = False  # set to True to display progress bars during analysis

# NOTE(review): the stray shebang and duplicate "import sys" below suggest two
# scripts were concatenated into this module; both lines are harmless.
#!/usr/bin/env python

import sys
 106   
class MyWriter:
    """
    A file-handler replacement that duplicates the actions applied to
    the handler to a log file handler as well (a "tee" writer).

    Typical use is to replace ``sys.stdout`` so console output is also
    appended to a log file.
    """

    def __init__(self, stdout, filename):
        """
        Initialization of the file-handler duplicator.

        @type stdout: file
        @param stdout: Original file handler to be forked
        @type filename: string
        @param filename: Name of the log file into which actions should be
            replicated (opened in append mode)
        """
        self.stdout = stdout
        # open() instead of the Python-2-only file() builtin, so the class
        # also works on Python 3.
        self.logfile = open(filename, 'a')

    def write(self, text):
        """
        Replacement for the file handler's write method: writes to both
        the wrapped stream and the log file.
        """
        self.stdout.write(text)
        self.logfile.write(text)

    def flush(self):
        """
        Replacement for the file handler's flush method: flushes both handles.
        """
        self.stdout.flush()
        self.logfile.flush()

    def close(self):
        """
        Replacement for the file handler's close method: closes both handles.
        """
        self.stdout.close()
        self.logfile.close()
144 -class FtblFile:
145 """ 146 This class is the main object of the FIA algorithm. 147 Its main purposes are: 148 1. Conversion of FIA and FTBL files into python mathematical objects. 149 2. Solution of the MFA optimization problem 150 """ 151 filename = '' # filename 152 ftbl_file = '' # text file object 153
    def __init__ (self, projname, inpfiletype):
        """
        Parse the input file and run the full analysis + evaluation pipeline.

        @type projname: string
        @param projname: Name of the project to be analyzed.
        @type inpfiletype: string
        @param inpfiletype: Type of the input file to be analyzed. Can be either 'fia' (for fia input files) or 'ftbl' (for ftbl input files)
        """
        self.ProjName = projname #: Project's name
        self.labelinp_var = 0.001 #: Label input variance (it is not supplied with FTBL files).
        self.InputFileType = inpfiletype #: input file type (fia or ftbl)
        self.excell_pools = [] #: List of the external (global input or output) metabolic pools
        self.ftbl_file = self.load_file(projname) #: Parsed input file object (TAB splitted lists for the various lines of the input file)
        # display_prog / prog_width are module-level globals set at the top of
        # this module.
        if display_prog:
            prog_bar = progress_bar.ProgressBar(prog_width)
        self.create_pools_dict()
        if display_prog:
            prog_bar.update(50)
        self.create_flux_dict()
        if display_prog:
            prog_bar.update(100)
        print ' done.'
        sys.stdout.flush()
        # debug_level is a module global - presumably set by the command-line
        # parsing code elsewhere in this file (TODO confirm).
        if debug_level>1:
            print 'Found the following input/output pools:', self.excell_pools
        #self.create_flux_stoi_eq()
        # If we want only to analyze the network:

        self.create_U_matrix() # Generate flux transformation (isotopomeric->real) matrix
        self.create_label_meas_eq()
        self.create_ms_meas_eq() # Generate the mass spectrometry equations
        self.create_inp_eq_matrix()
        self.create_flux_meas_mat()
        self.create_global_meas_mat()

        # Evaluate_Only is a module-level flag (set from the CLI options); the
        # expensive transition matrices are skipped when only evaluating.
        if not (Evaluate_Only):
            self.create_label_transition_mat()

        #if not (Evaluate_Only):
        self.create_lu_trans_matrix()

        #self.save_data()
##        self.save_ftblfile(ProjName +".dat")

        # if not (Analyze_Only):
        self.evaluate()
200 - def save_ftblfile(self, filename):
201 """ 202 This function saves the entire FtblFile class to a file using python's pickle packge. 203 This enables the user to use the data analyzed later without the need of recalculating the 204 transition or LU matrices (which can take a bit of a time). 205 """ 206 if debug_level > 1: 207 print "Saving data in file: ", filename," ...", 208 f = open(filename,'w') 209 pickle.dump(self,f,-1) 210 print " done." 211 sys.stdout.flush()
212
    def save_data(self):
        """
        MATLAB debugging function.
        This function saves the mathematical objects used for the optimization process
        in files in order to be analyzed by U{MATLAB<http://www.mathworks.com>}.

        Relies on the module-level globals ProjPathName, ProjName, ext and
        gen_trans_mat, and on the helpers matlab_save_sparsed_matrix /
        save_text_vec / sort_dict_by_values defined elsewhere in this module.
        """
        matlab_save_sparsed_matrix(ProjPathName + ProjName + "_Q"+ext,self.Q)
        print "saving H..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + "_H"+ext,self.H)
        print "saving S..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + "_S"+ext,self.S)
        print "saving U..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + "_U"+ext,self.U)


        #print "saving alpha_M..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_alpha_M"+ext,self.alpha_M)
        #print "saving alpha_N..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_alpha_N"+ext,self.alpha_N)
        #print "saving alpha_trans matrix..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_alpha_mat"+ext,self.alpha_mat)
        #print "saving alpha_Q matrix..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_alpha_Q_mat"+ext,self.alpha_Q_mat)


        #print "saving UV_alpha_M..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_UV_alpha_M"+ext,self.UV_alpha_M)
        #print "saving UV_alpha_N..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_UV_alpha_N"+ext,self.UV_alpha_N)
        #print "saving UV_alpha_trans matrix..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_UV_alpha_mat_M"+ext,self.UV_alpha_mat_M)
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "_UV_alpha_mat_N"+ext,self.UV_alpha_mat_N)
        #print "saving alpha_Q matrix..."
        #matlab_save_sparsed_matrix(ProjPathName + ProjName + "UV_alpha_Q_mat"+ext,self.UV_alpha_Q_mat)


        print "saving labelinp_num..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelinp_num'+ext, self.labelinp_num )
        print "saving labelinp_denom..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelinp_denom'+ext, self.labelinp_denom )
        print "saving labelinp_value..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelinp_value'+ext, self.labelinp_value )

        print "saving labelmeas_num..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelmeas_num'+ext, self.labelmeas_num )
        print "saving labelmeas_denom..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelmeas_denom'+ext, self.labelmeas_denom )
        print "saving labelmeas_value..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelmeas_value'+ext, self.labelmeas_value )
        print "saving labelmeas_var..."
        matlab_save_sparsed_matrix(ProjPathName + ProjName + '_labelmeas_var'+ext, self.labelmeas_var)

        # Transition matrices exist only when they were generated
        # (gen_trans_mat module flag).
        if gen_trans_mat:
            print "saving trans_mat_a..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_trans_mat_a'+ext, self.trans_mat_a )
            print "saving trans_mat_a_beta..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_trans_mat_a_beta'+ext, self.trans_mat_a_beta )
            print "saving trans_mat_a_gamma..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_trans_mat_a_gamma'+ext, self.trans_mat_a_gamma )
            print "saving trans_mat_b..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_trans_mat_b'+ext, self.trans_mat_b )
            print "saving trans_mat_b_beta..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_trans_mat_b_beta'+ext, self.trans_mat_b_beta )
            print "saving beta_vec_M..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_beta_vec_M'+ext, self.beta_vec_M )
            print "saving beta_vec_N..."
            matlab_save_sparsed_matrix(ProjPathName + ProjName + '_beta_vec_N'+ext, self.beta_vec_N )
            save_text_vec(ProjPathName + ProjName + "_beta_vec_names.txt",self.beta_vec)
        print "saving text vectors..."
#        save_text_vec(ProjPathName + ProjName + "_label_names.txt",[self.fluxes.items()[self.fluxes.values().index(i)][0] for i in range(len(self.fluxes))])
        save_text_vec(ProjPathName + ProjName + "_label_names.txt",[a[0] for a in sort_dict_by_values(self.fluxes)])

        save_text_vec(ProjPathName + ProjName + "_flux_names.txt",self.flux_nw_vec.keys())
289 - def get_flux_name(self, flux_num):
290 ''' 291 returns the name of fluxer whose index is flux_num. 292 @type flux_num: number 293 @param flux_num: The number of the fluxomer we are looking for. 294 @rtype: string 295 @return: The name of the fluxomer. 296 ''' 297 return self.fluxes.items()[self.fluxes.values().index(flux_num)][0]
298
299 - def load_file(self, filename):
300 """ 301 This function loads the input file into the FtblClass object by: 302 1. Dropping comments (lines starting with "//") 303 2. Replacing EOL "\\n" or "\\r" with "" 304 3. Erasing spaces 305 Eventually we are left with a file containing only TABs. 306 This object is then transformed into an array of lists - every array element is a line of the parsed file, and splitted by TABs into a list object. 307 308 @type filename: string 309 @param filename: Name of the input file to be examined. 310 @rtype: list 311 @return: List of lists containing the TAB splitted parsed version of the input file. 312 """ 313 314 if debug_level > 1: 315 print "Opening the ftbl file... ", 316 sys.stdout.flush() 317 f = open(filename,"r") 318 r = [] 319 for line in f: 320 line = line.split("//")[0].replace("\n","").replace("\r","").replace(" ","") 321 if line.replace("\t","") != "" and line != "\t": 322 r.append(line) 323 f.close() 324 return r
325
    def find_segment(self, seg_name):
        '''
        This function finds the block "seg_name" block segment in the parsed input file L{self.ftbl_file} object.
        New segments begin with every non-indented line in the input file. Their names are the first string in these lines.

        @type seg_name: string
        @param seg_name: Name of the segment to be analyzed
        @rtype: list
        @return: A list containing the segment's rows, tab separated (or "" when the segment is absent).
        '''
        r = []

        first_line = -1
        last_line = -1
        counting = False
        # One pass over the parsed file: remember where the wanted segment
        # starts, and where the following non-indented line (the next
        # segment header) ends it.
        for e,i in enumerate(self.ftbl_file):
            if i[0] != "\t" and counting == True:
                last_line = e-1
                counting = False
            if i.lower().startswith(seg_name.lower()):
                first_line = e+1
                counting = True

        if first_line == -1:
            return "" # if we didn't find the segment, return ""

        # NOTE(review): if the segment is the last one in the file, last_line
        # stays -1 and the slice below is empty - presumably input files always
        # end each segment with another non-indented line; confirm.
        remove_leading = False
        if self.ftbl_file[first_line][0] == "\t":
            remove_leading = True
        for i in self.ftbl_file[first_line:last_line+1]:
            t = i.split("\t")
            if remove_leading == True:
                t=t[1:]
            r.append(t) # we always drop the first tab (since everything starts with tab)

        return r
    def create_pools_dict(self):
        """
        This function creates the initial metabolic pools dictionary object by analyzing
        the NETWORK section of the input file. It creates the L{self.pools} dictionary
        object - keys are pool names, values are the number of carbon atoms of each pool.
        """
        # Drop the NETWORK header row. The section then alternates between a
        # flux-definition row and the row holding its carbon-atom patterns.
        netseg = self.find_segment('NETWORK')[1:]
        pools = {}
        for linenum, line in enumerate(netseg):
            if line[0] != "": # a flux-definition row; the next row holds carbon patterns
                for k,j in enumerate(line[1:]):
                    if j != "":
                        # carbon pattern looks like "#ABC" -> 3 carbons, hence the -1
                        pools[j] = len(netseg[linenum+1][k+1])-1
        self.pools = pools #: Metabolic pools dictionary (Keys are pools names, values are the number of carbon atoms for each pool)
    def create_flux_dict(self):
        """
        This function creates and updates the main building blocks objects used for the metabolic pathway analysis.

        Classification of fluxes
        ========================
        Classification of fluxes into: uni-directional fluxes, bi-directional fluxes (FTBL files only) and input/output fluxes.
        The uni and bi directional analysis is only needed for FTBL files (FIA assumes all the fluxes are uni-directional).
        The decision is made by looking for 0 constrained exchange fluxes in the FLUXES section of the FTBL file.

        Re-generation of the NETWORK section - adding necessary virtual fluxes
        ======================================================================
        When we are given a chemical reaction that transforms two elements of the same metabolic pool into others, we translate
        it into a regular multi-input chemical reaction by adding a virtual pool and flux to our system:

        As an example::
            f: A + A -> B + C (flux "f" transforms A+A into B+C)
        is transformed into::
            f: A + A_s -> B + C (flux "f" transforms A+A_s into B+C)
            f_A: A -> A_s (flux "f_A" transforms A into A_s)

        This transformation is being done on the input file object itself (hence by changing the parsed NETWORK segment of the input file).
        The same applies for reversed fluxes of bi-directional FTBL files fluxes.

        Analysis of extra-cellular pools
        ================================
        Finding pools which are global input or output to the system, and save them in the L{self.excell_pools} object.
        These pools are recognized as pools with no input / output fluxes going into / exiting them.

        Re-generation of the NETWORK section - adding necessary reversed fluxes
        =======================================================================
        The mathematical analysis of the problem consists of flux vector representing uni-directional fluxes.
        Due to this reason, we need to transform the NETWORK section to contain only uni-directional fluxes by adding to it necessary reversed
        fluxes. This only applies when FTBL input files are supplied (since again, FIA files are assumed to contain only uni-directional fluxes anyway).

        Generation of the global fluxes vector
        ======================================
        Generation of the global fluxes (as opposed to fluxomers) vector dictionary object.
        The dictionary L{self.flux_nw_vec} keys are the names of the global fluxes in our system,
        and the values are the number of carbon atoms each of them transforms.

        Generation of the equality matrix
        =================================
        The equality matrix L{self.S} and its associated equality vector L{self.S_b} are the mathematical constraints of the global optimization problem
        that should apply on the global fluxes vector (hence self.S * x == self.S_b).
        The equality matrix is constructed out of 2 main elements:
            1. The stoichiometric equations - as analyzed from the parsed NETWORK section of the input file
            2. The equalities constraints from the EQUALITIES section of the input file.

        Generation of the equality measurements
        =======================================
        Equalities arising from the constraints section of the FLUXES section (only applies to FTBL files) are added
        to the system as measurements and not as constraints (as opposed to equalities from the EQUALITIES section).
        Note that this is only relevant for FTBL files. The relevant python objects are L{self.FCM} and L{self.FCv},
        and we have M{FCM*x = FCv} .

        Generation of the global equation list
        ======================================
        The entire metabolic network is defined by a set of fluxomeric equations defined in L{self.eq_list}.
        The structure of this list is::
            [ [ poolname : ([input fluxes], [output fluxes])], ... ]
        where the input and output fluxes are lists of the names of the incoming and outgoing fluxes.
        """
        # First line is the header:
        netseg = self.find_segment('NETWORK')
        #: IOvec analyzes the header of the NETWORK section in order to decide which lines are inputs to the metabolic pools, and which are outputs.
        IOvec = []
        for i in netseg[0][1:]:
            if i != "":
                IOvec.append( i[0] ) # educt, #product

        FluxHeader = netseg[0]
        head_range = range(1,5)
        netseg = netseg[1:]

        # If we are dealing with a FTLB file, we need to find the unidirectional fluxes (for FIA, all the fluxes are unidirectional).
        # We do that by finding the 0 constrained XCH fluxes.
        unidir_fluxes = []
        if self.InputFileType == 'ftbl':
            fluxseg = self.find_segment('FLUXES')
            in_xch_sec = False
            for l in fluxseg:
                if l[0] == 'XCH':
                    in_xch_sec = True
                    in_net_sec = False
                if in_xch_sec and l[2]=='C':
                    if float(l[3]) != 0:
                        print "Error while parsing the FLUXES XCH section. Only 0 constraints are allowed."
                        sys.exit(-1)
                    unidir_fluxes.append(l[1].replace("'",""))
        # end of uni-dir analysis

        # Re-generation of the NETWORK section, looking for fluxes that take 2 elements of their input pool. See the function documentation for more details.
        # Generation of the fluxes_data object, representing the updated NETWORK section.
        fluxes_data = []
        for linenum, line in enumerate(netseg):
            if line[0] != "":
                if line[1] == line[2]: # if we have flux that requires 2 of the same input pool, we add a virtual pool for ease of implementation
                    line[1] = line[1] + "_" + line[0] # add the virtual pool
                    tmpline = [line[0]+"_s", line[2],"",line[1],""]
                    tmpline2 = ["",netseg[linenum+1][1],"",netseg[linenum+1][1],""]
                    self.pools[line[1]]=len(netseg[linenum+1][1])-1
                    unidir_fluxes.append(tmpline[0])
                    fluxes_data.append( [line,netseg[linenum+1]] )
                    fluxes_data.append( [tmpline,tmpline2] )
                else:
                    fluxes_data.append( [line,netseg[linenum+1]] )

                # if someone forgot to put "TAB" in the end of the line, we'll add it for him.
                if len(line) == 4:
                    line+=[""]
                # if that's a reversed flux we'll add it by hand:
                if self.InputFileType == 'ftbl' and (line[3]==line[4]) and (line[0] not in unidir_fluxes): # if that's a bidir pool and it becomes 2 instances of the pool
                    # problem if it's a 1->2 flux that goes into an output ...
                    unidir_fluxes.append(line[0])
                    # add the reversed line:
                    tmpline_2 = [line[0]+"_r",line[3],line[4]+"_"+line[0],line[1],line[2]]
                    tmpline2_2 = ["",netseg[linenum+1][3],netseg[linenum+1][4],netseg[linenum+1][1],netseg[linenum+1][2]]
                    fluxes_data.append( [tmpline_2,tmpline2_2] )
                    unidir_fluxes.append(tmpline_2[0])
                    # add the line that goes to line[0]_s:
                    tmpline = [line[0]+"_s",line[3],"",line[4]+"_"+line[0]]
                    unidir_fluxes.append(tmpline[0])
                    tmpline2 = ["",netseg[linenum+1][3],"",netseg[linenum+1][3],""]
                    self.pools[line[4]+"_"+line[0]]=len(netseg[linenum+1][3])-1
                    fluxes_data.append( [tmpline,tmpline2] )

        # find excell_pool (find pools which are only input of output - "extra-cellular pools")
        for p in self.pools:
            inp = 0
            out = 0
            for f in fluxes_data:
                for ind,a in enumerate(IOvec):
                    if len(f[0])>ind+1: # in case someone forgot to put TABS after his last entered flux
                        if a[0] == "P" and f[0][ind+1] == p: #if it's a product (flux f[0][0] goes into p)
                            inp+=1
                        if a[0] == "E" and f[0][ind+1] == p: #if it's an educt (flux f[0][0] goes out of p)
                            out+=1
                    else:
                        f[0].append('')
            if inp==0 or out==0:
                self.excell_pools.append(p)

        # add bidirectional fluxes to fluxes_data for FTBL input files:
        if self.InputFileType == 'ftbl':
            fluxseg = self.find_segment('FLUXES')

            print "Found the following unidirectional fluxes:", unidir_fluxes
            #print "All the rest of the fluxes (except for global input/output) are assumed to be bi-directional."
            # Deep copy so appends below do not disturb the iteration.
            fluxes_data_copy=[list([list(z) for z in k]) for k in fluxes_data]
            bidir_fluxes = []
            io_fluxes_inp = []
            io_fluxes_out = []
            for l in fluxes_data_copy:
                excell_flux = False
                for p in self.excell_pools:
                    if p in l[0][1:3]:
                        excell_flux = True
                        io_fluxes_inp.append(l[0][0])
                    if p in l[0][3:5]:
                        excell_flux = True
                        io_fluxes_out.append(l[0][0])
                if (excell_flux==False) and (l[0][0] not in unidir_fluxes): # if that's a bidirectional flux
                    k=[list(q) for q in l]
                    # Let's make the reverse flux out of the current given flux:
                    k[0][0]=l[0][0]+'_r'
                    k[0][1]=l[0][3]
                    k[0][2]=l[0][4]
                    k[0][3]=l[0][1]
                    k[0][4]=l[0][2]
                    k[1][1]=l[1][3]
                    k[1][2]=l[1][4]
                    k[1][3]=l[1][1]
                    k[1][4]=l[1][2]
                    fluxes_data.append(k) # let's add its reverse flux to the network as well
                    bidir_fluxes.append(l[0][0])
            print "Found the following bidirectional fluxes:", bidir_fluxes
            print "Found the following global input fluxes:", io_fluxes_inp
            print "Found the following global output fluxes:", io_fluxes_out
            self.io_fluxes_inp=io_fluxes_inp
            self.io_fluxes_out=io_fluxes_out
            io_fluxes = io_fluxes_inp+io_fluxes_out
            self.fluxes_data = fluxes_data

        # Create the global fluxes dictionary object. A dictionary with flux width for every flux_name
        flux_nw_vec = {} # fluxes name_width dict

        for f in fluxes_data:
            cur_width = 0
            for ind,a in enumerate(IOvec):
                if a[0] == "E" and f[0][ind+1] != "": #if it's an educt (reactant) (coming out of)
                    cur_width += len(f[1][ind+1]) - 1 # len of (#ABCD) - 1
            flux_nw_vec [ f[0][0] ] = cur_width # append [fluxname, flux_width]
        self.flux_nw_vec = flux_nw_vec

        # Create the equality constraints S matrix.
        # First we add to it the stoichiometric equations:
        S = sparse.lil_matrix((len(self.pools)-len(self.excell_pools), len(self.flux_nw_vec)))
        sr = 0
        for p in self.pools:
            if p not in self.excell_pools:
                # for every pool that needs to stay in equilibrium
                inp = []
                out = []
                for f in fluxes_data:
                    for ind,a in enumerate(IOvec):
                        if a[0] == "P" and f[0][ind+1] == p: #if it's a product (flux f[0][0] goes into p)
                            inp.append(self.get_metabolic_flux_index(f[0][0]))
                        if a[0] == "E" and f[0][ind+1] == p: #if it's an educt (flux f[0][0] goes out of p)
                            out.append(self.get_metabolic_flux_index(f[0][0]))
                for k in inp:
                    S[sr, k] = S[sr, k] + 1
                for k in out:
                    S[sr, k] = S[sr, k] - 1
                sr += 1

        # Add to S the equality constraints from the EQUALITIES section of the input file
        S_b = [0]*S.shape[0]
        eqseg = self.find_segment('EQUALITIES')
        in_net=False
        for line in eqseg:
            if line[0] == 'NET':
                in_net=True
            elif line[0] == 'XCH':
                in_net=False
            elif in_net==True and line[1] != 'VALUE' and line[1] != '':
                S_b.append(float(line[1]))
                cur_formula=line[2]
                cur_formula.split("+")
                # Split the formula "a*f1+b*f2-c*f3..." into positive and
                # negative coefficient/flux-name pairs.
                pos_args=[k.split("-")[0] for k in cur_formula.split("+") if k.split("-")[0] != '']
                neg_args=[k.split("+")[0] for k in cur_formula.split("-")[1:] if k.split("+")[0] != '']
                for ind,q in enumerate(neg_args):
                    if len(q.split("*")) == 1:
                        q="1*"+q
                        neg_args[ind] = q
                    neg_args[ind]=neg_args[ind].split("*")
                for ind,q in enumerate(pos_args):
                    if len(q.split("*")) == 1:
                        q="1*"+q
                        pos_args[ind] = q
                    pos_args[ind]=pos_args[ind].split("*")
                cur_row = S.shape[0] # let's add a new row to S...
                S = S.reshape( (S.shape[0]+1,S.shape[1]) )
                for k in pos_args:
                    if self.InputFileType == 'ftbl':
                        if k[1] in unidir_fluxes or k[1] in io_fluxes:
                            S[cur_row,self.get_metabolic_flux_index(k[1])] += float(k[0])
                        elif k[1] in bidir_fluxes:
                            # net flux of a bidirectional pair = forward - reverse
                            S[cur_row,self.get_metabolic_flux_index(k[1])] += float(k[0])
                            S[cur_row,self.get_metabolic_flux_index(k[1]+"_r")] -= float(k[0])
                    else:
                        S[cur_row,self.get_metabolic_flux_index(k[1])] += float(k[0])
                for k in neg_args:
                    if self.InputFileType == 'ftbl':
                        if k[1] in unidir_fluxes or k[1] in io_fluxes:
                            S[cur_row,self.get_metabolic_flux_index(k[1])] -= float(k[0])
                        elif k[1] in bidir_fluxes:
                            S[cur_row,self.get_metabolic_flux_index(k[1])] -= float(k[0])
                            S[cur_row,self.get_metabolic_flux_index(k[1]+"_r")] += float(k[0])
                    else:
                        S[cur_row,self.get_metabolic_flux_index(k[1])] -= float(k[0])
        #: Stoichiometric equality vector
        self.S_b = matrix(S_b).T
        #: Stoichiometric matrix (with equality constraints)
        self.S = sparse.csr_matrix(S)

        FCM = [] #sparse.lil_matrix( (1,S.shape[1]) )

        FCv = [] #sparse.lil_matrix( (1,1) ) # The (FCM*x = FCv) vector

        # Add the C constraints from the FLUXES section to the measurements matrices (as opposed to the EQUALITIES constraints):
        if self.InputFileType == 'ftbl':
            fluxseg = self.find_segment('FLUXES')
            unidir_fluxes = []
            in_xch_sec = False
            for l in fluxseg:
                if l[0] == 'NET':
                    in_xch_sec = False
                    in_net_sec = True
                if l[0] == 'XCH':
                    in_xch_sec = True
                    in_net_sec = False
                if in_xch_sec and l[2]=='C':
                    if float(l[3]) != 0:
                        print "Error while parsing the FLUXES XCH section. Only 0 constraints are allowed."
                        sys.exit(-1)
                if in_net_sec and l[2]=='C':
                    if (l[1] not in unidir_fluxes) and (l[1] not in io_fluxes):
##                        FCM.append([l[1],l[1]+'_r'])
                        print "Constraints over bi-directional fluxes are currently not supported."
                        print l[1]
                        print io_fluxes
                        sys.exit(-1)
                    else:
                        FCM.append(l[1])
                        FCv.append(l[3])
        #: Fluxes constraints matrix from the FLUXES section - added as measurements (FCM*x = FCv)
        self.FCM = FCM
        #: Fluxes constraints values from the FLUXES section - added as measurements (FCM*x = FCv)
        self.FCv = FCv

        # Create full fluxes vector (with isotope labels)
        fluxes = [] # full flux name list
        for f in flux_nw_vec:
            for i in range(2**flux_nw_vec[f]): # 2^flux_width
                fluxes.append(f+"/"+num2bin(i,flux_nw_vec[f]))
        flux_dict = {}
        for k in fluxes:
            flux_dict[k] = fluxes.index(k)
        self.fluxes = flux_dict # dictionary with flux names and indeces

        # Create the equation list. For each flux: [ poolname : ([input], [output])]
        eq_list = []
        for p in self.pools: # for every pool P
            #if p not in self.excell_pools: # if that's not an extra-cellular pools
            if 1:
                for iso in range(2**self.pools[p]): # for each isotope of pool P
                    iso_bin = num2bin(iso,self.pools[p])
                    # find all incoming fluxes:
                    tmp_flux = ""
                    for f in fluxes_data:
                        # Create an isotope template for this flux:
                        tmp_flux_carbons = "" # temp carbons string
                        for i in head_range:
                            if FluxHeader[i][0] == 'E':
                                tmp_flux_carbons = tmp_flux_carbons + f[1][i][1:]
                        # If necessary, replace the letters with our selected isotope
                        saved_tmp_flux_carbons = tmp_flux_carbons # let's make sure that if the same flux comes twice, we count it twice.
                        for k in head_range:
                            tmp_flux_carbons = saved_tmp_flux_carbons
                            if f[0][k] == p:
                                # let's go over all the carbons of our educt metabolite and replace its carbons with iso_bin
                                for ind,i in enumerate(f[1][k][1:]):
                                    tmp_flux_carbons = tmp_flux_carbons.replace(i, iso_bin[ind])
                                # let's replace the carbons left with \x00 (x's)
                                a = FluxHeader[k][0];
                                if a == "E": #if it's an educt (reactant) we add it to the equation
                                    tmp_flux += "*+"
                                else: #if it's a product, we take it off our equation
                                    tmp_flux += "*-"
                                tmp_flux += f[0][0] + "/" # add the flux name and "/"
                                for i in tmp_flux_carbons:
                                    if i == "0" or i == "1":
                                        tmp_flux +=i
                                    else:
                                        tmp_flux +="\x00"
                    in_flux = []
                    out_flux = []
                    for k in tmp_flux.split("*")[1:]:
                        if k[0] == "+":
                            for z in expand_x_values(k[1:]):
                                #in_flux.append(self.fluxes[z])
                                in_flux.append(z)
                        else:
                            for z in expand_x_values(k[1:]):
                                #out_flux.append(self.fluxes[z])
                                out_flux.append(z)
                    eq_list.append([p, in_flux, out_flux, iso_bin])
        #: Global equation list. Each element is built as: [ poolname : ([input fluxomers], [output fluxomers])]
        self.eq_list = eq_list
736 - def get_metabolic_flux_index(self, flux_name):
737 """ 738 This function returns the index of the global metabolic flux "flux_name". 739 740 @type flux_name: string 741 @param flux_name: The name of the flux we want to find. 742 @rtype: number 743 @return: The index of the global metabolic flux "flux_name". 744 """ 745 return self.flux_nw_vec.keys().index(flux_name)
746
747 - def create_flux_meas_mat(self):
748 ''' 749 This function parses the FLUX_MEASUREMENTS segment of the ftbl file. 750 It creates the following matrices: 751 1. L{self.flux_meas_mat} - The matrix choosing the measured metabolic fluxes out of the fluxomer vector. 752 2. L{self.flux_meas_values} - The measured values. 753 3. L{self.flux_meas_variance} - The measurement variance vector. 754 Eventually we will search for:: 755 M{min [ (self.flux_meas_mat * x - self.flux_meas_values_).T * diag(self.flux_meas_variance) * (self.flux_meas_mat * x - self.flux_meas_values_)] } 756 ''' 757 if debug_level > 1: 758 print "Creating the input flux measurement matrices (flux_meas_*)... ", 759 sys.stdout.flush() 760 # Find out what labels we have 761 # labels is a list with the names of the measured labels 762 fluxseg = self.find_segment("FLUX_MEASUREMENTS") 763 self.fluxseg = fluxseg 764 765 flux_meas_mat = zeros( (len(fluxseg)-1+len(self.FCM),len(self.fluxes)) ) 766 flux_meas_values = zeros( (len(fluxseg)-1+len(self.FCM),1) ) 767 flux_meas_var = zeros( (len(fluxseg)-1+len(self.FCM),1) ) 768 769 tU = matrix(self.U.toarray()) 770 for linenum, line in enumerate(fluxseg[1:]): 771 flux_meas_mat[linenum,:] = tU[self.get_metabolic_flux_index(line[0])] 772 flux_meas_values[linenum] = float(line[1]) 773 flux_meas_var[linenum] = float(line[2]) 774 775 cur_line = linenum+1 776 for (flxname,val) in zip(self.FCM, self.FCv): 777 #for ind,k in enumerate(flxname): 778 # print k 779 # print self.nw_flux_vec 780 flux_meas_mat[cur_line,:] = tU[self.get_metabolic_flux_index(flxname)] 781 flux_meas_values[cur_line] = float(val) 782 flux_meas_var[cur_line] = 1e-7 ### TEMPORARY VALUE 783 cur_line+=1 784 #: Flux values measurement matrix 785 self.flux_meas_mat = flux_meas_mat 786 #: Flux values measurement vector 787 self.flux_meas_values = flux_meas_values 788 #: Flux values measurement variance vector 789 self.flux_meas_var = flux_meas_var 790 print ' done.' 791 sys.stdout.flush()
792
793 - def create_global_meas_mat(self):
794 """ 795 This function creates the objective function for the MFA optmization problem 796 (the global "G" measurement matrix). 797 798 The objective of the MFA problem is to minimize:: 799 M{|| tG * x - tB ||^2} 800 801 The matrix L{self.tG} is created by concatanation of the 3 possible measurments matrices: 802 1. Label measurements (L{self.labelinp_num} and L{self.labelinp_denom}). 803 2. Fluxes measurement (L{self.labelmeas_num} and L{self.labelmeas_denom}). 804 3. Mass-Spectrometry measurements (L{self.ms_meas_num} and L{self.ms_meas_denom}). 805 806 The vector L{self.tB} is created by concatanation of appropriate values of the above. 807 """ 808 809 print "Creating the MFA optimization objective matrix... ", 810 sys.stdout.flush() 811 # The variance of the measured input labels is assumed to be the constant: self.labelinp_var 812 label_inp_sigma = (self.labelinp_var**(-1))*ones( (self.labelinp_value.shape[0],1) ) 813 814 # Fisrt we add the label input measurements: 815 LS_num_mat = (sdiag(label_inp_sigma)*self.labelinp_num).toarray() 816 LS_value = self.labelinp_value.toarray() 817 LS_denom_mat = (sdiag(label_inp_sigma)*self.labelinp_denom).toarray() 818 819 # now, if needed, we add the label measurements: 820 if len(self.labelmeas_value) > 1: 821 LS_num_mat = concatenate( (LS_num_mat, sdiag(one_div_vec(self.labelmeas_var))*self.labelmeas_num.toarray()) ) 822 LS_value = concatenate( (LS_value, matrix(self.labelmeas_value).T)) 823 LS_denom_mat = concatenate( (LS_denom_mat, (sdiag(one_div_vec(self.labelmeas_var))*self.labelmeas_denom).toarray()) ) 824 825 # Last but not least, the mass-spectrometry measurements: 826 if len(self.ms_meas_value) > 1: 827 LS_num_mat = concatenate( (LS_num_mat, sdiag(one_div_vec(self.ms_meas_var))*self.ms_meas_num.toarray()) ) 828 LS_value = concatenate( (LS_value, matrix(self.ms_meas_value).T) ) 829 LS_denom_mat = concatenate( (LS_denom_mat, (sdiag(one_div_vec(self.ms_meas_var))*self.ms_meas_denom).toarray()) ) 830 831 G = 
LS_num_mat - sdiag(LS_value)*LS_denom_mat 832 self.G = G 833 #: Global measurement matrix. The optimization problem seeks to minimize || tG*x - tB || 834 self.tG = sparse.csr_matrix(vstack( (self.G,sparse.spdiags(10/self.flux_meas_var.T,[0],len(self.flux_meas_var),len(self.flux_meas_var))* self.flux_meas_mat) ) ) 835 #: Global measurement vector. The optimization problem seeks to minimize || tG*x - tB || 836 self.tB = vstack( (zeros( (shape(self.G)[0],1) ) ,sparse.spdiags(10/self.flux_meas_var.T,[0],len(self.flux_meas_var),len(self.flux_meas_var))*self.flux_meas_values) ) 837 838 # vector that indicates the number of input flux the measurement was taken from. used for speeding up things. 839 self.labelinp_num_fluxes = nonzero(self.U*(self.labelinp_denom).T)[0] 840 print " done." 841 sys.stdout.flush()
842
843 - def trnasform_eq_into_matrix(self, eq):
844 """ 845 This function transforms string equation of fluxes into a fluxomers stoichiometric matrix. 846 For example:: 847 "*+flux1/0*-flux2/1" -> [1 0 0 0; 0 0 0 -1]) 848 849 @type eq: string 850 @param eq: An string equation. Every equation element is seperated by either "*+" or "*-" with the fluxomer name following it. 851 No preciding numbers are allowed, only plain fluxomer names. 852 For multiple instances of the same fluxomer, simply add/substract it more than once to the equation. 853 @rtype: CSR matrix 854 @return: The fluxomer stoichiometric matrix represented by eq. 855 """ 856 S = sparse.lil_matrix((len(eq), len(self.fluxes)) ) 857 for (ind, k) in enumerate(eq): 858 t = k.split("*")[1:] 859 for h in t: 860 m = self.fluxes[h[1:]] 861 if h[0] == "+": 862 S[ind, m] = S[ind, m] + 1 863 else: 864 S[ind, m] = S[ind, m] - 1 865 return S.tocsr()
866 867
868 - def create_label_meas_eq(self):
869 ''' 870 This function parses the LABEL_MEASUREMENTS segment of the ftbl file. 871 The output of the function is the matrices and vectors: 872 1. L{self.labelmeas_num} - The label measurements numerator matrix 873 2. L{self.labelmeas_denom} - The label measurements denominator matrix 874 3. L{self.labelmeas_value} - The label measurements values vector 875 4. L{self.labelmeas_var} - The label measurements values vector 876 877 The objective function will be to minimize:: 878 M{ ||diag(self.labelmeas_var) * [self.labelmeas_num - diag(self.labelmeas_value) * self.labelmeas_denom]*x ||^2 } 879 ''' 880 if debug_level > 1: 881 print "Creating the label measurement matrices (label_meas_*)... ", 882 sys.stdout.flush() 883 # Find out what labels we have 884 # labels is a list with the names of the measured labels 885 labseg = self.find_segment("LABEL_MEASUREMENTS") 886 self.labseg = labseg 887 labels = [] 888 meas_numerator = [] 889 meas_value = [] 890 meas_var = [] 891 cur_label = "" 892 for linenum, line in enumerate(labseg): 893 if line[0] != "": 894 cur_label = line[0] 895 labels.append(cur_label) 896 if len(line) >= 4: 897 meas_numerator.append(cur_label +"/"+line[4][1:].replace("x","\x00")) 898 meas_value.append(line[2]) 899 meas_var.append(line[3]) 900 901 # now meas_numerator is a list constructed as follows: [ 'EDUCT_NAME1/ISOTOPE1', 'EDUCT_NAME2/ISOTOPE2',...] 902 # meas_value are the measurement values for the numerators of meas_numerator 903 # meas_var is the variance for the above vectors. 904 res_num = [] 905 res_denom = [] 906 # now we need to translate the pool and isotope to our flux variables. 
907 908 for (mn,mv, mvar) in zip(meas_numerator[1:], meas_value[1:], meas_var[1:]): 909 (fname, iso_bin) = mn.split("/") 910 iso_bin_group = expand_x_values(iso_bin) 911 iso_dem_group = expand_x_values(len(iso_bin)*'\x00') 912 tmp_num_eq = "*+" 913 tmp_dem_eq = "*+" 914 #Found=False 915 for k in self.eq_list: 916 #if k[0] == fname and (Found==False) and k[3] in iso_bin_group: 917 if k[0] == fname and k[3] in iso_bin_group: 918 #Found=True 919 tmp_num_eq += "*+".join(k[1]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 920 # for k in self.eq_list: 921 if k[0] == fname and k[3] in iso_dem_group: 922 tmp_dem_eq += "*+".join(k[1]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 923 924 res_num.append(tmp_num_eq[:-2]) 925 res_denom.append(tmp_dem_eq[:-2]) 926 if len(res_num)>0: 927 #: label measurement numerator matrix 928 self.labelmeas_num = self.trnasform_eq_into_matrix(res_num) 929 #: label measurement denominator matrix 930 self.labelmeas_denom = self.trnasform_eq_into_matrix(res_denom) 931 #: label measurement values vector 932 self.labelmeas_value = sparse.lil_matrix((len(meas_value[1:]),1)) 933 #self.labelmeas_value = zeros((len(meas_value[1:]),1)) 934 for (x,y) in enumerate(meas_value[1:]): 935 if float(y) == 0: 936 y='1e-7' 937 self.labelmeas_value[x,0] = float(y) 938 self.labelmeas_value.tocsr() 939 #self.labelmeas_value = sparse.dok_matrix(self.labelmeas_value) 940 #: label measurement variance vector 941 self.labelmeas_var = sparse.lil_matrix((len(meas_var[1:]),1) ) 942 for (x,y) in enumerate(meas_var[1:]): 943 self.labelmeas_var[(x,0)] = float(y) 944 self.labelmeas_var.tocsr() 945 self.labelmeas_value = self.labelmeas_value.T.toarray()[0] 946 else: 947 self.labelmeas_num = [] 948 self.labelmeas_denom = [] 949 self.labelmeas_var = [] 950 self.labelmeas_value = [] 951 print " done." 952 sys.stdout.flush()
953
954 - def create_ms_meas_eq(self):
955 ''' 956 This function parses the MASS_SPECTROMETRY segment of the ftbl file. 957 The output of the function is the matrices and vectors: 958 1. L{self.ms_meas_num} - The MS measurements numerator matrix 959 2. L{self.ms_meas_denom} - The MS measurements denominator matrix 960 3. L{self.ms_meas_value} - The MS measurements values vector 961 4. L{self.ms_meas_var} - The MS measurements values vector 962 963 We always treat these measurement as a ratio measurement between the MS values, relative to the 0 MS measurement 964 (hence we add to the objective the ratios: mass1/mass0, mass2/mass0 etc'). 965 966 The objective function will be to minimize:: 967 M{ ||diag(self.ms_meas_var) * [self.ms_meas_num - diag(self.ms_meas_value) * self.ms_meas_denom]*x ||^2 } 968 969 ''' 970 if debug_level > 1: 971 print "Creating the mass spectrometry measurement matrices (ms_meas_*)... ", 972 sys.stdout.flush() 973 # Find out what labels we have 974 # labels is a list with the names of the measured labels 975 msseg = self.find_segment("MASS_SPECTROMETRY") 976 self.msseg = msseg 977 978 labels = [] 979 ms_numerator = [] 980 ms_value = [] 981 ms_var = [] 982 ms_denom = [] 983 cur_label = "" 984 denom_items_counter = 0 985 for linenum, line in enumerate(msseg[1:]): 986 if line[0] != "": 987 cur_label = line[0] 988 for k in range(denom_items_counter): 989 t_save_denom = [save_denom[0][0]] 990 for q in save_denom: 991 t_save_denom+=q[1:] 992 ms_denom.append(t_save_denom) 993 ms_value.append(save_value_vec[k]/save_value) 994 save_denom = [] 995 save_value_vec = [] 996 save_value = 0 997 denom_items_counter = 0 998 if line[1] != "": 999 cur_indices = [int(k)-1 for k in line[1].split(",")] 1000 if len(line) >= 4: 1001 denom_items_counter+=1 1002 ms_is_vec = create_ms_isotopomers_dict(self.pools[cur_label], cur_indices) 1003 nume = [cur_label]+[q.replace("x","\x00") for q in ms_is_vec[int(line[2])]] 1004 ms_numerator.append(nume) 1005 save_denom.append(nume) 1006 save_value+=float(line[3]) 
1007 save_value_vec.append(float(line[3])) 1008 ms_var.append(line[4]) 1009 1010 for k in range(denom_items_counter): 1011 t_save_denom = [save_denom[0][0]] 1012 for q in save_denom: 1013 t_save_denom+=q[1:] 1014 ms_denom.append(t_save_denom) 1015 ms_value.append(save_value_vec[k]/save_value) 1016 1017 # now meas_numerator is a list constructed as follows: [ 'EDUCT_NAME1/ISOTOPE1', 'EDUCT_NAME2/ISOTOPE2',...] 1018 # meas_value are the measurement values for the numerators of meas_numerator 1019 # meas_var is the variance for the above vectors. 1020 res_num = [] 1021 res_denom = [] 1022 # now we need to translate the pool and isotope to our flux variables. 1023 1024 for (mn,md, mv, mvar) in zip(ms_numerator, ms_denom, ms_value, ms_var): 1025 #fname_isobin = [k.split("/") for k in mn] 1026 iso_bin_group = [] 1027 for k in mn[1:]: 1028 iso_bin_group+=expand_x_values(k) 1029 fname=mn[0] 1030 iso_dem_group=[] 1031 fdnom_name=md[0] 1032 for k in md[1:]: 1033 iso_dem_group+=expand_x_values(k) 1034 1035 tmp_num_eq = "*+" 1036 tmp_dem_eq = "*+" 1037 #Found=False 1038 for k in self.eq_list: 1039 #if k[0] == fname and (Found==False) and k[3] in iso_bin_group: 1040 if k[0] == fname and k[3] in iso_bin_group: 1041 #Found=True 1042 if fname in self.excell_pools: 1043 if len(k[1])>0: 1044 tmp_num_eq += "*+".join(k[1]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 1045 else: 1046 tmp_num_eq += "*+".join(k[2]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 1047 else: 1048 tmp_num_eq += "*+".join(k[1]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 1049 # for k in self.eq_list: 1050 if k[0] == fdnom_name and k[3] in iso_dem_group: 1051 if fdnom_name in self.excell_pools: 1052 if len(k[1])>0: 1053 tmp_dem_eq += "*+".join(k[1]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 1054 else: 1055 tmp_dem_eq += "*+".join(k[2]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 1056 else: 1057 tmp_dem_eq += "*+".join(k[1]) + "*+" #+q #q.split("/")[0]+"/"+iso_bin 1058 1059 res_num.append(tmp_num_eq[:-2]) 1060 
res_denom.append(tmp_dem_eq[:-2]) 1061 if len(res_num)>0: 1062 #print res_num 1063 #: Mass-spectrometry numerator matrix 1064 self.ms_meas_num = self.trnasform_eq_into_matrix(res_num) 1065 #: Mass-spectrometry dominator matrix 1066 self.ms_meas_denom = self.trnasform_eq_into_matrix(res_denom) 1067 #: Mass-spectrometry measurement values vector 1068 self.ms_meas_value = sparse.lil_matrix((len(ms_value),1)) 1069 1070 #self.labelmeas_value = zeros((len(meas_value[1:]),1)) 1071 for (x,y) in enumerate(ms_value): 1072 if float(y) == 0: 1073 y='1e-7' 1074 self.ms_meas_value[x,0] = float(y) 1075 self.ms_meas_value.tocsr() 1076 #self.labelmeas_value = sparse.dok_matrix(self.labelmeas_value) 1077 self.ms_meas_var = sparse.lil_matrix((len(ms_var),1) ) 1078 for (x,y) in enumerate(ms_var): 1079 self.ms_meas_var[(x,0)] = float(y) 1080 #: Mass-spectrometry measurement variance vector 1081 self.ms_meas_var.tocsr() 1082 self.ms_meas_value = self.ms_meas_value.T.toarray()[0] 1083 else: 1084 self.ms_meas_num = [] 1085 self.ms_meas_denom = [] 1086 self.ms_meas_value = [] 1087 self.ms_meas_var = [] 1088 print " done." 1089 sys.stdout.flush()
1090
1091 - def create_U_matrix(self):
1092 ''' 1093 This function constructs the U matrix, which transforms the fluxomers vector 1094 into the metaoblic fluxes vector. hence:: 1095 M{u = U*x} 1096 where:: 1097 u - the metabolic flux vector 1098 x - the fluxomers vector 1099 ''' 1100 if debug_level > 1: 1101 print "Creating the U matrix...", 1102 sys.stdout.flush() 1103 eq = [] 1104 for f in self.flux_nw_vec: 1105 m = "*+" + f + "/"+"\x00"*(self.flux_nw_vec[f]) 1106 eq.append("*"+"*".join(expand_x_values(m))) 1107 self.U = sparse.csr_matrix(self.trnasform_eq_into_matrix(eq)) 1108 print " done." 1109 sys.stdout.flush()
1110
1111 - def create_inp_eq_matrix(self):
1112 ''' 1113 This function parses the LABEL_INPUT segment. 1114 The output of the function is the matrices and vectors: 1115 1. L{self.labelinp_num} - The flux value measurements numerator matrix 1116 2. L{self.labelinp_denom} - The flux value measurements denominator matrix 1117 3. L{self.labelinp_value} - The flux value measurements values vector 1118 1119 The objective function will be to minimize:: 1120 M{ ||diag(self.labelinp_var) * [self.labelinp_num - diag(self.labelinp_value) * self.labelinp_denom]*x ||^2 } 1121 1122 @note: since not supplied by the FTBL files, the value of self.labelinp_value is a global assigned constant. 1123 1124 ''' 1125 if debug_level > 1: 1126 print "Creating the input label measurements matrices (labelinp_*)... ", 1127 sys.stdout.flush() 1128 # Find out what labels we have 1129 # labels is a list with the names of the measured labels 1130 labelinpseg = self.find_segment("LABEL_INPUT")[1:] 1131 labels = [] 1132 meas_eq = [] 1133 meas_value = [] 1134 meas_label = [] 1135 cur_label = "" 1136 for linenum, line in enumerate(labelinpseg): 1137 if line[0] != "": 1138 cur_label = line[0] 1139 if len(line) >= 3: 1140 meas_label.append( cur_label ) 1141 meas_eq.append( line[1][1:] ) 1142 meas_value.append(line[2]) 1143 1144 # now meas_eq is a list of the equality isotopes 1145 # meas_value are the measurement values for the numerators of meas_numerator 1146 # meas_label is the name of the label for which meas_eq holds 1147 1148 res_num = [] 1149 res_denom = [] 1150 # now we need to translate the pool and isotope to our flux variables. 
1151 for fname, iso_bin in zip(meas_label, meas_eq): 1152 tmp_num_eq = "" 1153 tmp_dem_eq = "" 1154 Found=False 1155 a = [] 1156 for k in self.eq_list: 1157 for q in k[1]: 1158 if k[0] == fname and Found==False: 1159 Found=True 1160 tmp_num_eq += q.split("/")[0]+"/"+iso_bin 1161 a.append(q.split("/")[0]) 1162 res_num.append("*+"+"*+".join(expand_x_values(tmp_num_eq))) 1163 a = set(a) 1164 tmp_dem_eq_a = [] 1165 for x in a: 1166 m = "*+" + x + "/"+"\x00"*(self.flux_nw_vec[x]) 1167 tmp_dem_eq_a.append("*"+"*".join(expand_x_values(m))) 1168 res_denom.append("".join(tmp_dem_eq_a)) 1169 #: Input label measurement numerator matrix 1170 self.labelinp_num = self.trnasform_eq_into_matrix(res_num) 1171 #: Input label measurement denominator matrix 1172 self.labelinp_denom = self.trnasform_eq_into_matrix(res_denom) 1173 #: Input label measurement value vector 1174 self.labelinp_value = sparse.lil_matrix((len(meas_value),1) ) 1175 1176 for (x,y) in enumerate(meas_value): 1177 self.labelinp_value[x,0] = float(y) 1178 self.labelinp_value.tocsr() 1179 1180 print " done." 1181 sys.stdout.flush()
1182
1184 ''' 1185 This function creates the propogation transition matrix:: 1186 \M{x(t) = P x(t-1)} . 1187 1188 P is provided using the following sub matrices: 1189 1. L{self.trans_mat_a} - the M{H_1} matrix (in CSR format). 1190 2. L{self.trans_mat_b} - the M{H_2} matrix (in CSR format). 1191 3. L{self.trans_mat_a_gamma} - the M{g_1} matrix (in CSR format). 1192 4. L{self.trans_mat_a_beta} - the M{g_2} matrix (in CSR format). 1193 5. L{self.trans_mat_b_beta} - the M{g_3} matrix (in CSR format) 1194 1195 In addition, this function creates the beta (ratios) vector: 1196 1. L{self.beta_vec} - the text names of the beta vector elements 1197 2. L{self.beta_vec_M} - the numerator for construction of the beta vec. 1198 3. L{self.beta_vec_N} - the denominator for constuction of the beta vec. 1199 eventually, we have:: 1200 M{beta_vec = (self.beta_vec_M * u) / (self.beta_vec_N * u)} 1201 1202 ''' 1203 print "Creating the transition matrices... ", 1204 sys.stdout.flush() 1205 # First line is the header: 1206 netseg = self.find_segment('NETWORK') 1207 IOvec = [] 1208 for i in netseg[0][1:]: 1209 if i != "": 1210 IOvec.append( i[0] ) # educt, #product 1211 1212 fluxes_data = self.fluxes_data 1213 flux_nw_vec = self.flux_nw_vec 1214 1215 fluxes = self.fluxes 1216 1217 # Create the beta vector: 1218 beta_vec = [] 1219 1220 for f in fluxes_data: 1221 cur_width = 0 1222 for ind,a in enumerate(IOvec): 1223 if a[0] == "E" and f[0][ind+1] != "": #if it's an educt (going into this pool) 1224 beta_vec.append(f[0][ind+1] + "/" + f[0][0]) # beta_vec = "POOLNAME/FLUXNAME" 1225 1226 beta_vec = list(set(beta_vec)) # is this correct..? 
1227 beta_vec.sort() 1228 1229 for f in self.flux_nw_vec: 1230 beta_vec.append(f) # beta has all the fluxes (and later we'll add all the ratios) 1231 #: Beta vector names 1232 self.beta_vec = dict([[beta_vec[k],k] for k in range(len(beta_vec))]) 1233 #: Beta vector numerator matrix 1234 self.beta_vec_M = sparse.lil_matrix((len(self.beta_vec),len(self.flux_nw_vec)+1)) 1235 #: Beta vector denominator matix 1236 self.beta_vec_N = sparse.lil_matrix((len(self.beta_vec),len(self.flux_nw_vec))) 1237 1238 z = [k.split("/") for k in beta_vec] 1239 1240 fl=flux_nw_vec.keys() 1241 for i,k in enumerate(self.beta_vec): 1242 t = [q[-1] for q in z if q[0] == z[i][0]] 1243 for p in t: 1244 self.beta_vec_N[i,fl.index(p)]=1 1245 1246 self.beta_vec_M[i,fl.index(z[i][-1])]=1 1247 1248 self.beta_vec_M.tocsr() 1249 self.beta_vec_N.tocsr() 1250 l=len(self.fluxes) 1251 visited_fluxes = [] 1252 1253 if display_prog: 1254 progbar = progress_bar.ProgressBar(prog_width) 1255 1256 sys.stdout.flush() 1257 #progbar = progressbar.ProgressBar().start() 1258 1259 # coo matrix initialization 1260 # Coo matrices are used here in order to speed up the construction process of the matrices. 1261 trans_mat_a_row = [] 1262 trans_mat_a_col = [] 1263 trans_mat_a_beta_row = [] 1264 trans_mat_a_beta_col = [] 1265 trans_mat_a_gamma_row = [] 1266 trans_mat_a_gamma_col = [] 1267 trans_mat_b_row = [] 1268 trans_mat_b_col = [] 1269 trans_mat_b_beta_row = [] 1270 trans_mat_b_beta_col = [] 1271 1272 for i,e in enumerate(self.eq_list): 1273 # enable if propogation prints are needed: 1274 #if i%20 == 0: 1275 #print str(100*float(i)/len(self.eq_list)) + "% done." 
1276 if display_prog: 1277 progbar.update(100*float(i)/len(self.eq_list)) 1278 #print len(e[1]), len(e[2]) 1279 k = [p.split("/")[0] for p in e[1]] 1280 for f in e[1]: 1281 fspl = f.split("/")[0] 1282 f_ind = self.fluxes[f] 1283 if k.count(fspl) == 1: # if that's simple pool (hence the whole flux is coming out of it) 1284 for p in e[2]: 1285 trans_mat_a_row.append(f_ind) 1286 trans_mat_a_col.append(self.fluxes[p]) # -> trans_mat_a calculates the input flux to the specific pool 1287 trans_mat_a_beta_row.append(f_ind) 1288 trans_mat_a_beta_col.append(self.beta_vec[e[0]+"/"+fspl]) # beta -> the ratio of flux fspl in the current pool 1289 trans_mat_a_gamma_row.append(f_ind) # gamma is just 1 1290 trans_mat_a_gamma_col.append(len(self.beta_vec)) # gamma is just 1 1291 1292 trans_mat_b_row.append(f_ind) # b should be just 1 here... 1293 trans_mat_b_col.append(len(self.fluxes)) 1294 trans_mat_b_beta_row.append(f_ind) 1295 trans_mat_b_beta_col.append(len(self.beta_vec)) 1296 else: 1297 #if k.count(f.split('/')[0]) > 1: 1298 # if it's 1->2 or 2->2 (hence the flux is not determined solely by 1 equation) 1299 if f not in visited_fluxes: 1300 visited_fluxes.append(f) 1301 for p in e[2]: 1302 trans_mat_a_row.append(f_ind) 1303 trans_mat_a_col.append(self.fluxes[p]) 1304 trans_mat_a_beta_row.append(f_ind) 1305 trans_mat_a_beta_col.append(self.beta_vec[e[0]+"/"+fspl]) 1306 try: 1307 trans_mat_a_gamma_row.append(f_ind) 1308 trans_mat_a_gamma_col.append(self.beta_vec[fspl]) 1309 except: 1310 print fspl, self.beta_vec[fspl], len(self.beta_vec) 1311 sys.exit(0) 1312 aklsjdkj 1313 else: 1314 for p in e[2]: 1315 trans_mat_b_row.append(f_ind) 1316 trans_mat_b_col.append(self.fluxes[p]) 1317 trans_mat_b_beta_row.append(f_ind) 1318 trans_mat_b_beta_col.append(self.beta_vec[e[0]+"/"+fspl]) 1319 # now let's put 1's for the input fluxes...: 1320 # converting and making sure the max element of a_beta, a_gamma and b_beta is 1 1321 trans_mat_a = sparse.csr_matrix( ([1]*len(trans_mat_a_row), 
(trans_mat_a_row, trans_mat_a_col) ),shape=(len(self.fluxes),len(self.fluxes)), dtype=float64) 1322 trans_mat_a_beta = sparse.coo_matrix(([1]*len(trans_mat_a_beta_row), (trans_mat_a_beta_row, trans_mat_a_beta_col) ), shape=(len(self.fluxes),len(self.beta_vec)+1),dtype=float64).todok() 1323 trans_mat_a_gamma = sparse.coo_matrix(([1]*len(trans_mat_a_gamma_row), (trans_mat_a_gamma_row, trans_mat_a_gamma_col) ), shape=(len(self.fluxes),len(self.beta_vec)+1),dtype=float64).todok() 1324 trans_mat_b = sparse.csr_matrix(([1]*len(trans_mat_b_row), (trans_mat_b_row, trans_mat_b_col) ), shape=(len(self.fluxes),len(self.fluxes)+1),dtype=float64) # we add one in case we don't need a product... 1325 trans_mat_b_beta = sparse.coo_matrix(([1]*len(trans_mat_b_beta_row), (trans_mat_b_beta_row, trans_mat_b_beta_col) ),shape=(len(self.fluxes),len(self.beta_vec)+1),dtype=float64).todok() 1326 1327 a=trans_mat_a_beta.sum(1).tolist() 1328 for i,k in enumerate(a): 1329 if k==0 or k==[0]: 1330 trans_mat_a[i,i]=1 1331 trans_mat_a_beta[i,len(self.beta_vec)]=1 1332 trans_mat_a_gamma[i,len(self.beta_vec)]=1 1333 trans_mat_b[i,len(self.fluxes)] = 1 1334 trans_mat_b_beta[i,len(self.beta_vec)] = 1 1335 1336 #: The M{H_1} matrix (in CSR format). 1337 self.trans_mat_a = trans_mat_a.tocsr() 1338 #self.trans_mat_a = trans_mat_a 1339 #: The M{g_2} matrix (in CSR format). 1340 self.trans_mat_a_beta = trans_mat_a_beta.tocsr() 1341 #: The M{g_1} matrix (in CSR format). 1342 self.trans_mat_a_gamma = trans_mat_a_gamma.tocsr() 1343 #: The M{H_2} matrix (in CSR format). 1344 self.trans_mat_b = trans_mat_b.tocsr() 1345 #self.trans_mat_b = trans_mat_b 1346 #: The M{g_3} matrix (in CSR format). 1347 self.trans_mat_b_beta = trans_mat_b_beta.tocsr() 1348 1349 if display_prog: 1350 progbar.update(100) 1351 print " done." 1352 sys.stdout.flush()
1353
1354 - def create_lu_trans_matrix(self):
1355 """ 1356 This function computes the LU decomposition of the propogation transition matrices. 1357 We find a factorization for:: 1358 L{self.trans_mat_a} and L{self.trans_mat_b} 1359 by finding the LU factorizaiton of the matix constructed out of concatanation of the two above. 1360 1361 The result are the matrices: 1362 1. L{self.al1} - the left side of the pseudo LU of self.trans_mat_a 1363 2. L{self.al2} - the left side of the pseudo LU of self.trans_mat_b 1364 3. L{self.au} - the right side of the pseudo LU factorizaiton of both self.trans_mat_a and self.trans_mat_b . 1365 """ 1366 print "Calculating the LU decomposition of the transition matrices... ", 1367 sys.stdout.flush() 1368 #tmp_trans_mat = concatenate( (self.trans_mat_a,self.trans_mat_b[:,:-1]) ) 1369 tmp_trans_mat = sparse.vstack( (self.trans_mat_a,self.trans_mat_b[:,:-1]) ).tocsr() 1370 #sp_tmp_trans_mat = sparse.csc_matrix(tmp_trans_mat) 1371 # keeprows = nonzero(abs(tmp_trans_mat).sum(1))[0] 1372 1373 L, U, P, Q, R, do_recip = umfpack.lu( tmp_trans_mat ) 1374 1375 umfpack.free() 1376 tP=sparse.csc_matrix( ([1]*len(P),(P,range(len(P)))), (len(P),len(P)) ) 1377 tQ=sparse.csc_matrix( ([1]*len(Q),(Q,range(len(Q)))), (len(Q),len(Q)) ) 1378 1379 if do_recip: # LU = PRAQ 1380 #P=P*sparse.spdiags(R,[0],len(R),len(R)) 1381 self.al = sparse.spdiags(1.0/R,[0],len(R),len(R))*tP*L 1382 #self.al = tP*L 1383 else: # LU=P(R^-1)AQ 1384 self.al = sparse.spdiags(R,[0],len(R),len(R))*tP*L 1385 #self.al = tP*L 1386 #: The right side of the pseudo LU factorizaiton of both self.trans_mat_a and self.trans_mat_b . 
1387 self.au = U*tQ.T 1388 #self.au = U 1389 #: The left side of the pseudo-LU factorizaiton of self.trans_mat_a 1390 self.al1 = self.al[:len(self.fluxes),:] 1391 #: For easy of computation, self.al1au = self.al1 * self.au 1392 self.al1au = self.al1*self.au 1393 #: The left side of the pseudo-LU factorizaiton of self.trans_mat_b 1394 self.al2 = self.al[len(self.fluxes):,:] 1395 #: For easy of computation, self.al2au = self.al2 * self.au 1396 self.al2au = self.al2*self.au 1397 self.al2_0 = self.trans_mat_b[:,len(self.fluxes)].tocsc() 1398 print " done." 1399 sys.stdout.flush()
1400
1401 - def evaluate(self):
1402 """ 1403 This function runs the main optimization process. 1404 It uses the L{scipy.optimize.fmin_slsqp} for the actual optimization process, and prints out the results in a TABed formatted table. 1405 """ 1406 print "----------------------------------" 1407 print "\nOptimizing network:", self.ProjName, "\n" 1408 print "----------------------------------" 1409 sys.stdout.flush() 1410 find_init_point(self) 1411 iu = self.init_u # input u 1412 start_time = time.time() 1413 print "Starting the optimization process:" 1414 ## r = optimize.fmin_slsqp(func=get_normG, x0=iu, f_eqcons=lambda x,y:self.S.todense()*matrix(x).T, bounds = [(0,10)]*len(self.init_u), fprime=get_normG_grad, fprime_eqcons=lambda x,y:self.S.todense(), iprint=3, args=[self]) 1415 ## r = optimize.fmin_slsqp(func=get_normG, x0=iu, f_eqcons=lambda x,y:self.S.todense()*matrix(x).T, 1416 ## bounds = [(0,10)]*len(self.init_u), fprime=get_normG_grad, 1417 ## fprime_eqcons=lambda x,y:self.S.todense(), iprint=3, args=[self]) 1418 1419 ## r = optimize.fmin_slsqp(func=get_normG, x0=iu, f_eqcons=lambda x,y:self.S.todense()*matrix(x).T - self.S_b, 1420 ## bounds = [(0,10)]*len(self.init_u), fprime=get_normG_grad, 1421 ## fprime_eqcons=lambda x,y:self.S.todense(), iprint=3, args=[self], iter=400, acc=1e-12, epsilon=1e-12 ) 1422 1423 # This is the main optimization process call function. 1424 r = optimize.fmin_slsqp(func=get_normG, x0=iu, f_eqcons=lambda x,y:self.S.todense()*matrix(x).T - self.S_b, 1425 bounds = [(0,10)]*len(self.init_u), fprime=get_normG_grad, 1426 fprime_eqcons=lambda x,y:self.S.todense(), iprint=3, args=[self], iter=400, epsilon=1e-10 ) 1427 1428 1429 ## dres = get_normG_grad(r,self) 1430 self.res = r 1431 #self.dres = dres 1432 end_time=time.time() 1433 print "----------------------------------" 1434 print "Time for convergence: ", end_time-start_time, ' sec' 1435 print "----------------------------------" 1436 print "Results:" 1437 print "Flux name\tFlux value" 1438 # let's sort it... 
1439 tmp = self.flux_nw_vec.keys() 1440 tmp.sort() 1441 for a in tmp: 1442 if a[-2:] != '_s': 1443 print a,'\t', r[self.get_metabolic_flux_index(a)]#, dres[self.get_metabolic_flux_index(a)]
1444 1445 #for ind,k in enumerate(self.flux_nw_vec.keys()): 1446 # print k, '\t', r[ind] 1447
def one_div_vec(vec):
    """
    Return the element-wise reciprocal of a sparse vector as a dense array.

    @param vec: input vector (a scipy sparse matrix; anything with .toarray()
                and .shape)
    @type vec: scipy.sparse matrix
    @rtype: numpy array
    @return: 1/vec (element-wise division), dense, same shape as vec.
    """
    # numpy.divide replaces the scipy.divide re-export this code relied on:
    # the numpy-function re-exports were deprecated and removed from the
    # scipy namespace in modern SciPy releases.
    import numpy
    return numpy.divide(numpy.ones(vec.shape), vec.toarray())
def sdiag(a):
    """
    Fast sparse diagonal matrix creator.

    @type a: array
    @param a: diagonal values; an (n, 1) column vector or any length-n sequence
    @rtype: scipy.sparse matrix
    @return: (n x n) sparse matrix with a on its diagonal.
    """
    # Build the (1, n) data row that spdiags expects for a single diagonal.
    # numpy is used directly because the scipy.matrix re-export this code
    # relied on was deprecated and removed in modern SciPy releases; the
    # reshape also accepts flat length-n inputs, which the old matrix(a).T
    # form rejected.
    import numpy
    diag_row = numpy.asarray(a).reshape(1, -1)
    return scipy.sparse.spdiags(diag_row, 0, len(a), len(a))
def expand_x_values(cteq):
    '''
    Expand every "\\x00" placeholder into both 0 and 1, producing all the
    concrete label patterns encoded by the input.

    @type cteq: string
    @param cteq: "*"-separated terms; each "\\x00" marks a wildcard position.
    @rtype: list
    @return: every variant of the input terms with the "\\x00"s replaced by
             all possible 0/1 combinations.
    '''
    if cteq[0] == "*":
        terms = cteq.split("*")[1:]
    else:
        terms = cteq.split("*")

    expanded = []
    for term in terms:
        wildcards = term.count("\x00")
        if wildcards == 0:
            # Nothing to substitute: keep the term verbatim.
            expanded.append(term)
            continue
        for variant in range(2 ** wildcards):
            bits = num2bin(variant, wildcards)
            out = []
            cursor = 0
            for ch in term:
                if ch == "\x00":
                    out.append(bits[cursor])
                    cursor += 1
                else:
                    out.append(ch)
            expanded.append("".join(out))
    return expanded
1500
def create_ms_isotopomers_dict(no_of_atoms, indices_to_change):
    """
    Group every labeling of the changeable atoms by its number of 1s.

    Only the atoms listed in indices_to_change are enumerated; all other
    positions stay as the literal 'x'. For example::
        create_ms_isotopomers_dict(4, [0, 1, 2])
    yields::
        [['000x'], ['001x', '010x', '100x'], ['011x', '101x', '110x'], ['111x']]
    i.e. element k holds every pattern with exactly k labeled atoms.

    Used for mass-spectrometry measurements analysis.

    @param no_of_atoms: number of atoms in the counted molecule.
    @type no_of_atoms: number
    @param indices_to_change: indices of the atoms to enumerate over.
    @type indices_to_change: list
    @return: list of lists - all patterns, grouped by the number of ones.
    @rtype: list
    """
    width = len(indices_to_change)
    template = "x" * no_of_atoms

    # weights[i] = number of 1-bits in the binary representation of i.
    weights = [bin(i)[2:].count('1') for i in range(2 ** width)]

    grouped = []
    for ones in range(width + 1):
        patterns = [bin(i)[2:].rjust(width, '0')
                    for i, w in enumerate(weights) if w == ones]
        group = []
        for pat in patterns:
            # Weave the enumerated bits into the fixed 'x' template.
            chars = []
            cursor = 0
            for pos in range(no_of_atoms):
                if pos in indices_to_change:
                    chars.append('1' if pat[cursor] == '1' else '0')
                    cursor += 1
                else:
                    chars.append(template[pos])
            group.append("".join(chars))
        grouped.append(group)
    return grouped
1550
def num2bin(num, length):
    """
    Convert a non-negative integer to a binary string of at least (length) bits.

    @param num: the number to be converted
    @type num: number
    @param length: minimal length of the output string; zeros are left-appended
                   as needed to maintain it
    @type length: number
    @return: binary string representing the input number.
    @rtype: string
    """
    # Floor division ("//") keeps this correct on both Python 2 and 3:
    # plain "/" on ints became true (float) division in Python 3, which
    # would make this loop run on never-quite-zero float halves.
    bits = ""
    while num > 0:
        bits = str(num % 2) + bits
        num = num // 2
    return bits.rjust(length, '0')
1566
def save_text_vec(filename, vec):
    """
    Save the input vector as a text file, one element per line.
    Used for MATLAB debugging purposes.

    @param filename: output file path
    @type filename: string
    @param vec: sequence of strings to write
    @type vec: iterable of strings
    """
    # "with" guarantees the handle is closed even if a write fails
    # (the original left the file open on exceptions).
    with open(filename, "w") as f:
        for k in vec:
            f.write(k + '\n')
1575
def matlab_save_sparsed_matrix(filename, mat):
    """
    Save a sparse (DOK-style) matrix as a MATLAB-loadable TAB separated text
    file of "row<TAB>col<TAB>value" triplets with 1-based indices.
    Used for MATLAB debugging purposes.

    @param filename: output file path
    @type filename: string
    @param mat: sparse matrix supporting .items() iteration and 2-D indexing
    """
    # .items() instead of the Python-2-only .iteritems(): it behaves the
    # same on old dict-backed matrices and is the only form available on
    # Python 3 / modern SciPy DOK matrices.
    with open(filename, 'w') as f:
        for (row, col), value in mat.items():
            f.write(str(row + 1) + '\t' + str(col + 1) + '\t' + str(value) + '\n')
        # MATLAB infers the matrix size from the largest index seen, so make
        # sure the bottom-right corner appears even when its value is zero.
        if mat[mat.shape[0] - 1, mat.shape[1] - 1] == 0:
            f.write(str(mat.shape[0]) + '\t' + str(mat.shape[1]) + '\t' + '0' + '\n')
1588
def sort_dict_by_values(d):
    """
    This function sorts the input dictionary by its values.

    @param d: Dictionary to sort.
    @return: List of (key, value) pairs ordered by value first, key second
        (keys break ties between equal values).
    @rtype: list
    """
    # Index-based key instead of the Python-2-only tuple-parameter lambda
    # "lambda (k,v): (v,k)", which is a SyntaxError on Python 3.
    # Ordering is identical.
    return sorted(d.items(), key=lambda kv: (kv[1], kv[0]))
1594
def null(A, eps=1e-15):
    """
    This function finds the null space of the incoming matrix A.

    @param A: Input matrix (2-D array-like).
    @param eps: Singular values at or below this threshold are treated as zero.
    @return: Array whose columns form an orthonormal basis of the null space of A.
    """
    u, s, vh = scipy.linalg.svd(A, full_matrices=True)
    # svd returns only min(m, n) singular values; pad with zeros so every
    # row of vh has a matching singular value.
    s_full = hstack((s, zeros(A.shape[1] - len(s))))
    # Rows of vh whose singular value is (numerically) zero span the null
    # space.  Plain numpy boolean indexing replaces scipy.compress /
    # scipy.transpose, numpy aliases that were removed from recent scipy.
    null_space = vh[s_full <= eps]
    return transpose(array(null_space))
1604
def find_init_point(self):
    """
    This function generates the initial point given for the optimization process.
    It is based upon the standard init point finding methods of interior point algorithms:
    In order to find a valid solution for M{S*u = 0}, M{U* u <= s} we solve::
        min(s)

        s.t.:

        [I,-I]*u + [0*i,10*i] >= s
        Su = S_b
    We then use this initial point with the propagation equation, resulting with the first point for the algorithm.

    Side effects: sets self.len_v, self.len_f, self.init_u, self.orr_v,
    self.keeprows, self.sv and self.init_v.
    """
    print('Generating initial guess...'),
    self.len_v = len(self.flux_nw_vec)
    self.len_f = len(self.fluxes)

    # Least-squares solution of S*u = S_b as the starting guess for SLSQP.
    init_u = scipy.linalg.lstsq(self.S.todense(),self.S_b)[0]

    init_s = abs(min(init_u))

    up_bound = 10

    # Inequality system [I;-I]*u + [0;up_bound] >= s; the slack s is appended
    # as one extra (last) optimization variable.
    ineq_mat = vstack( (identity(self.len_v),-identity(self.len_v)) )
    ineq_mat = hstack( (ineq_mat, 1*ones( (ineq_mat.shape[0],1) ) ) )
    ineq_vec = vstack( (zeros( (self.len_v,1)),up_bound * ones( (self.len_v,1) ) ) )

    # Stoichiometric equalities padded with a zero column for the slack variable.
    S_with_x = hstack( (self.S.todense(),zeros( (self.S.shape[0],1) )) )

    # min(s)
    #
    # s.t.:
    #
    # [I,-I]*u + [0*i,10*i] >= s
    # Su = S_b

    f_prime = vstack( (zeros( (self.len_v,1) ),ones( (1,1) )) )
    init_x = vstack( (init_u, init_s) )
    def func(x):
        # Objective: flux-measurement residual plus the slack variable (x[-1]).
        a = self.flux_meas_mat*(sdiag(1/self.U.sum(1)).todense()*self.U).T*matrix(x[:-1]).T
        return a.T*a - 2*a.T*self.flux_meas_values + x[-1]
    def func_prime(x):
        # Analytic gradient of func.
        a = (self.flux_meas_mat*(sdiag(1/self.U.sum(1)).todense()*self.U).T)
        return hstack( (2*matrix(x[:-1])*(a.T*a) -2*self.flux_meas_values.T*a,ones((1,1))) ).tolist()[0]

    r = optimize.fmin_slsqp(func=func, #lambda x:x[self.len_v],
                            x0=init_x,
                            fprime = func_prime, #lambda x:f_prime,
                            f_ieqcons = lambda x: (ineq_mat*matrix(x).T + ineq_vec).tolist(),
                            fprime_ieqcons = lambda x: ineq_mat,
                            f_eqcons = lambda x: (S_with_x*matrix(x).T - self.S_b).tolist(),
                            fprime_eqcons = lambda x: S_with_x,
                            iprint=0)

    # Drop the slack variable; keep only the flux part of the solution.
    init_u = matrix(r[:-1]).T

    self.init_u=init_u

    u=self.init_u

    u_1 = vstack((u,1))

    # t: transformation variables; the last len(u) entries are 1/u.
    t = (self.beta_vec_M*u_1) / (self.beta_vec_N*u)
    t[-len(u):] = 1/u
    t_1 = vstack((t,1))
    # If one of the u's is zero we need to zero the 1/u element in t as well.
    uz = nonzero(u==0)
    t_1[-len(u)-1+uz[0]]=0


    # Derivatives of t with respect to u (numerator and denominator parts).
    tmp_dtm = 1/(self.beta_vec_N*u)
    d_tm = sparse.spdiags(tmp_dtm.A1,0,len(t),len(t))*self.beta_vec_M

    tmp_dtn = -(self.beta_vec_M*u_1)/((self.beta_vec_N*u).A1**2)
    d_tn = sparse.spdiags(tmp_dtn.A1,0,len(t),len(t))*self.beta_vec_N

    # g(u) (called z here): element-wise product of the three transformation terms.
    z = multiply((self.trans_mat_a_gamma*t_1), multiply((self.trans_mat_a_beta*t_1), (self.trans_mat_b_beta*t_1)) )

    # Generate init vector:
    v = zeros( (self.len_f,1) )
    for k in range(self.len_v):
        #v[self.U.getrow(k).indices[-1]]=u[k]
        v[self.U[k,:].indices[-1]]=u[k]

    meas_flux_values = self.labelinp_denom*v
    v = zeros( (self.len_f,1) )
    for k in range(shape(self.labelinp_num)[0]):
        tmpind = nonzero(self.labelinp_num[k,:].toarray()[0] > 0)
        if len(tmpind)>1:
            # NOTE(review): assert('...') is a no-op (a non-empty string is
            # always truthy); this was probably meant to raise an error.
            assert('Incorrect flux measurement.')
        v[(tmpind[0])] = self.labelinp_value[k,0]*meas_flux_values[k,0]
    # upt/00000 = 3962

    v=matrix(v)
    self.orr_v = v
    #print self.U*v
    last_v=v

    tmp1 = self.al1*self.au
    tmp2 = self.al2*self.au

    ttmp1 = sparse.spdiags(z.T,[0],len(z),len(z))*self.al1
    ttmp2 = self.al2

    # Fixed-point propagation: iterate v until it stabilizes (or 500 rounds).
    for k in range(500):
        v = multiply(z, multiply( matrix(tmp1*v), (self.al2_0+tmp2*v )))
        if (k==0): # Let's see which rows are constant (input fluxes which are not set)
            ax = ttmp1*v
            bx = self.al2_0+ttmp2*v

            J = self.au*(sdiag(bx)*ttmp1 + sdiag(ax)*ttmp2) - sparse.identity(len(v))
            self.keeprows = nonzero(abs(J).sum(1))[0]
        q = self.init_u / (self.U*v)
        #### print q
        q[ nonzero(self.U*v==0)]=0
        v = multiply(v,self.U.T*q)
        if scipy.linalg.norm(v-last_v) < 1e-14:
            break
        ## print scipy.linalg.norm(v-last_v)
        last_v=v
    print scipy.linalg.norm(self.U*v - self.init_u)
    self.sv=v
    self.init_v = v

    print('done.')
def get_normG(u, self):
    '''
    Main MFA optimization objective function value.
    This function calculates || self.tG * x - self.tB || for a given u vector
    by finding the valid x vector and then substituting it in the objective function.

    Side effects: caches self.lastu, self.tv and self.tv_val for reuse by
    get_normG_grad.
    '''
    # tic()
##    print "****** Starting get_normG ******"
##    print u
    # Normalize u to a column matrix; remember it so get_normG_grad can tell
    # whether this value function was already evaluated at the same point.
    if len(shape(u))==1:
        u=matrix(u).T
    self.lastu = u
    u_1 = vstack((u,1)) #[u;1]
    t = (self.beta_vec_M*u_1) / (self.beta_vec_N*u)

    t[-len(u):] = 1/u
    t_1 = vstack((t,1)) #[t;1]

    # if one of the u's is zero we need to zero the 1/u element in t as well.
    t_1 = nan_to_num(t_1)

##    t_1[-len(u)-1+uz[0]]=0

    #print t_1
    # Calc g(u) (here it is called z):
    z = multiply((self.trans_mat_a_gamma*t_1), multiply((self.trans_mat_a_beta*t_1), (self.trans_mat_b_beta*t_1)) )

    # Generate init vector:
    q = u/(self.U*self.sv)
##    q[uz[0]] = 0
    q = nan_to_num(q)
    v = self.au*(multiply(self.sv,(self.U.T*q)))

    last_v = v
    tmp1 = sparse.spdiags(z.T,[0],len(z),len(z))*self.al1
    tmp2 = self.al2

    keeprows = self.keeprows

    # Fixed-point iteration on v, with a Newton correction while the step
    # size p is still large; stops early once p < 1e-7.
    for k in range(400):

        ax = tmp1*v
        bx = self.al2_0+tmp2*v
        nv = multiply(ax,bx)
##        q = u / (self.U*nv)
##        q[ nonzero(self.U*nv==0)]=0
##        nv = multiply(nv,self.U.T*q)
        p=scipy.linalg.norm(self.au*nv-self.au*last_v)
        last_v=nv
        nv=self.au*nv
        if (abs(p) > 1e-4 ) or (k==0): # let's see if we need to use Newton's step
            f = nv - v
            # Build the Jacobian J = au*(diag(bx)*tmp1 + diag(ax)*tmp2) - I
            # by scaling copies of the sparse factors row-wise in place.
            ttmp1=tmp1.copy()
            ttmp2=tmp2.copy()
            sparse.sparsetools.csr_scale_rows(len(v),len(v),ttmp1.indptr,ttmp1.indices,ttmp1.data,array(bx.T)[0])
            sparse.sparsetools.csr_scale_rows(len(v),len(v),ttmp2.indptr,ttmp2.indices,ttmp2.data,array(ax.T)[0])
            J = self.au*(ttmp1 + ttmp2) - sparse.identity(len(v))
            #toc()
##            keeprows = nonzero(power(J,2).sum(1))[0]
            #keeprows = nonzero(J.sum(1))[0]
            #keeprows = nonzero(abs(J).sum(1))[0]

            # Newton step restricted to the non-constant rows (keeprows).
            r = sparse.linalg.dsolve.spsolve(J[keeprows,keeprows],f[keeprows])

            v[keeprows,0]-=r.T
        else:
            v=nv

        if abs(p) < 1e-7:
            break
##        print "normG:",k,p
    # print scipy.linalg.norm(v-last_v)

    # Final x vector and the objective value; cached for the gradient call.
    tv=multiply(ax,bx)
    val = scipy.linalg.norm(self.tG*tv-self.tB)
    self.tv = tv
    self.tv_val = val

##    print "****** Exiting get_normG ******"
    return val
1810 1811 #def get_normG_grad(self, u, gen_gradient=True):
def get_normG_grad(u, self):
    '''
    Main MFA optimization objective function gradient calculation.
    This function calculates d (|| self.tG * x - self.tB ||) / du for a given u vector.

    Relies on the self.tv / self.tv_val / self.lastu values cached by
    get_normG; re-runs get_normG first if u differs from the cached point.
    '''
##    print "****** Starting get_normG_grad ******"
    if len(shape(u))==1:
        u=matrix(u).T
    # Make sure the cached solution (self.tv, self.tv_val) matches this u.
    if any(self.lastu != u):
        get_normG(u,self)

    u_1 = vstack((u,1)) #[u;1]
    t = (self.beta_vec_M*u_1) / (self.beta_vec_N*u)
    t[-len(u):] = 1/u
    t_1 = vstack((t,1)) #[t;1]

    # if one of the u's is zero we need to zero the 1/u element in t as well.
    uz = nonzero(u<=1e-7)
    t_1[-len(u)-1+uz[0]]=0
    t[-len(u)+uz[0]]=0

    # Calc beta_vec_M derivative
    tmp_dtm = 1/(self.beta_vec_N*u)
    d_tm = sparse.spdiags(tmp_dtm.T,0,len(t),len(t))*self.beta_vec_M

    # Calc beta_vec_N derivative
    tmp_dtn = -(self.beta_vec_M*u_1)/power((self.beta_vec_N*u),2)
    d_tn = sparse.spdiags(tmp_dtn.T,0,len(t),len(t))*self.beta_vec_N

    # Calc g(u) (here it is called z):
    z = multiply((self.trans_mat_a_gamma*t_1), multiply((self.trans_mat_a_beta*t_1), (self.trans_mat_b_beta*t_1)) )

    # Generate init vector:
    q = u/(self.U*self.sv)
    q[uz[0]]=0
    #v = self.au*(multiply(self.sv,(self.U.T*q)))
    # Reuse the x vector cached by get_normG instead of recomputing it.
    v = self.tv
    tv=v
##    z = self.tv_z
##    t_1 = self.tv_t_1
##    d_tm = self.tv_d_tm
##    d_tn = self.tv_d_tn

    ax = self.al1au*tv
    bx = self.al2au*tv + self.al2_0
    #ax = self.al1*self.au*tv
    #bx = self.al2*self.Gau*tv + self.al2_0
    #tmpones=ones(1,len(v))
    # Zero out the rows corresponding to label-input fluxomers (they are
    # fixed by the measurements, not free variables).
    for k in range(self.labelinp_value.shape[0]):
        # NOTE(review): this indexes row 0 while looping over k; the second
        # loop below uses self.labelinp_num[k,:] — probably [k,:] was
        # intended here as well.  Verify before changing.
        tmpind = nonzero(self.labelinp_num[0,:])[1]
        if len(tmpind) > 1:
            # NOTE(review): assert('...') is a no-op (a non-empty string is
            # always truthy); this was probably meant to raise an error.
            assert('Multi-fluxomers label input data are not supported yet.')
        ax[tmpind] = 0
        bx[tmpind] = 0


    #self.tau = tau
    #J = self.au*sparse.spdiags(tz.T,[0],len(z),len(z))*( sparse.spdiags(bx.T,[0],len(v),len(v))*self.al1 + sparse.spdiags(ax.T,[0],len(v),len(v))*self.al2) - sparse.eye(len(v),len(v))

    # Build J = au*(diag(z*bx)*al1 + diag(z*ax)*al2) - I via two successive
    # in-place row scalings of each sparse factor.
    ttmp1=self.al1.copy()
    ttmp2=self.al2.copy()
    sparse.sparsetools.csr_scale_rows(len(v),len(v),ttmp1.indptr,ttmp1.indices,ttmp1.data,array(bx.T)[0])
    sparse.sparsetools.csr_scale_rows(len(v),len(v),ttmp1.indptr,ttmp1.indices,ttmp1.data,array(z.T)[0])
    sparse.sparsetools.csr_scale_rows(len(v),len(v),ttmp2.indptr,ttmp2.indices,ttmp2.data,array(ax.T)[0])
    sparse.sparsetools.csr_scale_rows(len(v),len(v),ttmp2.indptr,ttmp2.indices,ttmp2.data,array(z.T)[0])
    J = self.au*(ttmp1 + ttmp2) - sparse.identity(len(v))

##    J = self.au*sparse.spdiags(z.T,[0],len(z),len(z))*( sparse.spdiags(bx.T,[0],len(v),len(v))*self.al1 + \
##        sparse.spdiags(ax.T,[0],len(v),len(v))*self.al2) - \
##        sparse.identity(len(v))


    #z = multiply((self.trans_mat_a_gamma*t_1), multiply((self.trans_mat_a_beta*t_1), (self.trans_mat_b_beta*t_1)) )
    # dz: derivative of z w.r.t. u (product rule over the three factors).
    tmp_dz = (sparse.spdiags(multiply((self.trans_mat_a_beta*t_1), (self.trans_mat_b_beta*t_1)).T,[0],len(v),len(v))*self.trans_mat_a_gamma[:,:-1]*sparse.spdiags(-power(t,2).T,[0],len(t),len(t)))[:,-len(u):]
    d_tm=d_tm[:,:-1]
    dz = sparse.spdiags(multiply((self.trans_mat_a_gamma*t_1), (self.trans_mat_b_beta*t_1)).T,[0],len(v),len(v)) * self.trans_mat_a_beta[:,:-1]*(d_tm + d_tn) + sparse.spdiags(multiply((self.trans_mat_a_gamma*t_1), (self.trans_mat_a_beta*t_1)).T,[0],len(v),len(v)) * self.trans_mat_b_beta[:,:-1]*(d_tm + d_tn) + tmp_dz
    #P = (sparse.spdiags(multiply(ax,bx).T,[0],len(v),len(v))*dz).todense()

    P = (sparse.spdiags(multiply(ax,bx).T,[0],len(v),len(v))*dz).todense()

    # Fixed label-input fluxomers get their measured sensitivities directly.
    for k in range(self.labelinp_value.shape[0]):
        tmpind = nonzero(self.labelinp_num[k,:])[1]
        if len(tmpind) > 1:
            # NOTE(review): no-op assert, see above.
            assert('Multi-fluxomers label input data are not supported yet.')

        #tmpind2 = nonzero(self.U*(self.labelinp_denom[k,:]).T)[0]
        tmpind2=self.labelinp_num_fluxes[k]
        P[tmpind[0],tmpind2] = self.labelinp_value[k,0]

    # Keep the unscaled P around for the final chain-rule combination below.
    asd = P
    P = self.au*P
    keeprows = nonzero(abs(J).sum(1))[0]
    #keeprows = nonzero(J.sum(1))[0]
    J = J[keeprows,keeprows]
    #P = P[keeprows,:]

##    tL, tU, tP, tQ, tR, do_recip = umfpack.lu( J )
##    umfpack.free()
##    tP=sparse.csc_matrix( ([1]*len(tP),(tP,range(len(tP)))), (len(tP),len(tP)) )
##    tQ=sparse.csc_matrix( ([1]*len(tQ),(tQ,range(len(tQ)))), (len(tQ),len(tQ)) )
##
##    if do_recip: # LU = PRAQ
##        L = sparse.spdiags(1.0/tR,[0],len(tR),len(tR))*tP*tL
##    else: # LU=P(R^-1)AQ
##        L = sparse.spdiags(tR,[0],len(tR),len(tR))*tP*tL
##    U = tU*tQ.T

    # Factorize J once, then solve one system per column of P (d x / d u_g).
    tJ = sparse.linalg.dsolve.factorized(J)
    #tJ = sparse.linalg.dsolve.splu(J)
    #b = P[:,g].T.toarray()[0]
    d_xu = zeros( (shape(P)) )

    for g in range(shape(P)[1]):
##        tr = sparse.linalg.dsolve.spsolve(L,P[:,g]).T
##        tr2 = sparse.linalg.dsolve.spsolve(U,tr).T
        #tr = sparse.linalg.dsolve.spsolve(J,P[:,g]).T
        #tr = tJ( -array(P[keeprows.T,g]).T[0] )
        tr = tJ( - array(P[keeprows.T,g]).T[0] )
        d_xu[keeprows,g] = tr

    d_xu = asd + (sparse.spdiags(bx.T,[0],len(v),len(v))*sparse.spdiags(z.T,[0],len(v),len(v))*self.al1*d_xu + sparse.spdiags(ax.T,[0],len(v),len(v))*sparse.spdiags(z.T,[0],len(v),len(v))*self.al2*d_xu)
##    print (self.tG*tv - self.tB)[-13]
    # Chain rule: d||Gx-B||/du = (Gx-B)' G d_xu / ||Gx-B||.
    dval = 0.5*(self.tv_val)**(-1)*(2*tv.T*self.tG.T*self.tG*d_xu - 2*matrix(self.tB).T*self.tG*d_xu)
##    print max(dval)
    # Rescale pathologically large gradients to keep the optimizer stable.
    if any(max(dval)>1e4):
        dval=dval/scipy.linalg.norm(dval)*scipy.linalg.norm(v)
##    print max(real(dval.A[0])), min(real(dval.A[0]))
##    print real(dval.A[0])[23]
##    print "****** Exiting normG_grad ******"
    return (real(dval.A[0]))
1942 1943
def load_data(fname):
    """
    Loads a previously pickled analysis object from disk and returns it.

    @param fname: Path of the pickle file to read.
    @return: The unpickled object (presumably a saved FtblFile analysis —
        confirm with the caller).
    """
    # global ftbl_file
    if debug_level > 1:
        # Parenthesized single-argument print behaves identically under
        # Python 2 (statement) and Python 3 (function).
        print("Loading data...")
    # 'with' guarantees the handle is closed (the original never closed it).
    # NOTE(review): pickle.load must only be used on trusted files.
    with open(fname) as f:
        ftbl_file = pickle.load(f)
    if debug_level > 1:
        print(" done.")
    return ftbl_file
1953
def tic():
    """Start (or reset) the global stopwatch read and updated by toc()."""
    global thetime
    thetime = time.time()
def toc():
    """
    Prints the wall-clock seconds elapsed since the last tic()/toc() call
    and resets the global stopwatch so consecutive toc() calls chain.
    """
    global thetime
    y = time.time()
    # Parenthesized single-expression print behaves identically under
    # Python 2 (statement) and Python 3 (function).
    print(y - thetime)
    thetime = y
1962 1963 1964 import sys 1965
def main(argv):
    """
    Command-line entry point: parses the options described in the module
    docstring, redirects stdout to both the console and the output file,
    and runs the full FIA analysis by constructing an FtblFile.

    @param argv: Command-line arguments (without the program name).
    """
    global debug_level
    global Evaluate_Only
    # parse command line options
    try:
        opts, args = getopt.getopt(argv, "hi:t:o:", ["help","fia=","ftbl=","output_file="])
    except getopt.error, msg:
        print msg
        print "for help use --help"
        sys.exit(2)

    Analyze_Only = False
    Evaluate_Only = False
    i_or_t = False
    output_file = ""
    # process options
    for o, a in opts:
        if o in ("-h", "--help"):
            print __doc__
            sys.exit(0)

        if o in ("-i", "--fia"):
            ProjName = a
            inpfiletype = 'fia'
            i_or_t = True

        if o in ("-t", "--ftbl"):
            ProjName = a
            inpfiletype = 'ftbl'
            i_or_t = True

        if o in ("-o", "--output_file"):
            output_file = a

        # NOTE(review): "-a"/"-e" are not declared in the getopt spec above
        # ("hi:t:o:" and the long-option list), so getopt raises on them and
        # these two branches are currently unreachable.
        if o in ("-a", "--analyze_only"):
            Analyze_Only = True

        if o in ("-e", "--evaluate_only"):
            Evaluate_Only = True

    # An input file (-i or -t) is mandatory.
    if not i_or_t:
        print __doc__
        sys.exit(2)

    # process arguments: any positional argument is an error -> usage + exit.
    for arg in args:
        print __doc__
        sys.exit(2)

    print "\n FIA - Fluxomers Iterative Algorithm \n"

    # Default output file name: <PROJFILE>.out
    if output_file == "":
        output_file = ProjName + ".out"

    # Tee stdout to the console and the output log file.
    writer = MyWriter(sys.stdout, output_file)
    sys.stdout = writer

    ProjPathName = ProjName + "/" # Used only for Matlab debug purposes
    debug_level = 2
    print time.asctime()
    print "Analyzing network:", ProjName, "\n"
    print "Saving output in:", output_file, "\n"
    sys.stdout.flush()
    gen_trans_mat = True

    # Constructing the FtblFile performs the whole analysis/evaluation run.
    ftbl_file = FtblFile(ProjName, inpfiletype)

    #print "*************************************"
    print "\nAll done.\n"
    sys.stdout.flush()
2036 2037 ## 2038 ##debug_level=2 2039 ####ProjName="examples/ecoli.ftbl" 2040 ##ProjName="examples/ex0.ftbl" 2041 ###ProjName="tmp.ftbl" 2042 ##Evaluate_Only = False 2043 ##inpfiletype="ftbl" 2044 ##ftbl_file = FtblFile(ProjName, inpfiletype) 2045 ## 2046 2047 2048 if __name__ == '__main__': 2049 main(sys.argv[1:]) 2050