mpp.cpp
1 /*
2 Copyright (c) 2015-2019, Florian Sittel (www.lettis.net) and Daniel Nagel
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice, this
9 list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #include <fstream>
28 #include <iomanip>
29 
30 #include "tools.hpp"
31 #include "mpp.hpp"
32 #include "logger.hpp"
33 
34 namespace Clustering {
35  namespace MPP {
36 
38  read_transition_probabilities(std::string fname) {
39  std::vector<unsigned int> i;
40  std::vector<unsigned int> j;
41  std::vector<float> k;
42  std::ifstream fh(fname);
43  // read raw data
44  if (fh.is_open()) {
45  float i_buf;
46  float j_buf;
47  float k_buf;
48  while (fh.good()) {
49  fh >> i_buf;
50  fh >> j_buf;
51  fh >> k_buf;
52  if (fh.good()) {
53  i.push_back(i_buf);
54  j.push_back(j_buf);
55  k.push_back(k_buf);
56  }
57  }
58  } else {
59  std::cerr << "error: cannot open file "
60  << fname
61  << " for reading transition matrix."
62  << std::endl;
63  exit(EXIT_FAILURE);
64  }
65  // convert to matrix
66  unsigned int max_state = std::max((*std::max_element(i.begin()
67  , i.end()))
68  , (*std::max_element(j.begin()
69  , j.end())));
70  SparseMatrixF trans_prob(max_state+1, max_state+1);
71  for (unsigned int n=0; n < i.size(); ++n) {
72  trans_prob(i[n], j[n]) = k[n];
73  }
74  return trans_prob;
75  }
76 
78  transition_counts(std::vector<std::size_t> trajectory,
79  std::vector<std::size_t> concat_limits,
80  std::size_t n_lag_steps,
81  std::size_t i_max) {
82  if (n_lag_steps == 0) {
83  std::cerr << "error: lagtime of 0 does not make any sense for"
84  << " MPP clustering" << std::endl;
85  exit(EXIT_FAILURE);
86  }
87  std::vector<std::size_t>::iterator next_limit = concat_limits.begin();
88  if (i_max == 0) {
89  i_max = (*std::max_element(trajectory.begin()
90  , trajectory.end()));
91  }
92  SparseMatrixF count_matrix(i_max+1, i_max+1);
93  for (std::size_t i=0; i < trajectory.size() - n_lag_steps; ++i) {
94  std::size_t from = trajectory[i];
95  std::size_t to = trajectory[i+n_lag_steps];
96  if (next_limit != concat_limits.end()) {
97  // check for sub-trajectory limits
98  if (i+n_lag_steps < (*next_limit)) {
99  count_matrix(from, to) += 1;
100  } else if (i+1 == (*next_limit)) {
101  ++next_limit;
102  }
103  } else {
104  // either last sub-trajectory or everything is
105  // a single, continuous trajectory
106  count_matrix(from, to) += 1;
107  }
108  }
109  return count_matrix;
110  }
111 
113  weighted_transition_counts(std::vector<std::size_t> trajectory
114  , std::vector<std::size_t> concat_limits
115  , std::size_t n_lag_steps) {
116  // get max index (max. matrix size == max index+1)
117  std::size_t i_max = (*std::max_element(trajectory.begin()
118  , trajectory.end()));
119  SparseMatrixF weighted_counts(i_max+1, i_max+1);
120  std::vector<float> acc_weights(i_max+1);
121  std::size_t lower_lim = 0;
122  for (std::size_t i_chunk=0; i_chunk < concat_limits.size(); ++i_chunk) {
123  // compute count matrix per chunk
124  std::size_t upper_lim = lower_lim + concat_limits[i_chunk];
125  std::vector<std::size_t> chunk = std::vector<std::size_t>(
126  concat_limits.begin()+lower_lim
127  , concat_limits.begin()+upper_lim);
128  SparseMatrixF counts = transition_counts(chunk
129  , {}
130  , n_lag_steps
131  , i_max);
132  // compute weights for this chunk
133  std::vector<float> weights(i_max+1);
134  for (std::size_t i=0; i < i_max+1; ++i) {
135  for (std::size_t j=0; j < i_max+1; ++j) {
136  weights[i] += counts(i,j);
137  }
138  weights[i] = sqrt(weights[i]);
139  acc_weights[i] += weights[i];
140  }
141  // add weighted counts to end result
142  for (std::size_t i=0; i < i_max+1; ++i) {
143  for (std::size_t j=0; j < i_max+1; ++j) {
144  weighted_counts(i,j) += weights[i]*counts(i,j);
145  }
146  }
147  lower_lim = upper_lim;
148  }
149  // re-weight end result
150  for (std::size_t i=0; i < i_max+1; ++i) {
151  for (std::size_t j=0; j < i_max+1; ++j) {
152  weighted_counts(i,j) /= acc_weights[i];
153  }
154  }
155  return weighted_counts;
156  }
157 
160  , std::set<std::size_t> cluster_names) {
161  std::size_t n_rows = count_matrix.size1();
162  std::size_t n_cols = count_matrix.size2();
163  SparseMatrixF transition_matrix(n_rows, n_cols);
164  for (std::size_t i: cluster_names) {
165  std::size_t row_sum = 0;
166  for (std::size_t j=0; j < n_cols; ++j) {
167  row_sum += count_matrix(i,j);
168  }
169  if (row_sum > 0) {
170  for (std::size_t j=0; j < n_cols; ++j) {
171  if (count_matrix(i,j) != 0) {
172  transition_matrix(i,j) = count_matrix(i,j) / row_sum;
173  }
174  }
175  }
176  }
177  return transition_matrix;
178  }
179 
182  , std::map<std::size_t, std::size_t> sinks
183  , std::map<std::size_t, std::size_t> pops) {
184  std::size_t n_rows = transition_matrix.size1();
185  std::size_t n_cols = transition_matrix.size2();
186  SparseMatrixF updated_matrix(n_rows
187  , n_cols);
188  // macrostates == states left after lumping
189  std::set<std::size_t> macrostates;
190  // microstates == states before lumping
191  // (with map-key being the name of the macrostate they are lumped into)
192  std::map<std::size_t, std::set<std::size_t>> microstates;
193  for (auto lump_from_to: sinks) {
194  macrostates.insert(lump_from_to.second);
195  if (microstates.count(lump_from_to.second) == 0) {
196  microstates[lump_from_to.second] = {lump_from_to.first};
197  } else {
198  microstates[lump_from_to.second].insert(lump_from_to.first);
199  }
200  }
201  // compute relative populations of microstates inside their macrostate
202  std::map<std::size_t, float> relative_pops;
203  for (auto macro1: macrostates) {
204  std::size_t pop_total = 0;
205  for (auto micro1: microstates[macro1]) {
206  pop_total += pops[micro1];
207  }
208  for (auto micro1: microstates[macro1]) {
209  relative_pops[micro1] = (float) pops[micro1] / (float) pop_total;
210  }
211  }
212  // construct new transition matrix by summing over all transition
213  // probabilities from one macrostate to another macrostate
214  for (auto macro1: macrostates) {
215  float macro_row_sum = 0.0f;
216  for (auto macro2: macrostates) {
217  for (auto micro1: microstates[macro1]) {
218  for (auto micro2: microstates[macro2]) {
219  updated_matrix(macro1, macro2) += relative_pops[micro1]
220  * transition_matrix(micro1, micro2);
221  }
222  }
223  macro_row_sum += updated_matrix(macro1, macro2);
224  }
225  // renormalize row
226  for (auto macro2: macrostates) {
227  updated_matrix(macro1, macro2) /= macro_row_sum;
228  }
229  }
230  return updated_matrix;
231  }
232 
233  std::map<std::size_t, std::size_t>
235  std::set<std::size_t> cluster_names,
236  float q_min,
237  std::map<std::size_t, float> min_free_energy) {
238  std::map<std::size_t, std::size_t> future_state;
239  for (std::size_t i: cluster_names) {
240  std::vector<std::size_t> candidates;
241  float max_trans_prob = 0.0f;
242  if (transition_matrix(i,i) >= q_min) {
243  // self-transition is greater than stability measure: stay.
244  candidates = {i};
245  } else {
246  for (std::size_t j: cluster_names) {
247  // self-transition lower than q_min:
248  // choose other state as immidiate future
249  // (even if it has lower probability than self-transition)
250  if (i != j) {
251  if (transition_matrix(i,j) > max_trans_prob) {
252  max_trans_prob = transition_matrix(i,j);
253  candidates = {j};
254  } else if (transition_matrix(i,j) == max_trans_prob
255  && max_trans_prob > 0.0f) {
256  candidates.push_back(j);
257  }
258  }
259  }
260  }
261  if (candidates.size() == 0) {
262  std::cerr << "error: state '"
263  << i
264  << "' has self-transition probability of "
265  << transition_matrix(i,i)
266  << " at Qmin "
267  << q_min
268  << " and does not find any transition candidates."
269  << " please have a look at your trajectory!"
270  << std::endl;
271  exit(EXIT_FAILURE);
272  } else if (candidates.size() == 1) {
273  future_state[i] = candidates[0];
274  } else {
275  // multiple candidates: choose the one with lowest Free Energy
276  auto min_fe_compare = [&](std::size_t i, std::size_t j) {
277  return min_free_energy[i] < min_free_energy[j];
278  };
279  future_state[i] = (*std::min_element(candidates.begin()
280  , candidates.end()
281  , min_fe_compare));
282  }
283  }
284  return future_state;
285  }
286 
287  std::map<std::size_t, std::vector<std::size_t>>
288  most_probable_path(std::map<std::size_t, std::size_t> future_state,
289  std::set<std::size_t> cluster_names) {
290  std::map<std::size_t, std::vector<std::size_t>> mpp;
291  for (std::size_t i: cluster_names) {
292  std::vector<std::size_t> path = {i};
293  std::set<std::size_t> visited = {i};
294  std::size_t next_state = future_state[i];
295  // abort when path 'closes' in a loop, i.e.
296  // when a state has been revisited
297  while (visited.count(next_state) == 0) {
298  path.push_back(next_state);
299  visited.insert(next_state);
300  next_state = future_state[next_state];
301  }
302  mpp[i] = path;
303  }
304  return mpp;
305  }
306 
307  std::map<std::size_t, std::size_t>
308  microstate_populations(std::vector<std::size_t> clusters,
309  std::set<std::size_t> cluster_names) {
310  std::map<std::size_t, std::size_t> pops;
311  for (std::size_t i: cluster_names) {
312  pops[i] = std::count(clusters.begin(), clusters.end(), i);
313  }
314  return pops;
315  }
316 
317  // assign every state the lowest free energy value
318  // of all of its frames.
319  std::map<std::size_t, float>
320  microstate_min_free_energy(const std::vector<std::size_t>& clustering,
321  const std::vector<float>& free_energy) {
322  std::map<std::size_t, float> min_fe;
323  for (std::size_t i=0; i < clustering.size(); ++i) {
324  std::size_t state = clustering[i];
325  if (min_fe.count(state) == 0) {
326  min_fe[state] = free_energy[i];
327  } else {
328  if (free_energy[i] < min_fe[state]) {
329  min_fe[state] = free_energy[i];
330  }
331  }
332  }
333  return min_fe;
334  }
335 
336  std::map<std::size_t, std::size_t>
337  path_sinks(std::vector<std::size_t> clusters,
338  std::map<std::size_t, std::vector<std::size_t>> mpp,
339  SparseMatrixF transition_matrix,
340  std::set<std::size_t> cluster_names,
341  float q_min,
342  std::vector<float> free_energy) {
343  std::map<std::size_t, std::size_t> pops;
344  pops = microstate_populations(clusters, cluster_names);
345  std::map<std::size_t, float> min_free_energy;
346  min_free_energy = microstate_min_free_energy(clusters, free_energy);
347  std::map<std::size_t, std::size_t> sinks;
348  for (std::size_t i: cluster_names) {
349  std::vector<std::size_t> metastable_states;
350  for (std::size_t j: mpp[i]) {
351  // check: are there stable states?
352  if (transition_matrix(j,j) > q_min) {
353  metastable_states.push_back(j);
354  }
355  }
356  if (metastable_states.size() == 0) {
357  // no stable state: treat all states in path as 'metastable'
358  metastable_states = mpp[i];
359  }
360  // helper function: compare states by their population
361  auto pop_compare = [&](std::size_t i, std::size_t j) -> bool {
362  return pops[i] < pops[j];
363  };
364  // helper function: compare states by their min. Free Energy
365  auto fe_compare = [&](std::size_t i, std::size_t j) -> bool {
366  return min_free_energy[i] < min_free_energy[j];
367  };
368  // find sink candidate state from lowest free energy
369  auto candidate = std::min_element(metastable_states.begin()
370  , metastable_states.end()
371  , fe_compare);
372  float min_fe = free_energy[*candidate];
373  std::set<std::size_t> sink_candidates;
374  while (candidate != metastable_states.end()
375  && free_energy[*candidate] == min_fe) {
376  // there may be several states with same (min.) free energy,
377  // collect them all into one set
378  sink_candidates.insert(*candidate);
379  metastable_states.erase(candidate);
380  candidate = std::min_element(metastable_states.begin()
381  , metastable_states.end()
382  , fe_compare);
383  }
384  // select sink by lowest free energy
385  if (sink_candidates.size() == 1) {
386  sinks[i] = (*sink_candidates.begin());
387  } else {
388  // or highest population, if equal
389  sinks[i] = (*std::max_element(sink_candidates.begin()
390  , sink_candidates.end()
391  , pop_compare));
392  }
393  }
394  return sinks;
395  }
396 
397  // lump states based on path sinks and return new trajectory.
398  // new microstates will have IDs of sinks.
399  std::vector<std::size_t>
400  lumped_trajectory(std::vector<std::size_t> trajectory,
401  std::map<std::size_t, std::size_t> sinks) {
402  for (std::size_t& state: trajectory) {
403  state = sinks[state];
404  }
405  return trajectory;
406  }
407 
408  // run clustering for given Q_min value
409  // returns: {new traj, lumping info, updated transition matrix}
410  std::tuple<std::vector<std::size_t>
411  , std::map<std::size_t, std::size_t>
412  , SparseMatrixF>
413  fixed_metastability_clustering(std::vector<std::size_t> initial_trajectory,
414  SparseMatrixF trans_prob,
415  float q_min,
416  std::vector<float> free_energy) {
417  std::set<std::size_t> microstate_names;
418  std::vector<std::size_t> traj = initial_trajectory;
419  std::map<std::size_t, std::size_t> lumping;
420  const uint MAX_ITER=100;
421  uint iter;
422  for (iter=0; iter < MAX_ITER; ++iter) {
423  // reset names in case of vanished states (due to lumping)
424  microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
425  if (microstate_names.count(0)) {
426  std::cerr << "\nwarning:\n"
427  << " there is a state '0' in your trajectory.\n"
428  << " are you sure you generated a proper"
429  << " trajectory of microstates\n"
430  << " (e.g. by running a final, seeded"
431  << " density-clustering to fill up the FEL)?\n"
432  << std::endl;
433  }
434  logger(std::cout) << " " << std::setw(3)
435  << iter+1
436  << " " << std::setw(6)
437  << Clustering::Tools::stringprintf("%0.3f", q_min)
438  << std::endl;
439  // get immediate future
440  std::map<std::size_t, std::size_t> future_state;
441  future_state = single_step_future_state(trans_prob
442  , microstate_names
443  , q_min
445  traj
446  , free_energy));
447  // compute MPP
448  std::map<std::size_t, std::vector<std::size_t>> mpp;
449  mpp = most_probable_path(future_state, microstate_names);
450  // compute sinks (i.e. states with lowest Free Energy per path)
451  std::map<std::size_t, std::size_t> sinks = path_sinks(traj
452  , mpp
453  , trans_prob
454  , microstate_names
455  , q_min
456  , free_energy);
457  // update transition matrix
458  trans_prob = updated_transition_probabilities(trans_prob
459  , sinks
461  // lump trajectory into sinks
462  std::vector<std::size_t> traj_old = traj;
463  traj = lumped_trajectory(traj, sinks);
464  for (auto from_to: sinks) {
465  std::size_t from = from_to.first;
466  std::size_t to = from_to.second;
467  if (from != to) {
468  lumping[from] = to;
469  }
470  }
471  // check convergence
472  if (traj_old == traj) {
473  break;
474  }
475  }
476  if (iter == MAX_ITER) {
477  throw std::runtime_error(Clustering::Tools::stringprintf(
478  "reached max. no. of iterations"
479  " for Q_min convergence: %d"
480  , iter));
481  } else {
482  return std::make_tuple(traj, lumping, trans_prob);
483  }
484  }
485 
486  void
487  main(boost::program_options::variables_map args) {
488  // import IO functions
495  // load initial trajectory, free energies, etc
496  std::string basename = args["output"].as<std::string>();
497  std::map<std::size_t, std::pair<std::size_t, float>> transitions;
498  std::map<std::size_t, std::size_t> max_pop;
499  std::map<std::size_t, float> max_qmin;
500  Clustering::logger(std::cout) << "~~~ reading files\n"
501  << " trajectory from: " << args["states"].as<std::string>()
502  << std::endl;
503  std::vector<std::size_t> traj;
504  traj = read_clustered_trajectory(args["states"].as<std::string>());
505 
506  // read previously used parameters
507  std::string header_comment = args["header"].as<std::string>();
508  std::map<std::string,float> commentsMap = args["commentsMap"].as<std::map<std::string,float>>();
509  Clustering::Tools::read_comments(args["states"].as<std::string>(), commentsMap);
510 
511  std::size_t n_frames = traj.size();
512  Clustering::logger(std::cout) << " free energy from: "
513  << args["free-energy-input"].as<std::string>()
514  << std::endl;
515  std::string fname_fe_in = args["free-energy-input"].as<std::string>();
516  std::vector<float> free_energy = read_free_energies(fname_fe_in);
517  //check if input is consistent
518  Clustering::Tools::read_comments(args["free-energy-input"].as<std::string>(), commentsMap);
519 
520  float q_min_from = args["qmin-from"].as<float>();
521  float q_min_to = args["qmin-to"].as<float>();
522  float q_min_step = args["qmin-step"].as<float>();
523  int lagtime = args["lagtime"].as<int>();
524  std::vector<std::size_t> concat_limits;
525  bool diff_sized_chunks = args.count("concat_limits");
526  if (diff_sized_chunks) {
527  Clustering::logger(std::cout) << " concat limits from: "
528  << args["concat-limits"].as<std::string>() << std::endl;
529  concat_limits = Clustering::Tools::read_concat_limits(args["concat-limits"].as<std::string>());
530  } else if (args.count("concat-nframes")) {
531  std::size_t n_frames_per_subtraj = args["concat-nframes"].as<std::size_t>();
532  for (std::size_t i=n_frames_per_subtraj; i <= n_frames; i += n_frames_per_subtraj) {
533  concat_limits.push_back(i);
534  }
535  } else {
536  concat_limits = {n_frames};
537  }
538  // check if concat_limits are well definied
539  Clustering::Tools::check_concat_limits(concat_limits, n_frames);
540 
541  SparseMatrixF trans_prob;
542  bool tprob_given = args.count("tprob");
543  Clustering::logger(std::cout) << "~~~ transition matrix" << std::endl;
544  if (tprob_given) {
545  // read transition matrix from file
546  std::string tprob_fname = args["tprob"].as<std::string>();
547  Clustering::logger(std::cout) << " read from " << tprob_fname << "\n"
548  << " lagtime -l will be ignored." << std::endl;
549  trans_prob = read_transition_probabilities(tprob_fname);
550  } else {
551  // compute transition matrix from trajectory
552  Clustering::logger(std::cout) << " compute it" << std::endl;
553  auto microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
554  if (diff_sized_chunks) {
557  , concat_limits
558  , lagtime)
559  , microstate_names);
560  } else {
562  transition_counts(traj
563  , concat_limits
564  , lagtime)
565  , microstate_names);
566  }
567  }
568  Clustering::logger(std::cout) << "\n~~~ run mpp\n iteration qmin" << std::endl;
569  for (float q_min=q_min_from; q_min <= q_min_to; q_min += q_min_step) {
570  auto traj_sinks_tprob = fixed_metastability_clustering(traj
571  , trans_prob
572  , q_min
573  , free_energy);
574 
575  std::string header_qmin = header_comment;
576  Clustering::Tools::append_commentsMap(header_qmin, commentsMap);
577  header_qmin.append(Clustering::Tools::stringprintf(
578  "#\n# mpp specific parameters: \n"
579  "# qmin = %0.3f \n", q_min));
580  // reuse updated transition matrix in next iteration
581  trans_prob = std::get<2>(traj_sinks_tprob);
582  // write trajectory at current Qmin level to file
583  traj = std::get<0>(traj_sinks_tprob);
584  write_single_column(stringprintf("%s_traj_%0.3f.dat"
585  , basename.c_str()
586  , q_min)
587  , traj, header_qmin);
588  // save transitions (i.e. lumping of states)
589  std::map<std::size_t, std::size_t> sinks = std::get<1>(traj_sinks_tprob);
590  for (auto from_to: sinks) {
591  transitions[from_to.first] = {from_to.second, q_min};
592  }
593  // write microstate populations to file
594  std::map<std::size_t, std::size_t> pops;
596  write_map<std::size_t, std::size_t>(stringprintf("%s_pop_%0.3f.dat"
597  , basename.c_str()
598  , q_min)
599  , pops, header_qmin);
600  // collect max. pops + max. q_min per microstate
601  for (std::size_t id: std::set<std::size_t>(traj.begin(), traj.end())) {
602  max_pop[id] = pops[id];
603  max_qmin[id] = q_min;
604  }
605  }
606  // write transitions to file
607  {
608  std::ofstream ofs(basename + "_transitions.dat");
609  for (auto trans: transitions) {
610  ofs << trans.first
611  << " "
612  << trans.second.first
613  << " "
614  << trans.second.second
615  << "\n";
616  }
617  }
618  Clustering::Tools::append_commentsMap(header_comment, commentsMap);
619  write_map<std::size_t, std::size_t>(basename + "_max_pop.dat", max_pop, header_comment);
620  write_map<std::size_t, float>(basename + "_max_qmin.dat", max_qmin, header_comment);
621  }
622  } // end namespace MPP
623 } // end namespace Clustering
624 
std::vector< std::size_t > read_concat_limits(std::string filename)
Definition: tools.cpp:133
boost::numeric::ublas::mapped_matrix< float > SparseMatrixF
BOOST implementation of a sparse matrix for floats.
Definition: mpp.hpp:59
SparseMatrixF read_transition_probabilities(std::string fname)
read (row-normalized) transition matrix from file
Definition: mpp.cpp:38
SparseMatrixF row_normalized_transition_probabilities(SparseMatrixF count_matrix, std::set< std::size_t > cluster_names)
compute transition matrix from counts by normalization of rows
Definition: mpp.cpp:159
Most Probable Path Clustering.
general namespace for clustering package
Definition: coring.cpp:38
Tools mainly for IO and some other functions.
std::map< std::size_t, std::size_t > microstate_populations(std::vector< std::size_t > traj)
compute microstate populations from clustered trajectory
Definition: tools.cpp:176
std::vector< float > read_free_energies(std::string filename)
read free energies from plain text file
Definition: tools.cpp:96
Define global logger.
std::map< std::size_t, std::size_t > microstate_populations(std::vector< std::size_t > clusters, std::set< std::size_t > cluster_names)
compute cluster populations
Definition: mpp.cpp:308
std::map< std::size_t, float > microstate_min_free_energy(const std::vector< std::size_t > &clustering, const std::vector< float > &free_energy)
Definition: mpp.cpp:320
void check_concat_limits(std::vector< std::size_t > concat_limits, std::size_t n_frames)
check if concat limits were passed correctly
Definition: tools.cpp:189
std::map< std::size_t, std::vector< std::size_t > > most_probable_path(std::map< std::size_t, std::size_t > future_state, std::set< std::size_t > cluster_names)
Definition: mpp.cpp:288
void write_map(std::string filename, std::map< KEY, VAL > mapping, std::string header_comment, bool val_then_key=false)
write key-value map to plain text file with key as first and value as second column ...
Definition: tools.hxx:209
std::map< std::size_t, std::size_t > path_sinks(std::vector< std::size_t > clusters, std::map< std::size_t, std::vector< std::size_t >> mpp, SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::vector< float > free_energy)
Definition: mpp.cpp:337
std::ostream & logger(std::ostream &s)
Definition: logger.cpp:32
void read_comments(std::string filename, std::map< std::string, float > &stringMap)
read comments of stringMap from file. Comments should start with #@
Definition: tools.cpp:229
SparseMatrixF updated_transition_probabilities(SparseMatrixF transition_matrix, std::map< std::size_t, std::size_t > sinks, std::map< std::size_t, std::size_t > pops)
update transition matrix after lumping states into sinks
Definition: mpp.cpp:181
std::map< std::size_t, std::size_t > single_step_future_state(SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::map< std::size_t, float > min_free_energy)
Definition: mpp.cpp:234
void append_commentsMap(std::string &header_comment, std::map< std::string, float > &stringMap)
append commentsMap to header comment
Definition: tools.cpp:267
SparseMatrixF weighted_transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps)
Definition: mpp.cpp:113
void write_single_column(std::string filename, std::vector< NUM > dat, std::string header_comment, bool with_scientific_format=false)
write single column of numbers to given file. number type (int, float, ...) given as template paramet...
Definition: tools.hxx:258
std::vector< NUM > read_single_column(std::string filename)
read single column of numbers from given file. number type (int, float, ...) given as template parame...
Definition: tools.hxx:230
std::vector< std::size_t > lumped_trajectory(std::vector< std::size_t > trajectory, std::map< std::size_t, std::size_t > sinks)
Definition: mpp.cpp:400
SparseMatrixF transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps, std::size_t i_max)
Definition: mpp.cpp:78
std::vector< std::size_t > read_clustered_trajectory(std::string filename)
read states from trajectory (given as plain text file)
Definition: tools.cpp:58
void main(boost::program_options::variables_map args)
MPP clustering control function and user interface.
Definition: mpp.cpp:487
std::string stringprintf(const std::string &str,...)
printf-version for std::string
Definition: tools.cpp:80
std::tuple< std::vector< std::size_t >, std::map< std::size_t, std::size_t >, SparseMatrixF > fixed_metastability_clustering(std::vector< std::size_t > initial_trajectory, SparseMatrixF trans_prob, float q_min, std::vector< float > free_energy)
run clustering for given Q_min value
Definition: mpp.cpp:413