39 std::vector<unsigned int> i;
40 std::vector<unsigned int> j;
42 std::ifstream fh(fname);
59 std::cerr <<
"error: cannot open file " 61 <<
" for reading transition matrix." 66 unsigned int max_state = std::max((*std::max_element(i.begin()
68 , (*std::max_element(j.begin()
71 for (
unsigned int n=0; n < i.size(); ++n) {
72 trans_prob(i[n], j[n]) = k[n];
79 std::vector<std::size_t> concat_limits,
80 std::size_t n_lag_steps,
82 if (n_lag_steps == 0) {
83 std::cerr <<
"error: lagtime of 0 does not make any sense for" 84 <<
" MPP clustering" << std::endl;
87 std::vector<std::size_t>::iterator next_limit = concat_limits.begin();
89 i_max = (*std::max_element(trajectory.begin()
93 for (std::size_t i=0; i < trajectory.size() - n_lag_steps; ++i) {
94 std::size_t from = trajectory[i];
95 std::size_t to = trajectory[i+n_lag_steps];
96 if (next_limit != concat_limits.end()) {
98 if (i+n_lag_steps < (*next_limit)) {
99 count_matrix(from, to) += 1;
100 }
else if (i+1 == (*next_limit)) {
106 count_matrix(from, to) += 1;
114 , std::vector<std::size_t> concat_limits
115 , std::size_t n_lag_steps) {
117 std::size_t i_max = (*std::max_element(trajectory.begin()
118 , trajectory.end()));
120 std::vector<float> acc_weights(i_max+1);
121 std::size_t lower_lim = 0;
122 for (std::size_t i_chunk=0; i_chunk < concat_limits.size(); ++i_chunk) {
124 std::size_t upper_lim = lower_lim + concat_limits[i_chunk];
125 std::vector<std::size_t> chunk = std::vector<std::size_t>(
126 concat_limits.begin()+lower_lim
127 , concat_limits.begin()+upper_lim);
133 std::vector<float> weights(i_max+1);
134 for (std::size_t i=0; i < i_max+1; ++i) {
135 for (std::size_t j=0; j < i_max+1; ++j) {
136 weights[i] += counts(i,j);
138 weights[i] = sqrt(weights[i]);
139 acc_weights[i] += weights[i];
142 for (std::size_t i=0; i < i_max+1; ++i) {
143 for (std::size_t j=0; j < i_max+1; ++j) {
144 weighted_counts(i,j) += weights[i]*counts(i,j);
147 lower_lim = upper_lim;
150 for (std::size_t i=0; i < i_max+1; ++i) {
151 for (std::size_t j=0; j < i_max+1; ++j) {
152 weighted_counts(i,j) /= acc_weights[i];
155 return weighted_counts;
160 , std::set<std::size_t> cluster_names) {
161 std::size_t n_rows = count_matrix.size1();
162 std::size_t n_cols = count_matrix.size2();
164 for (std::size_t i: cluster_names) {
165 std::size_t row_sum = 0;
166 for (std::size_t j=0; j < n_cols; ++j) {
167 row_sum += count_matrix(i,j);
170 for (std::size_t j=0; j < n_cols; ++j) {
171 if (count_matrix(i,j) != 0) {
172 transition_matrix(i,j) = count_matrix(i,j) / row_sum;
177 return transition_matrix;
182 , std::map<std::size_t, std::size_t> sinks
183 , std::map<std::size_t, std::size_t> pops) {
184 std::size_t n_rows = transition_matrix.size1();
185 std::size_t n_cols = transition_matrix.size2();
189 std::set<std::size_t> macrostates;
192 std::map<std::size_t, std::set<std::size_t>> microstates;
193 for (
auto lump_from_to: sinks) {
194 macrostates.insert(lump_from_to.second);
195 if (microstates.count(lump_from_to.second) == 0) {
196 microstates[lump_from_to.second] = {lump_from_to.first};
198 microstates[lump_from_to.second].insert(lump_from_to.first);
202 std::map<std::size_t, float> relative_pops;
203 for (
auto macro1: macrostates) {
204 std::size_t pop_total = 0;
205 for (
auto micro1: microstates[macro1]) {
206 pop_total += pops[micro1];
208 for (
auto micro1: microstates[macro1]) {
209 relative_pops[micro1] = (float) pops[micro1] / (
float) pop_total;
214 for (
auto macro1: macrostates) {
215 float macro_row_sum = 0.0f;
216 for (
auto macro2: macrostates) {
217 for (
auto micro1: microstates[macro1]) {
218 for (
auto micro2: microstates[macro2]) {
219 updated_matrix(macro1, macro2) += relative_pops[micro1]
220 * transition_matrix(micro1, micro2);
223 macro_row_sum += updated_matrix(macro1, macro2);
226 for (
auto macro2: macrostates) {
227 updated_matrix(macro1, macro2) /= macro_row_sum;
230 return updated_matrix;
233 std::map<std::size_t, std::size_t>
235 std::set<std::size_t> cluster_names,
237 std::map<std::size_t, float> min_free_energy) {
238 std::map<std::size_t, std::size_t> future_state;
239 for (std::size_t i: cluster_names) {
240 std::vector<std::size_t> candidates;
241 float max_trans_prob = 0.0f;
242 if (transition_matrix(i,i) >= q_min) {
246 for (std::size_t j: cluster_names) {
251 if (transition_matrix(i,j) > max_trans_prob) {
252 max_trans_prob = transition_matrix(i,j);
254 }
else if (transition_matrix(i,j) == max_trans_prob
255 && max_trans_prob > 0.0f) {
256 candidates.push_back(j);
261 if (candidates.size() == 0) {
262 std::cerr <<
"error: state '" 264 <<
"' has self-transition probability of " 265 << transition_matrix(i,i)
268 <<
" and does not find any transition candidates." 269 <<
" please have a look at your trajectory!" 272 }
else if (candidates.size() == 1) {
273 future_state[i] = candidates[0];
276 auto min_fe_compare = [&](std::size_t i, std::size_t j) {
277 return min_free_energy[i] < min_free_energy[j];
279 future_state[i] = (*std::min_element(candidates.begin()
287 std::map<std::size_t, std::vector<std::size_t>>
289 std::set<std::size_t> cluster_names) {
290 std::map<std::size_t, std::vector<std::size_t>> mpp;
291 for (std::size_t i: cluster_names) {
292 std::vector<std::size_t> path = {i};
293 std::set<std::size_t> visited = {i};
294 std::size_t next_state = future_state[i];
297 while (visited.count(next_state) == 0) {
298 path.push_back(next_state);
299 visited.insert(next_state);
300 next_state = future_state[next_state];
307 std::map<std::size_t, std::size_t>
309 std::set<std::size_t> cluster_names) {
310 std::map<std::size_t, std::size_t> pops;
311 for (std::size_t i: cluster_names) {
312 pops[i] = std::count(clusters.begin(), clusters.end(), i);
319 std::map<std::size_t, float>
321 const std::vector<float>& free_energy) {
322 std::map<std::size_t, float> min_fe;
323 for (std::size_t i=0; i < clustering.size(); ++i) {
324 std::size_t state = clustering[i];
325 if (min_fe.count(state) == 0) {
326 min_fe[state] = free_energy[i];
328 if (free_energy[i] < min_fe[state]) {
329 min_fe[state] = free_energy[i];
336 std::map<std::size_t, std::size_t>
338 std::map<std::size_t, std::vector<std::size_t>> mpp,
340 std::set<std::size_t> cluster_names,
342 std::vector<float> free_energy) {
343 std::map<std::size_t, std::size_t> pops;
345 std::map<std::size_t, float> min_free_energy;
347 std::map<std::size_t, std::size_t> sinks;
348 for (std::size_t i: cluster_names) {
349 std::vector<std::size_t> metastable_states;
350 for (std::size_t j: mpp[i]) {
352 if (transition_matrix(j,j) > q_min) {
353 metastable_states.push_back(j);
356 if (metastable_states.size() == 0) {
358 metastable_states = mpp[i];
361 auto pop_compare = [&](std::size_t i, std::size_t j) ->
bool {
362 return pops[i] < pops[j];
365 auto fe_compare = [&](std::size_t i, std::size_t j) ->
bool {
366 return min_free_energy[i] < min_free_energy[j];
369 auto candidate = std::min_element(metastable_states.begin()
370 , metastable_states.end()
372 float min_fe = free_energy[*candidate];
373 std::set<std::size_t> sink_candidates;
374 while (candidate != metastable_states.end()
375 && free_energy[*candidate] == min_fe) {
378 sink_candidates.insert(*candidate);
379 metastable_states.erase(candidate);
380 candidate = std::min_element(metastable_states.begin()
381 , metastable_states.end()
385 if (sink_candidates.size() == 1) {
386 sinks[i] = (*sink_candidates.begin());
389 sinks[i] = (*std::max_element(sink_candidates.begin()
390 , sink_candidates.end()
399 std::vector<std::size_t>
401 std::map<std::size_t, std::size_t> sinks) {
402 for (std::size_t& state: trajectory) {
403 state = sinks[state];
410 std::tuple<std::vector<std::size_t>
411 , std::map<std::size_t, std::size_t>
416 std::vector<float> free_energy) {
417 std::set<std::size_t> microstate_names;
418 std::vector<std::size_t> traj = initial_trajectory;
419 std::map<std::size_t, std::size_t> lumping;
420 const uint MAX_ITER=100;
422 for (iter=0; iter < MAX_ITER; ++iter) {
424 microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
425 if (microstate_names.count(0)) {
426 std::cerr <<
"\nwarning:\n" 427 <<
" there is a state '0' in your trajectory.\n" 428 <<
" are you sure you generated a proper" 429 <<
" trajectory of microstates\n" 430 <<
" (e.g. by running a final, seeded" 431 <<
" density-clustering to fill up the FEL)?\n" 434 logger(std::cout) <<
" " << std::setw(3)
436 <<
" " << std::setw(6)
440 std::map<std::size_t, std::size_t> future_state;
448 std::map<std::size_t, std::vector<std::size_t>> mpp;
451 std::map<std::size_t, std::size_t> sinks =
path_sinks(traj
462 std::vector<std::size_t> traj_old = traj;
464 for (
auto from_to: sinks) {
465 std::size_t from = from_to.first;
466 std::size_t to = from_to.second;
472 if (traj_old == traj) {
476 if (iter == MAX_ITER) {
478 "reached max. no. of iterations" 479 " for Q_min convergence: %d" 482 return std::make_tuple(traj, lumping, trans_prob);
487 main(boost::program_options::variables_map args) {
496 std::string basename = args[
"output"].as<std::string>();
497 std::map<std::size_t, std::pair<std::size_t, float>> transitions;
498 std::map<std::size_t, std::size_t> max_pop;
499 std::map<std::size_t, float> max_qmin;
501 <<
" trajectory from: " << args[
"states"].as<std::string>()
503 std::vector<std::size_t> traj;
504 traj = read_clustered_trajectory(args[
"states"].as<std::string>());
507 std::string header_comment = args[
"header"].as<std::string>();
508 std::map<std::string,float> commentsMap = args[
"commentsMap"].as<std::map<std::string,float>>();
511 std::size_t n_frames = traj.size();
513 << args[
"free-energy-input"].as<std::string>()
515 std::string fname_fe_in = args[
"free-energy-input"].as<std::string>();
516 std::vector<float> free_energy = read_free_energies(fname_fe_in);
520 float q_min_from = args[
"qmin-from"].as<
float>();
521 float q_min_to = args[
"qmin-to"].as<
float>();
522 float q_min_step = args[
"qmin-step"].as<
float>();
523 int lagtime = args[
"lagtime"].as<
int>();
524 std::vector<std::size_t> concat_limits;
525 bool diff_sized_chunks = args.count(
"concat_limits");
526 if (diff_sized_chunks) {
528 << args[
"concat-limits"].as<std::string>() << std::endl;
530 }
else if (args.count(
"concat-nframes")) {
531 std::size_t n_frames_per_subtraj = args[
"concat-nframes"].as<std::size_t>();
532 for (std::size_t i=n_frames_per_subtraj; i <= n_frames; i += n_frames_per_subtraj) {
533 concat_limits.push_back(i);
536 concat_limits = {n_frames};
542 bool tprob_given = args.count(
"tprob");
546 std::string tprob_fname = args[
"tprob"].as<std::string>();
548 <<
" lagtime -l will be ignored." << std::endl;
553 auto microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
554 if (diff_sized_chunks) {
569 for (
float q_min=q_min_from; q_min <= q_min_to; q_min += q_min_step) {
575 std::string header_qmin = header_comment;
578 "#\n# mpp specific parameters: \n" 579 "# qmin = %0.3f \n", q_min));
581 trans_prob = std::get<2>(traj_sinks_tprob);
583 traj = std::get<0>(traj_sinks_tprob);
587 , traj, header_qmin);
589 std::map<std::size_t, std::size_t> sinks = std::get<1>(traj_sinks_tprob);
590 for (
auto from_to: sinks) {
591 transitions[from_to.first] = {from_to.second, q_min};
594 std::map<std::size_t, std::size_t> pops;
596 write_map<std::size_t, std::size_t>(stringprintf(
"%s_pop_%0.3f.dat" 599 , pops, header_qmin);
601 for (std::size_t
id: std::set<std::size_t>(traj.begin(), traj.end())) {
602 max_pop[id] = pops[id];
603 max_qmin[id] = q_min;
608 std::ofstream ofs(basename +
"_transitions.dat");
609 for (
auto trans: transitions) {
612 << trans.second.first
614 << trans.second.second
619 write_map<std::size_t, std::size_t>(basename +
"_max_pop.dat", max_pop, header_comment);
620 write_map<std::size_t, float>(basename +
"_max_qmin.dat", max_qmin, header_comment);
boost::numeric::ublas::mapped_matrix< float > SparseMatrixF
BOOST implementation of a sparse matrix for floats.
SparseMatrixF read_transition_probabilities(std::string fname)
read (row-normalized) transition matrix from file
SparseMatrixF row_normalized_transition_probabilities(SparseMatrixF count_matrix, std::set< std::size_t > cluster_names)
compute transition matrix from counts by normalization of rows
Most Probable Path Clustering.
general namespace for clustering package
std::map< std::size_t, std::size_t > microstate_populations(std::vector< std::size_t > clusters, std::set< std::size_t > cluster_names)
compute cluster populations
std::map< std::size_t, float > microstate_min_free_energy(const std::vector< std::size_t > &clustering, const std::vector< float > &free_energy)
std::map< std::size_t, std::vector< std::size_t > > most_probable_path(std::map< std::size_t, std::size_t > future_state, std::set< std::size_t > cluster_names)
std::map< std::size_t, std::size_t > path_sinks(std::vector< std::size_t > clusters, std::map< std::size_t, std::vector< std::size_t >> mpp, SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::vector< float > free_energy)
std::ostream & logger(std::ostream &s)
SparseMatrixF updated_transition_probabilities(SparseMatrixF transition_matrix, std::map< std::size_t, std::size_t > sinks, std::map< std::size_t, std::size_t > pops)
update transition matrix after lumping states into sinks
std::map< std::size_t, std::size_t > single_step_future_state(SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::map< std::size_t, float > min_free_energy)
SparseMatrixF weighted_transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps)
std::vector< std::size_t > lumped_trajectory(std::vector< std::size_t > trajectory, std::map< std::size_t, std::size_t > sinks)
SparseMatrixF transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps, std::size_t i_max)
void main(boost::program_options::variables_map args)
MPP clustering control function and user interface.
std::tuple< std::vector< std::size_t >, std::map< std::size_t, std::size_t >, SparseMatrixF > fixed_metastability_clustering(std::vector< std::size_t > initial_trajectory, SparseMatrixF trans_prob, float q_min, std::vector< float > free_energy)
run clustering for given Q_min value