39       std::vector<unsigned int> i;
    40       std::vector<unsigned int> j;
    42       std::ifstream fh(fname);
    59         std::cerr << 
"error: cannot open file "    61                   << 
" for reading transition matrix."    66       unsigned int max_state = std::max((*std::max_element(i.begin()
    68                                       , (*std::max_element(j.begin()
    71       for (
unsigned int n=0; n < i.size(); ++n) {
    72         trans_prob(i[n], j[n]) = k[n];
    79                       std::vector<std::size_t> concat_limits,
    80                       std::size_t n_lag_steps,
    82       if (n_lag_steps == 0) {
    83         std::cerr << 
"error: lagtime of 0 does not make any sense for"    84                   << 
" MPP clustering" << std::endl;
    87       std::vector<std::size_t>::iterator next_limit = concat_limits.begin();
    89         i_max = (*std::max_element(trajectory.begin()
    93       for (std::size_t i=0; i < trajectory.size() - n_lag_steps; ++i) {
    94         std::size_t from = trajectory[i];
    95         std::size_t to = trajectory[i+n_lag_steps];
    96         if (next_limit != concat_limits.end()) {
    98           if (i+n_lag_steps < (*next_limit)) {
    99             count_matrix(from, to) += 1;
   100           } 
else if (i+1 == (*next_limit)) {
   106           count_matrix(from, to) += 1;
   114                              , std::vector<std::size_t> concat_limits
   115                              , std::size_t n_lag_steps) {
   117       std::size_t i_max = (*std::max_element(trajectory.begin()
   118                                            , trajectory.end()));
   120       std::vector<float> acc_weights(i_max+1);
   121       std::size_t lower_lim = 0;
   122       for (std::size_t i_chunk=0; i_chunk < concat_limits.size(); ++i_chunk) {
   124         std::size_t upper_lim = lower_lim + concat_limits[i_chunk];
   125         std::vector<std::size_t> chunk = std::vector<std::size_t>(
   126                                            concat_limits.begin()+lower_lim
   127                                          , concat_limits.begin()+upper_lim);
   133         std::vector<float> weights(i_max+1);
   134         for (std::size_t i=0; i < i_max+1; ++i) {
   135           for (std::size_t j=0; j < i_max+1; ++j) {
   136             weights[i] += counts(i,j);
   138           weights[i] = sqrt(weights[i]);
   139           acc_weights[i] += weights[i];
   142         for (std::size_t i=0; i < i_max+1; ++i) {
   143           for (std::size_t j=0; j < i_max+1; ++j) {
   144             weighted_counts(i,j) += weights[i]*counts(i,j);
   147         lower_lim = upper_lim;
   150       for (std::size_t i=0; i < i_max+1; ++i) {
   151         for (std::size_t j=0; j < i_max+1; ++j) {
   152           weighted_counts(i,j) /= acc_weights[i];
   155       return weighted_counts;
   160                                           , std::set<std::size_t> cluster_names) {
   161       std::size_t n_rows = count_matrix.size1();
   162       std::size_t n_cols = count_matrix.size2();
   164       for (std::size_t i: cluster_names) {
   165         std::size_t row_sum = 0;
   166         for (std::size_t j=0; j < n_cols; ++j) {
   167           row_sum += count_matrix(i,j);
   170           for (std::size_t j=0; j < n_cols; ++j) {
   171             if (count_matrix(i,j) != 0) {
   172               transition_matrix(i,j) = count_matrix(i,j) / row_sum;
   177       return transition_matrix;
   182                                    , std::map<std::size_t, std::size_t> sinks
   183                                    , std::map<std::size_t, std::size_t> pops) {
   184       std::size_t n_rows = transition_matrix.size1();
   185       std::size_t n_cols = transition_matrix.size2();
   189       std::set<std::size_t> macrostates;
   192       std::map<std::size_t, std::set<std::size_t>> microstates;
   193       for (
auto lump_from_to: sinks) {
   194         macrostates.insert(lump_from_to.second);
   195         if (microstates.count(lump_from_to.second) == 0) {
   196           microstates[lump_from_to.second] = {lump_from_to.first};
   198           microstates[lump_from_to.second].insert(lump_from_to.first);
   202       std::map<std::size_t, float> relative_pops;
   203       for (
auto macro1: macrostates) {
   204         std::size_t pop_total = 0;
   205         for (
auto micro1: microstates[macro1]) {
   206           pop_total += pops[micro1];
   208         for (
auto micro1: microstates[macro1]) {
   209           relative_pops[micro1] = (float) pops[micro1] / (
float) pop_total;
   214       for (
auto macro1: macrostates) {
   215         float macro_row_sum = 0.0f;
   216         for (
auto macro2: macrostates) {
   217           for (
auto micro1: microstates[macro1]) {
   218             for (
auto micro2: microstates[macro2]) {
   219               updated_matrix(macro1, macro2) += relative_pops[micro1]
   220                                               * transition_matrix(micro1, micro2);
   223           macro_row_sum += updated_matrix(macro1, macro2);
   226         for (
auto macro2: macrostates) {
   227           updated_matrix(macro1, macro2) /= macro_row_sum;
   230       return updated_matrix;
   233     std::map<std::size_t, std::size_t>
   235                              std::set<std::size_t> cluster_names,
   237                              std::map<std::size_t, float> min_free_energy) {
   238       std::map<std::size_t, std::size_t> future_state;
   239       for (std::size_t i: cluster_names) {
   240         std::vector<std::size_t> candidates;
   241         float max_trans_prob = 0.0f;
   242         if (transition_matrix(i,i) >= q_min) {
   246           for (std::size_t j: cluster_names) {
   251               if (transition_matrix(i,j) > max_trans_prob) {
   252                 max_trans_prob = transition_matrix(i,j);
   254               } 
else if (transition_matrix(i,j) == max_trans_prob
   255                       && max_trans_prob > 0.0f) {
   256                 candidates.push_back(j);
   261         if (candidates.size() == 0) {
   262           std::cerr << 
"error: state '"   264                     << 
"' has self-transition probability of "   265                     << transition_matrix(i,i)
   268                     << 
" and does not find any transition candidates."   269                     << 
" please have a look at your trajectory!"   272         } 
else if (candidates.size() == 1) {
   273           future_state[i] = candidates[0];
   276           auto min_fe_compare = [&](std::size_t i, std::size_t j) {
   277             return min_free_energy[i] < min_free_energy[j];
   279           future_state[i] = (*std::min_element(candidates.begin()
   287     std::map<std::size_t, std::vector<std::size_t>>
   289                        std::set<std::size_t> cluster_names) {
   290       std::map<std::size_t, std::vector<std::size_t>> mpp;
   291       for (std::size_t i: cluster_names) {
   292         std::vector<std::size_t> path = {i};
   293         std::set<std::size_t> visited = {i};
   294         std::size_t next_state = future_state[i];
   297         while (visited.count(next_state) == 0) {
   298           path.push_back(next_state);
   299           visited.insert(next_state);
   300           next_state = future_state[next_state];
   307     std::map<std::size_t, std::size_t>
   309                            std::set<std::size_t> cluster_names) {
   310       std::map<std::size_t, std::size_t> pops;
   311       for (std::size_t i: cluster_names) {
   312         pops[i] = std::count(clusters.begin(), clusters.end(), i);
   319     std::map<std::size_t, float>
   321                                const std::vector<float>& free_energy) {
   322       std::map<std::size_t, float> min_fe;
   323       for (std::size_t i=0; i < clustering.size(); ++i) {
   324         std::size_t state = clustering[i];
   325         if (min_fe.count(state) == 0) {
   326           min_fe[state] = free_energy[i];
   328           if (free_energy[i] < min_fe[state]) {
   329             min_fe[state] = free_energy[i];
   336     std::map<std::size_t, std::size_t>
   338                std::map<std::size_t, std::vector<std::size_t>> mpp,
   340                std::set<std::size_t> cluster_names,
   342                std::vector<float> free_energy) {
   343       std::map<std::size_t, std::size_t> pops;
   345       std::map<std::size_t, float> min_free_energy;
   347       std::map<std::size_t, std::size_t> sinks;
   348       for (std::size_t i: cluster_names) {
   349         std::vector<std::size_t> metastable_states;
   350         for (std::size_t j: mpp[i]) {
   352           if (transition_matrix(j,j) > q_min) {
   353             metastable_states.push_back(j);
   356         if (metastable_states.size() == 0) {
   358           metastable_states = mpp[i];
   361         auto pop_compare = [&](std::size_t i, std::size_t j) -> 
bool {
   362           return pops[i] < pops[j];
   365         auto fe_compare = [&](std::size_t i, std::size_t j) -> 
bool {
   366           return min_free_energy[i] < min_free_energy[j];
   369         auto candidate = std::min_element(metastable_states.begin()
   370                                         , metastable_states.end()
   372         float min_fe = free_energy[*candidate];
   373         std::set<std::size_t> sink_candidates;
   374         while (candidate != metastable_states.end()
   375             && free_energy[*candidate] == min_fe) {
   378           sink_candidates.insert(*candidate);
   379           metastable_states.erase(candidate);
   380           candidate = std::min_element(metastable_states.begin()
   381                                      , metastable_states.end()
   385         if (sink_candidates.size() == 1) {
   386           sinks[i] = (*sink_candidates.begin());
   389           sinks[i] = (*std::max_element(sink_candidates.begin()
   390                                       , sink_candidates.end()
   399     std::vector<std::size_t>
   401                       std::map<std::size_t, std::size_t> sinks) {
   402       for (std::size_t& state: trajectory) {
   403         state = sinks[state];
   410     std::tuple<std::vector<std::size_t>
   411              , std::map<std::size_t, std::size_t>
   416                                    std::vector<float> free_energy) {
   417       std::set<std::size_t> microstate_names;
   418       std::vector<std::size_t> traj = initial_trajectory;
   419       std::map<std::size_t, std::size_t> lumping;
   420       const uint MAX_ITER=100;
   422       for (iter=0; iter < MAX_ITER; ++iter) {
   424         microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
   425         if (microstate_names.count(0)) {
   426           std::cerr << 
"\nwarning:\n"   427                     << 
"  there is a state '0' in your trajectory.\n"   428                     << 
"  are you sure you generated a proper"   429                     << 
" trajectory of microstates\n"   430                     << 
"  (e.g. by running a final, seeded"   431                     << 
" density-clustering to fill up the FEL)?\n"   434         logger(std::cout) << 
"          " << std::setw(3)
   436                           << 
" " << std::setw(6)
   440         std::map<std::size_t, std::size_t> future_state;
   448         std::map<std::size_t, std::vector<std::size_t>> mpp;
   451         std::map<std::size_t, std::size_t> sinks = 
path_sinks(traj
   462         std::vector<std::size_t> traj_old = traj;
   464         for (
auto from_to: sinks) {
   465           std::size_t from = from_to.first;
   466           std::size_t to = from_to.second;
   472         if (traj_old == traj) {
   476       if (iter == MAX_ITER) {
   478                                    "reached max. no. of iterations"   479                                    " for Q_min convergence: %d"   482         return std::make_tuple(traj, lumping, trans_prob);
   487     main(boost::program_options::variables_map args) {
   496       std::string basename = args[
"output"].as<std::string>();
   497       std::map<std::size_t, std::pair<std::size_t, float>> transitions;
   498       std::map<std::size_t, std::size_t> max_pop;
   499       std::map<std::size_t, float> max_qmin;
   501                                     << 
"    trajectory from: " << args[
"states"].as<std::string>()
   503       std::vector<std::size_t> traj;
   504       traj = read_clustered_trajectory(args[
"states"].as<std::string>());
   507       std::string header_comment = args[
"header"].as<std::string>();
   508       std::map<std::string,float> commentsMap = args[
"commentsMap"].as<std::map<std::string,float>>();
   511       std::size_t n_frames = traj.size();
   513                                     << args[
"free-energy-input"].as<std::string>()
   515       std::string fname_fe_in = args[
"free-energy-input"].as<std::string>();
   516       std::vector<float> free_energy = read_free_energies(fname_fe_in);
   520       float q_min_from = args[
"qmin-from"].as<
float>();
   521       float q_min_to = args[
"qmin-to"].as<
float>();
   522       float q_min_step = args[
"qmin-step"].as<
float>();
   523       int lagtime = args[
"lagtime"].as<
int>();
   524       std::vector<std::size_t> concat_limits;
   525       bool diff_sized_chunks = args.count(
"concat_limits");
   526       if (diff_sized_chunks) {
   528                                       << args[
"concat-limits"].as<std::string>() << std::endl;
   530       } 
else if (args.count(
"concat-nframes")) {
   531         std::size_t n_frames_per_subtraj = args[
"concat-nframes"].as<std::size_t>();
   532         for (std::size_t i=n_frames_per_subtraj; i <= n_frames; i += n_frames_per_subtraj) {
   533           concat_limits.push_back(i);
   536         concat_limits = {n_frames};
   542       bool tprob_given = args.count(
"tprob");
   546         std::string tprob_fname = args[
"tprob"].as<std::string>();
   548                                       << 
"     lagtime -l will be ignored." << std::endl;
   553         auto microstate_names = std::set<std::size_t>(traj.begin(), traj.end());
   554         if (diff_sized_chunks) {
   569       for (
float q_min=q_min_from; q_min <= q_min_to; q_min += q_min_step) {
   575         std::string header_qmin = header_comment;
   578                 "#\n# mpp specific parameters: \n"   579                 "#    qmin = %0.3f \n", q_min));
   581         trans_prob = std::get<2>(traj_sinks_tprob);
   583         traj = std::get<0>(traj_sinks_tprob);
   587                           , traj, header_qmin);
   589         std::map<std::size_t, std::size_t> sinks = std::get<1>(traj_sinks_tprob);
   590         for (
auto from_to: sinks) {
   591           transitions[from_to.first] = {from_to.second, q_min};
   594         std::map<std::size_t, std::size_t> pops;
   596         write_map<std::size_t, std::size_t>(stringprintf(
"%s_pop_%0.3f.dat"   599                                           , pops, header_qmin);
   601         for (std::size_t 
id: std::set<std::size_t>(traj.begin(), traj.end())) {
   602           max_pop[id] = pops[id];
   603           max_qmin[id] = q_min;
   608         std::ofstream ofs(basename + 
"_transitions.dat");
   609         for (
auto trans: transitions) {
   612               << trans.second.first
   614               << trans.second.second
   619       write_map<std::size_t, std::size_t>(basename + 
"_max_pop.dat", max_pop, header_comment);
   620       write_map<std::size_t, float>(basename + 
"_max_qmin.dat", max_qmin, header_comment);
 
boost::numeric::ublas::mapped_matrix< float > SparseMatrixF
BOOST implementation of a sparse matrix for floats. 
 
SparseMatrixF read_transition_probabilities(std::string fname)
read (row-normalized) transition matrix from file 
 
SparseMatrixF row_normalized_transition_probabilities(SparseMatrixF count_matrix, std::set< std::size_t > cluster_names)
compute transition matrix from counts by normalization of rows 
 
Most Probable Path Clustering. 
 
general namespace for clustering package 
 
std::map< std::size_t, std::size_t > microstate_populations(std::vector< std::size_t > clusters, std::set< std::size_t > cluster_names)
compute cluster populations 
 
std::map< std::size_t, float > microstate_min_free_energy(const std::vector< std::size_t > &clustering, const std::vector< float > &free_energy)
 
std::map< std::size_t, std::vector< std::size_t > > most_probable_path(std::map< std::size_t, std::size_t > future_state, std::set< std::size_t > cluster_names)
 
std::map< std::size_t, std::size_t > path_sinks(std::vector< std::size_t > clusters, std::map< std::size_t, std::vector< std::size_t >> mpp, SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::vector< float > free_energy)
 
std::ostream & logger(std::ostream &s)
 
SparseMatrixF updated_transition_probabilities(SparseMatrixF transition_matrix, std::map< std::size_t, std::size_t > sinks, std::map< std::size_t, std::size_t > pops)
update transition matrix after lumping states into sinks 
 
std::map< std::size_t, std::size_t > single_step_future_state(SparseMatrixF transition_matrix, std::set< std::size_t > cluster_names, float q_min, std::map< std::size_t, float > min_free_energy)
 
SparseMatrixF weighted_transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps)
 
std::vector< std::size_t > lumped_trajectory(std::vector< std::size_t > trajectory, std::map< std::size_t, std::size_t > sinks)
 
SparseMatrixF transition_counts(std::vector< std::size_t > trajectory, std::vector< std::size_t > concat_limits, std::size_t n_lag_steps, std::size_t i_max)
 
void main(boost::program_options::variables_map args)
MPP clustering control function and user interface. 
 
std::tuple< std::vector< std::size_t >, std::map< std::size_t, std::size_t >, SparseMatrixF > fixed_metastability_clustering(std::vector< std::size_t > initial_trajectory, SparseMatrixF trans_prob, float q_min, std::vector< float > free_energy)
run clustering for given Q_min value