coring.cpp
1 /*
2 Copyright (c) 2015-2019, Florian Sittel (www.lettis.net) and Daniel Nagel
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
26 #include "coring.hpp"
27 
28 #include "logger.hpp"
29 #include "tools.hpp"
30 
31 #include <iostream>
32 #include <fstream>
33 #include <algorithm>
34 #include <vector>
35 
36 #include <omp.h>
37 
38 namespace Clustering {
39 namespace Coring {
40  WTDMap
41  compute_wtd(std::list<std::size_t> streaks) {
42  WTDMap wtd;
43  if (streaks.size() > 0) {
44  streaks.sort(std::greater<std::size_t>());
45  std::size_t max_streak = streaks.front();
46  for (std::size_t i=0; i <= max_streak; ++i) {
47  float n_steps = 0.0f;
48  for (auto s: streaks) {
49  if (i > s) {
50  break;
51  }
52  n_steps += 1.0f;
53  }
54  wtd[i] = n_steps / ((float) streaks.size());
55  }
56  }
57  return wtd;
58  }
59 
60  void
61  main(boost::program_options::variables_map args) {
62  using namespace Clustering::Tools;
63  namespace b_po = boost::program_options;
64  // load states
65  Clustering::logger(std::cout) << "~~~ reading files\n"
66  << " trajectory from: " << args["states"].as<std::string>()
67  << std::endl;
68  std::vector<std::size_t> states = Clustering::Tools::read_clustered_trajectory(args["states"].as<std::string>());
69  std::set<std::size_t> state_names(states.begin(), states.end());
70  std::size_t n_frames = states.size();
71  std::string header_comment = args["header"].as<std::string>();
72  std::map<std::string,float> commentsMap = args["commentsMap"].as<std::map<std::string,float>>();
73  // read previously used parameters
74  read_comments(args["states"].as<std::string>(), commentsMap);
75  if (args.count("output") || args.count("distribution") || args.count("cores")) {
76  // load concatenation limits to treat concatenated trajectories correctly
77  // when performing dynamical corrections
78  std::vector<std::size_t> concat_limits;
79  if (args.count("concat-limits")) {
80  Clustering::logger(std::cout) << " limits from: "
81  << args["concat-limits"].as<std::string>() << std::endl;
82  concat_limits = Clustering::Tools::read_concat_limits(args["concat-limits"].as<std::string>());
83  } else if (args.count("concat-nframes")) {
84  std::size_t n_frames_per_subtraj = args["concat-nframes"].as<std::size_t>();
85  for (std::size_t i=n_frames_per_subtraj; i <= n_frames; i += n_frames_per_subtraj) {
86  concat_limits.push_back(i);
87  }
88  } else {
89  concat_limits = {n_frames};
90  }
91  // check if concat_limits are well definied
92  Clustering::Tools::check_concat_limits(concat_limits, n_frames);
93  // load window size information
94  Clustering::logger(std::cout) << "\n\n~~~ coring windows:\n from file: "
95  << args["windows"].as<std::string>() << std::endl;
96  std::map<std::size_t, std::size_t> coring_windows;
97  {
98  std::ifstream ifs(args["windows"].as<std::string>());
99 // std::string buf1, buf2;
100  std::string buf;
101  std::size_t state, window;
102  std::size_t size_for_all = 1;
103  if (ifs.fail()) {
104  std::cerr << "error: cannot open file '"
105  << args["windows"].as<std::string>()
106  << "'" << std::endl;
107  exit(EXIT_FAILURE);
108  } else {
109  while (ifs.good()) {
110  if (std::isdigit(ifs.peek())) {
111  ifs >> state;
112  ifs >> window;
113  if ( ! ifs.fail()) {
114  coring_windows[state] = window;
115  } else {
116  std::cerr << "error: file not correctly formated." << std::endl;
117  }
118  } else if (ifs.peek() == '*') {
119  ifs >> buf;
120  ifs >> window;
121  if ( ! ifs.fail()) {
122  size_for_all = window;
123  } else {
124  std::cerr << "error: file not correctly formated." << std::endl;
125  }
126  } else {
127  ifs.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
128  }
129  }
130 // while (ifs.good()) {
131 // ifs >> buf1;
132 // ifs >> buf2;
133 // if (ifs.good()) {
134 // if (buf1 == "*") {
135 // size_for_all = string_to_num<std::size_t>(buf2);
136 // } else {
137 // coring_windows[string_to_num<std::size_t>(buf1)] = string_to_num<std::size_t>(buf2);
138 // }
139 // }
140 // }
141  }
142  // fill remaining, not explicitly defined states with common window size
143  std::size_t undefined_windows = 0;
144  for (std::size_t name: state_names) {
145  if ( ! coring_windows.count(name)){
146  coring_windows[name] = size_for_all;
147  ++undefined_windows;
148  }
149  }
150  // if only single coring window is used
151  if (coring_windows.size() == 0) {
152  commentsMap["single_coring_time"] = size_for_all;
153  }
154  header_comment.append(Clustering::Tools::stringprintf(
155  "#\n# coring specific parameters: \n"
156  "# %i state-specific coring windows were read\n"
157  "# %i frames is used for reamining states\n",
158  state_names.size() - undefined_windows, size_for_all));
159  Clustering::logger(std::cout) << " " << state_names.size() - undefined_windows
160  << " state-specific coring windows were read" << std::endl;
161  if (size_for_all > 1) {
162  Clustering::logger(std::cout) << " default window was set to " << size_for_all
163  << " frames" << std::endl;
164  }
165  }
166  // core trajectory
167  Clustering::logger(std::cout) << "\n~~~ coring trajectory" << std::endl;
168  std::vector<std::size_t> cored_traj(n_frames);
169  std::size_t current_core = states[0];
170  std::vector<long> cores(n_frames);
171  std::size_t changed_frames = 0;
172  // honour concatenation limits, i.e. treat every concatenated trajectory-part on its own
173  std::size_t last_limit = 0;
174  for (std::size_t next_limit: concat_limits) {
175  current_core = states[last_limit];
176  std::size_t next_limit_corrected = std::min(next_limit, n_frames);
177  for (std::size_t i=last_limit; i < next_limit_corrected; ++i) {
178  // coring window
179  std::size_t w = std::min(i+coring_windows[states[i]], next_limit);
180  bool is_in_core = true;
181  for (std::size_t j=i+1; j < w; ++j) {
182  if (states[j] != states[i]) {
183  is_in_core = false;
184  break;
185  }
186  }
187  if (is_in_core) {
188  current_core = states[i];
189  cores[i] = current_core;
190  } else {
191  ++changed_frames;
192  cores[i] = -1;
193  }
194  cored_traj[i] = current_core;
195  }
196  last_limit = next_limit_corrected;
197  }
198  float changed_frames_perc = (float) 100*changed_frames / n_frames;
199  Clustering::logger(std::cout) << Clustering::Tools::stringprintf(" %.2f", changed_frames_perc)
200  << "% of frames were changed\n " << changed_frames
201  << " frames in total"
202  << "\n store result in: " << args["output"].as<std::string>()
203  << std::endl;
204  // write cored trajectory to file
205  std::string header_coring = header_comment
206  + Clustering::Tools::stringprintf("# %.2f", changed_frames_perc)
207  + "% of frames were changed\n";
208  if (args.count("output")) {
209  Clustering::Tools::write_clustered_trajectory(args["output"].as<std::string>(),
210  cored_traj,
211  header_coring,
212  commentsMap);
213  }
214  // write core information to file
215  if (args.count("cores")) {
216  append_commentsMap(header_coring, commentsMap);
217  Clustering::Tools::write_single_column<long>(args["cores"].as<std::string>(),
218  cores,
219  header_coring,
220  false);
221  }
222  // compute/save escape time distributions
223  if (args.count("distribution")) {
224  Clustering::logger(std::cout) << "~~~ generating distribution" << std::endl;
225  std::map<std::size_t, std::list<std::size_t>> streaks;
226  std::size_t current_state = cored_traj[0];
227  long n_counts = 0;
228  for (std::size_t state: cored_traj) {
229  if (state == current_state) {
230  ++n_counts;
231  } else {
232  streaks[current_state].push_back(n_counts);
233  current_state = state;
234  n_counts = 1;
235  }
236  }
237  streaks[current_state].push_back(n_counts);
238 
239  // append values to header comment
240  append_commentsMap(header_comment, commentsMap);
241 
242  std::map<std::size_t, WTDMap> etds;
243  for (std::size_t state: state_names) {
244  etds[state] = compute_wtd(streaks[state]);
245  }
246  // write WTDs to file
247  Clustering::logger(std::cout) << " storing..." << std::endl;
248  for (auto state_etd: etds) {
249  std::string fname = Clustering::Tools::stringprintf(args["distribution"].as<std::string>() + "_%d", state_etd.first);
250  Clustering::Tools::write_map<std::size_t, float>(fname, state_etd.second, header_comment);
251  }
252  }
253  } else {
254  std::cerr << "\n" << "error (coring): nothing to do! please define '--output', '--distribution' or both!" << "\n\n";
255  exit(EXIT_FAILURE);
256  }
257  }
258 } // end namespace Coring
259 } // end namespace Clustering
260 
std::vector< std::size_t > read_concat_limits(std::string filename)
Definition: tools.cpp:133
Dynamical Coring.
additional tools used throughout the clustering package
Definition: tools.cpp:33
general namespace for clustering package
Definition: coring.cpp:38
Tools mainly for IO and some other functions.
Define global logger.
void check_concat_limits(std::vector< std::size_t > concat_limits, std::size_t n_frames)
check if concat limits were passed correctly
Definition: tools.cpp:189
std::ostream & logger(std::ostream &s)
Definition: logger.cpp:32
void read_comments(std::string filename, std::map< std::string, float > &stringMap)
read comments of stringMap from file. Comments should start with #@
Definition: tools.cpp:229
void main(boost::program_options::variables_map args)
controlling function and user interface for boundary corrections
Definition: coring.cpp:61
void append_commentsMap(std::string &header_comment, std::map< std::string, float > &stringMap)
append commentsMap to header comment
Definition: tools.cpp:267
void write_clustered_trajectory(std::string filename, std::vector< std::size_t > traj, std::string header_comment, std::map< std::string, float > stringMap)
write state trajectory into plain text file
Definition: tools.cpp:63
std::map< std::size_t, float > WTDMap
store the waiting time distribution for each state with time vs count.
Definition: coring.hpp:52
WTDMap compute_wtd(std::list< std::size_t > streaks)
compute the waiting time distribution for single state
Definition: coring.cpp:41
std::vector< std::size_t > read_clustered_trajectory(std::string filename)
read states from trajectory (given as plain text file)
Definition: tools.cpp:58
std::string stringprintf(const std::string &str,...)
printf-version for std::string
Definition: tools.cpp:80