tools.cpp
1 /*
2 Copyright (c) 2015-2019, Florian Sittel (www.lettis.net) and Daniel Nagel
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
26 #include "tools.hpp"
27 #include "logger.hpp"
28 
29 #include <cmath>
30 #include <stdarg.h>
31 
32 namespace Clustering {
33 namespace Tools {
34 
35 unsigned int
36 min_multiplicator(unsigned int orig
37  , unsigned int mult) {
38  return (unsigned int) std::ceil(orig / ((float) mult));
39 };
40 
41 void
42 write_fes(std::string filename, std::vector<float> fes, std::string header_comment,
43  std::map<std::string,float> stringMap) {
44  append_commentsMap(header_comment, stringMap);
45  header_comment.append("#\n# free energy of each frame\n");
46  write_single_column<float>(filename, fes, header_comment, true);
47 }
48 
49 void
50 write_pops(std::string filename, std::vector<std::size_t> pops, std::string header_comment,
51  std::map<std::string,float> stringMap) {
52  append_commentsMap(header_comment, stringMap);
53  header_comment.append("#\n# point density of each frame\n");
54  write_single_column<std::size_t>(filename, pops, header_comment, false);
55 }
56 
57 std::vector<std::size_t>
58 read_clustered_trajectory(std::string filename) {
59  return read_single_column<std::size_t>(filename);
60 }
61 
62 void
63 write_clustered_trajectory(std::string filename, std::vector<std::size_t> traj,
64  std::string header_comment, std::map<std::string,float> stringMap) {
65  append_commentsMap(header_comment, stringMap);
66  header_comment.append("#\n# state/cluster id frames are assigned to\n");
67  write_single_column<std::size_t>(filename, traj, header_comment, false);
68 }
69 
71 
79 std::string
80 stringprintf(const std::string& str, ...) {
81  unsigned int size = 256;
82  va_list args;
83  char* buf = (char*) malloc(size * sizeof(char));
84  va_start(args, str);
85  while (size <= (unsigned int) vsnprintf(buf, size, str.c_str(), args)) {
86  size *= 2;
87  buf = (char*) realloc(buf, size * sizeof(char));
88  }
89  va_end(args);
90  std::string result(buf);
91  free(buf);
92  return result;
93 }
94 
95 std::vector<float>
96 read_free_energies(std::string filename) {
97  return read_single_column<float>(filename);
98 }
99 
100 std::pair<Neighborhood, Neighborhood>
101 read_neighborhood(const std::string filename) {
102  Neighborhood nh;
103  Neighborhood nh_high_dens;
104  std::ifstream ifs(filename);
105  if (ifs.fail()) {
106  std::cerr << "error: cannot open file '" << filename << "' for reading." << std::endl;
107  exit(EXIT_FAILURE);
108  } else {
109  std::size_t i=0;
110  while (!ifs.eof() && !ifs.bad()) {
111  std::size_t buf1;
112  float buf2;
113  std::size_t buf3;
114  float buf4;
115  ifs >> buf1;
116  ifs >> buf2;
117  ifs >> buf3;
118  ifs >> buf4;
119  if ( ! ifs.fail()) {
120  nh[i] = std::pair<std::size_t, float>(buf1, buf2);
121  nh_high_dens[i] = std::pair<std::size_t, float>(buf3, buf4);
122  ++i;
123  } else { // if conversion error, skip (comment) line
124  ifs.clear();
125  ifs.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
126  }
127  }
128  }
129  return {nh, nh_high_dens};
130 }
131 
132 std::vector<std::size_t>
133 read_concat_limits(std::string filename) {
134  std::vector<std::size_t> concat_limits = read_single_column<std::size_t>(filename);
135 
136  for (std::size_t i=0; i+1 < concat_limits.size(); ++i){
137  concat_limits[i+1] += concat_limits[i];
138  }
139 
140  return concat_limits;
141 }
142 
143 void
144 write_neighborhood(const std::string filename,
145  const Neighborhood& nh,
146  const Neighborhood& nh_high_dens,
147  std::string header_comment,
148  std::map<std::string,float> stringMap) {
149  std::ofstream ofs(filename);
150  if (ofs.fail()) {
151  std::cerr << "error: cannot open file '" << filename << "' for writing." << std::endl;
152  exit(EXIT_FAILURE);
153  }
154  append_commentsMap(header_comment, stringMap);
155  header_comment.append("#\n# column definitions:\n"
156  "# nn = nearest neighbor\n"
157  "# nn_hd = nearest neighbor with higher density\n"
158  "# id(i) = id/line number of i\n"
159  "# dsqr(i) = squared euclidean distance to i\n#\n"
160  "# id(nn) dsqr(nn) id(nn_hd) dsqr(nn_hd)\n");
161  ofs << header_comment;
162  auto p = nh.begin();
163  auto p_hd = nh_high_dens.begin();
164  while (p != nh.end() && p_hd != nh_high_dens.end()) {
165  // first: key (not used)
166  // second: neighbor
167  // second.first: id; second.second: squared dist
168  ofs << p->second.first << " " << p->second.second << " "
169  << p_hd->second.first << " " << p_hd->second.second << "\n";
170  ++p;
171  ++p_hd;
172  }
173 }
174 
175 std::map<std::size_t, std::size_t>
176 microstate_populations(std::vector<std::size_t> traj) {
177  std::map<std::size_t, std::size_t> populations;
178  for (std::size_t state: traj) {
179  if (populations.count(state) == 0) {
180  populations[state] = 1;
181  } else {
182  ++populations[state];
183  }
184  }
185  return populations;
186 }
187 
188 void
189 check_concat_limits(std::vector<std::size_t> concat_limits, std::size_t n_frames) {
190  if (concat_limits.back() < n_frames) {
191  Clustering::logger(std::cout) << "warning: last " << n_frames - concat_limits.back()
192  << " frames are ignored. check concat-limits/nframes"
193  << std::endl;
194  }
195  if (concat_limits.front() == 0) {
196  Clustering::logger(std::cout) << "warning: first trajectory is of zero length. check\n"
197  << " help for correct usage of --concat-limits"
198  << std::endl;
199  }
200  if (concat_limits.back() > n_frames) {
201  Clustering::logger(std::cout) << "warning: limits are larger than the file length.\n"
202  << " Check your limits!" <<std::endl;
203  }
204 }
205 
206 float
207 read_next_float(std::ifstream &ifs) {
208  std::string s;
209  if (ifs.peek() != '\n') {
210  ifs >> std::ws;
211  while (!std::isdigit(ifs.peek()) && ifs.peek() != '\n') {
212  ifs >> s;
213  if (ifs.peek() != '\n') {
214  ifs >> std::ws;
215  }
216  }
217  }
218  float val;
219  // catch if line end was case
220  if (ifs.peek() == '\n') {
221  val = -1.;
222  } else {
223  ifs >> val;
224  }
225  return val;
226 }
227 
228 void
229 read_comments(std::string filename, std::map<std::string,float> &stringMap) {
230  std::vector<std::size_t> dat;
231  std::ifstream ifs(filename);
232  if (ifs.fail()) {
233  std::cerr << "error: cannot open file '" << filename << "'" << std::endl;
234  exit(EXIT_FAILURE);
235  } else {
236  while (!ifs.eof() && !ifs.bad()) {
237  std::size_t buf;
238  ifs >> buf;
239  if (ifs.fail()) {
240  ifs.clear();
241  std::string s;
242  ifs >> s;
243  if (s == "#@" && ifs.peek() != '\n') {
244  ifs >> s;
245  for (auto & x : stringMap)
246  {
247  if (s == x.first) {
248  float val = read_next_float(ifs);
249  if ((x.second != 0) && (std::abs(x.second-val) > 0.001)) {
250  Clustering::logger(std::cout) << "warning: the values of " << x.first
251  << " are not in agreement\n"
252  << " " << val << " vs. "
253  << x.second<< std::endl;
254  }
255  stringMap[x.first] = val;
256  }
257  }
258  }
259  ifs.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
260  }
261  }
262  }
263  return;
264 }
265 
266 void
267 append_commentsMap(std::string &header_comment, std::map<std::string,float> &stringMap) {
268  header_comment.append("#\n# The following comments are reused for identifying\n# user-based mistakes and should not be modified.\n");
269  for (auto &x : stringMap)
270  {
271  if (x.second != 0.) {
272  header_comment.append(stringprintf("#@ %s = %.5f\n", x.first.c_str(), x.second));
273  }
274  }
275  return;
276 }
277 
278 
279 } // end namespace Tools
280 } // end namespace Clustering
281 
void write_pops(std::string filename, std::vector< std::size_t > pops, std::string header_comment, std::map< std::string, float > stringMap)
write populations as column into given file
Definition: tools.cpp:50
std::vector< std::size_t > read_concat_limits(std::string filename)
Definition: tools.cpp:133
general namespace for clustering package
Definition: coring.cpp:38
Tools mainly for IO and some other functions.
std::map< std::size_t, std::size_t > microstate_populations(std::vector< std::size_t > traj)
compute microstate populations from clustered trajectory
Definition: tools.cpp:176
float read_next_float(std::ifstream &ifs)
read the next float of ifstream
Definition: tools.cpp:207
std::vector< float > read_free_energies(std::string filename)
read free energies from plain text file
Definition: tools.cpp:96
Define global logger.
void check_concat_limits(std::vector< std::size_t > concat_limits, std::size_t n_frames)
check if concat limits were passed correctly
Definition: tools.cpp:189
unsigned int min_multiplicator(unsigned int orig, unsigned int mult)
return minimum multiplicator to fulfill result * mult >= orig
Definition: tools.cpp:36
void write_neighborhood(const std::string filename, const Neighborhood &nh, const Neighborhood &nh_high_dens, std::string header_comment, std::map< std::string, float > stringMap)
Definition: tools.cpp:144
std::ostream & logger(std::ostream &s)
Definition: logger.cpp:32
std::map< std::size_t, Clustering::Tools::Neighbor > Neighborhood
map frame id to neighbors
Definition: tools.hpp:66
void read_comments(std::string filename, std::map< std::string, float > &stringMap)
read comments of stringMap from file. Comments should start with #@
Definition: tools.cpp:229
std::pair< Neighborhood, Neighborhood > read_neighborhood(const std::string filename)
Definition: tools.cpp:101
void append_commentsMap(std::string &header_comment, std::map< std::string, float > &stringMap)
append commentsMap to header comment
Definition: tools.cpp:267
void write_fes(std::string filename, std::vector< float > fes, std::string header_comment, std::map< std::string, float > stringMap)
write free energies as column into given file
Definition: tools.cpp:42
void write_clustered_trajectory(std::string filename, std::vector< std::size_t > traj, std::string header_comment, std::map< std::string, float > stringMap)
write state trajectory into plain text file
Definition: tools.cpp:63
std::vector< std::size_t > read_clustered_trajectory(std::string filename)
read states from trajectory (given as plain text file)
Definition: tools.cpp:58
std::string stringprintf(const std::string &str,...)
printf-version for std::string
Definition: tools.cpp:80