clustering.cpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015-2019, Florian Sittel (www.lettis.net) and Daniel Nagel
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
34 #include "config.hpp"
35 // sub-modules
36 #include "density_clustering.hpp"
37 #ifdef USE_CUDA
38  #include "density_clustering_cuda.hpp"
39 #endif
40 #ifdef DC_USE_MPI
41  #include "density_clustering_mpi.hpp"
42 #endif
43 #include "mpp.hpp"
44 #include "network_builder.hpp"
45 #include "state_filter.hpp"
46 #include "coring.hpp"
47 #include "noise.hpp"
48 // toolset
49 #include "logger.hpp"
50 #include "tools.hpp"
51 
52 #include <omp.h>
53 #include <time.h>
54 #include <boost/program_options.hpp>
66 int main(int argc, char* argv[]) {
67  namespace b_po = boost::program_options;
68  std::string general_help =
69  "\nclustering 1.0: a classification framework for MD data\n"
70  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel\n"
71  "\n"
72  "modes:\n"
73  " density: run density clustering\n"
74  " network: build network from density clustering results\n"
75  " mpp: run MPP (Most Probable Path) clustering\n"
76  " (based on density-results)\n"
77  " coring: boundary corrections for clustering results.\n"
78  " noise: defining and dynamically reassigning noise.\n"
79  " filter: filter phase space (e.g. dihedrals) for given state\n"
80  "\n"
81  "usage:\n"
82  " clustering MODE --option1 --option2 ...\n"
83  "\n"
84  "for a list of available options per mode, run with '-h' option, e.g.\n"
85  " clustering density -h\n"
86  ;
87 
88  enum {DENSITY, MPP, NETWORK, FILTER, CORING, NOISE} mode;
89 
90 #ifdef USE_CUDA
91  // check for CUDA-enabled GPUs (will fail if none found)
92  Clustering::Density::CUDA::get_num_gpus();
93 #endif
94 
95  if (argc <= 2) {
96  std::cerr << general_help;
97  return EXIT_FAILURE;
98 
99  } else {
100  std::string str_mode(argv[1]);
101  if (str_mode.compare("density") == 0) {
102  mode = DENSITY;
103  } else if (str_mode.compare("mpp") == 0) {
104  mode = MPP;
105  } else if (str_mode.compare("network") == 0) {
106  mode = NETWORK;
107  } else if (str_mode.compare("filter") == 0) {
108  mode = FILTER;
109  } else if (str_mode.compare("coring") == 0) {
110  mode = CORING;
111  } else if (str_mode.compare("noise") == 0) {
112  mode = NOISE;
113  } else {
114  std::cerr << "\nerror: unrecognized mode '" << str_mode << "'\n\n";
115  std::cerr << general_help;
116  return EXIT_FAILURE;
117  }
118  }
119  b_po::variables_map args;
120  b_po::positional_options_description pos_opts;
121  // density options
122  b_po::options_description desc_dens (std::string(argv[1]).append(
123  "\n\nclustering 1.0: a classification framework for MD data\n"
124  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel"
125  "\n\n"
126  "perform clustering of MD data based on phase space densities.\n"
127  "densities are approximated by counting neighboring frames inside\n"
128  "a n-dimensional hypersphere of specified radius.\n"
129  "distances are measured with n-dim P2-norm.\n"
130  "\n"
131  "options"));
132  desc_dens.add_options()
133  ("help,h", b_po::bool_switch()->default_value(false), "show this help.")
134  ("file,f", b_po::value<std::string>()->required(), "input (required): phase space coordinates (space separated ASCII).")
135  // optional
136  ("radius,r", b_po::value<float>(), "parameter: hypersphere radius. If not used, the lumping radius will be used instead.")
137  ("threshold-screening,T", b_po::value<std::vector<float>>()->multitoken(),
138  "parameters: screening of free energy landscape. format: FROM STEP TO; e.g.: '-T 0.1 0.1 11.1'.\n"
139  "set -T -1 for default values: FROM=0.1, STEP=0.1, TO=MAX_FE.\n"
140  "parameters may be given partially, e.g.: -T 0.2 0.4 to start at 0.2 and go to MAX_FE at steps 0.4.\n"
141  "for threshold-screening, --output denotes the basename only. output files will have the"
142  " current threshold limit appended to the given filename.")
143  ("output,o", b_po::value<std::string>(), "output (optional): clustering information.")
144  ("input,i", b_po::value<std::string>(), "input (optional): initial state definition.")
145  ("radii,R", b_po::value<std::vector<float>>()->multitoken(), "parameter: list of radii for population/free energy calculations "
146  "(i.e. compute populations/free energies for several radii in one go).")
147  ("population,p", b_po::value<std::string>(), "output (optional): population per frame (if -R is set: this defines only the basename).")
148  ("free-energy,d", b_po::value<std::string>(), "output (optional): free energies per frame (if -R is set: this defines only the basename).")
149  ("free-energy-input,D", b_po::value<std::string>(), "input (optional): reuse free energy info.")
150  ("nearest-neighbors,b", b_po::value<std::string>(), "output (optional): nearest neighbor info.")
151  ("nearest-neighbors-input,B", b_po::value<std::string>(), "input (optional): reuse nearest neighbor info.")
152  // defaults
153  ("nthreads,n", b_po::value<int>()->default_value(0),
154  "number of OpenMP threads. default: 0; i.e. use OMP_NUM_THREADS env-variable.")
155  ("verbose,v", b_po::bool_switch()->default_value(false), "verbose mode: print runtime information to STDOUT.")
156  ;
157  // MPP options
158  b_po::options_description desc_mpp (std::string(argv[1]).append(
159  "\n\nclustering 1.0: a classification framework for MD data\n"
160  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel"
161  "\n\n"
162  "performs a most probable path (MPP) clustering based on the given lag time."
163  "\n"
164  "options"));
165  desc_mpp.add_options()
166  ("help,h", b_po::bool_switch()->default_value(false), "show this help.")
167  ("states,s", b_po::value<std::string>()->required(),
168  "(required): file with state information (i.e. clustered trajectory")
169  ("free-energy-input,D", b_po::value<std::string>()->required(), "input (required): reuse free energy info.")
170  ("lagtime,l", b_po::value<int>()->required(), "input (required): lagtime in units of frame numbers. Note: Lagtime should be greater than the coring time/ smallest timescale. ")
171  ("qmin-from", b_po::value<float>()->default_value(0.01, "0.01"), "initial Qmin value (default: 0.01).")
172  ("qmin-to", b_po::value<float>()->default_value(1.0, "1.00"), "final Qmin value (default: 1.00).")
173  ("qmin-step", b_po::value<float>()->default_value(0.01, "0.01"), "Qmin stepping (default: 0.01).")
174  ("concat-nframes", b_po::value<std::size_t>(),
175  "input (parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
176  ("concat-limits", b_po::value<std::string>(),
177  "input (file): file with sizes of individual (not equally sized)"
178  " sub-trajectories for concatenated trajectory files. e.g.: for a"
179  " concatenated trajectory of three chunks of sizes 100, 50 and 300 frames: '100 50 300'")
180  ("tprob", b_po::value<std::string>(),
181  "input (file): initial transition probability matrix. -l still needs to be given, but will be ignored.\n"
182  "Format:three space-separated columns 'state_from' 'state_to' 'probability'")
183  // defaults
184  ("output,o", b_po::value<std::string>()->default_value("mpp"), " output (optional): basename for output files (default: 'mpp').")
185  ("nthreads,n", b_po::value<int>()->default_value(0),
186  "number of OpenMP threads. default: 0; i.e. use OMP_NUM_THREADS env-variable.")
187  ("verbose,v", b_po::bool_switch()->default_value(false), "verbose mode: print runtime information to STDOUT.")
188  ;
189  // network options
190  b_po::options_description desc_network (std::string(argv[1]).append(
191  "\n\nclustering 1.0: a classification framework for MD data\n"
192  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel"
193  "\n\n"
194  "create a network from screening data."
195  "\n"
196  "options"));
197  desc_network.add_options()
198  ("help,h", b_po::bool_switch()->default_value(false), "show this help.")
199  ("minpop,p", b_po::value<std::size_t>()->required(),
200  "(required): minimum population of node to be considered for network.")
201  // optional
202  ("basename,b", b_po::value<std::string>()->default_value("clust"),
203  "(optional): basename of input files (default: clust).")
204  ("output,o", b_po::value<std::string>()->default_value("network"),
205  "(optional): basename of output files (default: network).")
206  ("min", b_po::value<float>()->default_value(0.1f, "0.10"), "(optional): minimum free energy (default: 0.10).")
207  ("max", b_po::value<float>()->default_value(0.0f, "0"), "(optional): maximum free energy (default: 0; i.e. max. available).")
208  ("step", b_po::value<float>()->default_value(0.1f, "0.10"), "(optional): free energy stepping (default: 0.10).")
209  ("network-html", b_po::bool_switch()->default_value(false), "Generate html visualization of fe tree.")
210  // defaults
211  ("verbose,v", b_po::bool_switch()->default_value(false), "verbose mode: print runtime information to STDOUT.")
212  ;
213  // filter options
214  b_po::options_description desc_filter (std::string(argv[1]).append(
215  "\n\nclustering 1.0: a classification framework for MD data\n"
216  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel"
217  "\n\n"
218  "filter phase space (e.g. dihedral angles, cartesian coords, etc.) for given state."
219  "\n"
220  "options"));
221  desc_filter.add_options()
222  ("help,h", b_po::bool_switch()->default_value(false),
223  "show this help.")
224  ("states,s", b_po::value<std::string>()->required(),
225  "(required): file with state information (i.e. clustered trajectory).")
226  ("coords,c", b_po::value<std::string>(),
227  "file with coordinates (either plain ASCII or GROMACS' xtc).")
228  ("output,o", b_po::value<std::string>(),
229  "filtered data.")
230  ("state,S", b_po::value<std::size_t>(),
231  "state id of selected state.")
232 
233  ("list", b_po::bool_switch()->default_value(false),
234  "list states and their populations")
235  ;
236  // coring options
237  b_po::options_description desc_coring (std::string(argv[1]).append(
238  "\n\nclustering 1.0: a classification framework for MD data\n"
239  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel"
240  "\n\n"
241  "compute boundary corrections for clustering results."
242  "\n"
243  "options"));
244  desc_coring.add_options()
245  ("help,h", b_po::bool_switch()->default_value(false),
246  "show this help.")
247  ("states,s", b_po::value<std::string>()->required(),
248  "(required): file with state information (i.e. clustered trajectory)")
249  ("windows,w", b_po::value<std::string>()->required(),
250  "(required): file with window sizes."
251  "format is space-separated lines of\n\n"
252  "STATE_ID WINDOW_SIZE\n\n"
253  "use * as STATE_ID to match all (other) states.\n"
254  "e.g.:\n\n"
255  "* 20\n"
256  "3 40\n"
257  "4 60\n\n"
258  "matches 40 frames to state 3, 60 frames to state 4 and 20 frames to all the other states.")
259  // optional
260  ("output,o", b_po::value<std::string>(),
261  "(optional): cored trajectory")
262  ("distribution,d", b_po::value<std::string>(),
263  "(optional): write waiting time distributions to file.")
264  ("cores", b_po::value<std::string>(),
265  "(optional): write core information to file, i.e. trajectory with state name if in core region or -1 if not in core region")
266  ("concat-nframes", b_po::value<std::size_t>(),
267  "input (optional parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
268  ("concat-limits", b_po::value<std::string>(),
269  "input (file): file with sizes of individual (not equally sized)"
270  " sub-trajectories for concatenated trajectory files. e.g.: for a"
271  " concatenated trajectory of three chunks of sizes 100, 50 and 300 frames: '100 50 300'")
272  // defaults
273  ("verbose,v", b_po::bool_switch()->default_value(false),
274  "verbose mode: print runtime information to STDOUT.")
275  ;
276  // noise options
277  b_po::options_description desc_noise (std::string(argv[1]).append(
278  "\n\nclustering 1.0: a classification framework for MD data\n"
279  "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel"
280  "\n\n"
281  "defining and dynamically reassigning noise for clustering results."
282  "\n"
283  "options"));
284  desc_noise.add_options()
285  ("help,h", b_po::bool_switch()->default_value(false),
286  "show this help.")
287  ("states,s", b_po::value<std::string>()->required(),
288  "(required): file with state information (i.e. clustered trajectory)")
289  ("output,o", b_po::value<std::string>()->required(),
290  "(required): noise-reassigned trajectory")
291  // optional
292  ("basename,b", b_po::value<std::string>()->default_value("clust"),
293  "(optional): basename of input files (default: clust) used to determine isolated clusters")
294  ("cmin,c", b_po::value<float>()->default_value(0.1f, "0.10"), "(optional): population (in percent) threshold below which an isolated cluster is assigned as noise.(default: 0.1).")
295  ("cores", b_po::value<std::string>(),
296  "(optional): write core information to file, i.e. trajectory with state name if in core region or -1 if not in core region")
297  ("concat-nframes", b_po::value<std::size_t>(),
298  "input (optional parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
299  ("concat-limits", b_po::value<std::string>(),
300  "input (file): file with sizes of individual (not equally sized)"
301  " sub-trajectories for concatenated trajectory files. e.g.: for a"
302  " concatenated trajectory of three chunks of sizes 100, 50 and 300 frames: '100 50 300'")
303  // defaults
304  ("verbose,v", b_po::bool_switch()->default_value(false),
305  "verbose mode: print runtime information to STDOUT.")
306  ;
307  // parse cmd arguments
308  b_po::options_description desc;
309  switch(mode){
310  case DENSITY:
311  desc.add(desc_dens);
312  break;
313  case MPP:
314  desc.add(desc_mpp);
315  break;
316  case NETWORK:
317  desc.add(desc_network);
318  break;
319  case FILTER:
320  desc.add(desc_filter);
321  break;
322  case CORING:
323  desc.add(desc_coring);
324  break;
325  case NOISE:
326  desc.add(desc_noise);
327  break;
328  default:
329  std::cerr << "error: unknown mode. this should never happen." << std::endl;
330  return EXIT_FAILURE;
331  }
332  try {
333  b_po::store(b_po::command_line_parser(argc, argv).options(desc).run(), args);
334  b_po::notify(args);
335  } catch (b_po::error& e) {
336 // TODO: this is just a temporal solution
337 // if ( ! args["help"].as<bool>()) {
338  std::cerr << "\nerror parsing arguments:\n\n" << e.what() << "\n\n" << std::endl;
339 // }
340  std::cerr << desc << std::endl;
341  return EXIT_FAILURE;
342  }
343  if (args["help"].as<bool>()) {
344  std::cout << desc << std::endl;
345  return EXIT_SUCCESS;
346  }
347  // setup defaults
348  if (args.count("verbose")) {
349  Clustering::verbose = args["verbose"].as<bool>();
350  }
351  // print head
352  std::string leading_whitespace(20, ' ');
353  std::string leading_whitespace2nd(20 + (20-strlen(argv[1]))/2, ' ');
354  Clustering::logger(std::cout) << "\n" << leading_whitespace
355  << "~~~ clustering v1.0 ~~~\n"
356  << leading_whitespace2nd << "~ " << argv[1] << " ~\n\n"
357  << "~~~ using for parallization: ";
358 #ifdef USE_CUDA
359  Clustering::logger(std::cout) << "CUDA" << std::endl;
360 #else
361  Clustering::logger(std::cout) << "cpu" << std::endl;
362 #endif
363  // setup OpenMP
364  int n_threads = 0;
365  if (args.count("nthreads")) {
366  n_threads = args["nthreads"].as<int>();
367  }
368  if (n_threads > 0) {
369  omp_set_num_threads(n_threads);
370  }
371  // generate header comment
372  std::ostringstream header;
373  time_t rawtime;
374  time(&rawtime);
375  struct tm * timeinfo = localtime(&rawtime);
376  header << "# clustering v1.0 - " << argv[1] << "\n"
377  << "#\n"
378  << "# Created " << asctime(timeinfo)
379  << "# by following command:\n#\n# ";
380  std::vector<std::string> arguments(argv, argv + argc);
381  for (std::string& arg_string : arguments){
382  header << arg_string << " ";
383  }
384  header << "\n#\n# Copyright (c) 2015-2019 Florian Sittel and Daniel Nagel\n"
385  << "# please cite the corresponding paper, "
386  << "see https://github.com/moldyn/clustering\n";
387  args.insert(std::make_pair("header", b_po::variable_value(header.str(), false)));
388  // add parameters which should be read from comments
389  std::map<std::string,float> commentsMap = {{"clustering_radius", 0.},
390  {"lumping_radius", 0.},
391  {"screening_from", 0.},
392  {"screening_to", 0.},
393  {"screening_step", 0.},
394  {"minimal_population", 0.},
395  {"cmin", 0.},
396  {"single_coring_time", 0.}};
397  args.insert(std::make_pair("commentsMap", b_po::variable_value(commentsMap, false)));
398  // run selected subroutine
399  switch(mode) {
400  case DENSITY:
401  #ifdef DC_USE_MPI
403  #else
405  #endif
406  break;
407  case MPP:
408  Clustering::MPP::main(args);
409  break;
410  case NETWORK:
412  break;
413  case FILTER:
415  break;
416  case CORING:
418  break;
419  case NOISE:
421  break;
422  default:
423  std::cerr << "error: unknown mode. this should never happen." << std::endl;
424  return EXIT_FAILURE;
425  }
426  return EXIT_SUCCESS;
427 }
428 
Dynamical Coring.
Most Probable Path Clustering.
bool verbose
global flag: use verbose output?
Definition: logger.cpp:29
Tools mainly for IO and some other functions.
int main(int argc, char *argv[])
Parses option and execute corresponding sub-module.
Definition: clustering.cpp:66
void main(boost::program_options::variables_map args)
user interface and controlling function for density-based geometric clustering.
Define global logger.
std::ostream & logger(std::ostream &s)
Definition: logger.cpp:32
void main(boost::program_options::variables_map args)
controlling function and user interface for boundary corrections
Definition: coring.cpp:61
void main(boost::program_options::variables_map args)
controlling function and user interface for noise assignment
Definition: noise.cpp:42
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
Density-based clustering.
Network Builder.
Noise Assignment.
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
MPP clustering control function and user interface.
Definition: mpp.cpp:487