38 #include "density_clustering_cuda.hpp" 41 #include "density_clustering_mpi.hpp" 45 #include "state_filter.hpp" 54 #include <boost/program_options.hpp> 66 int main(
int argc,
char* argv[]) {
67 namespace b_po = boost::program_options;
68 std::string general_help =
69 "\nclustering 1.0: a classification framework for MD data\n" 70 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel\n" 73 " density: run density clustering\n" 74 " network: build network from density clustering results\n" 75 " mpp: run MPP (Most Probable Path) clustering\n" 76 " (based on density-results)\n" 77 " coring: boundary corrections for clustering results.\n" 78 " noise: defining and dynamically reassigning noise.\n" 79 " filter: filter phase space (e.g. dihedrals) for given state\n" 82 " clustering MODE --option1 --option2 ...\n" 84 "for a list of available options per mode, run with '-h' option, e.g.\n" 85 " clustering density -h\n" 88 enum {DENSITY, MPP, NETWORK, FILTER, CORING, NOISE} mode;
92 Clustering::Density::CUDA::get_num_gpus();
96 std::cerr << general_help;
100 std::string str_mode(argv[1]);
101 if (str_mode.compare(
"density") == 0) {
103 }
else if (str_mode.compare(
"mpp") == 0) {
105 }
else if (str_mode.compare(
"network") == 0) {
107 }
else if (str_mode.compare(
"filter") == 0) {
109 }
else if (str_mode.compare(
"coring") == 0) {
111 }
else if (str_mode.compare(
"noise") == 0) {
114 std::cerr <<
"\nerror: unrecognized mode '" << str_mode <<
"'\n\n";
115 std::cerr << general_help;
119 b_po::variables_map args;
120 b_po::positional_options_description pos_opts;
122 b_po::options_description desc_dens (std::string(argv[1]).append(
123 "\n\nclustering 1.0: a classification framework for MD data\n" 124 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel" 126 "perform clustering of MD data based on phase space densities.\n" 127 "densities are approximated by counting neighboring frames inside\n" 128 "a n-dimensional hypersphere of specified radius.\n" 129 "distances are measured with n-dim P2-norm.\n" 132 desc_dens.add_options()
133 (
"help,h", b_po::bool_switch()->default_value(
false),
"show this help.")
134 (
"file,f", b_po::value<std::string>()->required(),
"input (required): phase space coordinates (space separated ASCII).")
136 (
"radius,r", b_po::value<float>(),
"parameter: hypersphere radius. If not used, the lumping radius will be used instead.")
137 (
"threshold-screening,T", b_po::value<std::vector<float>>()->multitoken(),
138 "parameters: screening of free energy landscape. format: FROM STEP TO; e.g.: '-T 0.1 0.1 11.1'.\n" 139 "set -T -1 for default values: FROM=0.1, STEP=0.1, TO=MAX_FE.\n" 140 "parameters may be given partially, e.g.: -T 0.2 0.4 to start at 0.2 and go to MAX_FE at steps 0.4.\n" 141 "for threshold-screening, --output denotes the basename only. output files will have the" 142 " current threshold limit appended to the given filename.")
143 (
"output,o", b_po::value<std::string>(),
"output (optional): clustering information.")
144 (
"input,i", b_po::value<std::string>(),
"input (optional): initial state definition.")
145 (
"radii,R", b_po::value<std::vector<float>>()->multitoken(),
"parameter: list of radii for population/free energy calculations " 146 "(i.e. compute populations/free energies for several radii in one go).")
147 (
"population,p", b_po::value<std::string>(),
"output (optional): population per frame (if -R is set: this defines only the basename).")
148 (
"free-energy,d", b_po::value<std::string>(),
"output (optional): free energies per frame (if -R is set: this defines only the basename).")
149 (
"free-energy-input,D", b_po::value<std::string>(),
"input (optional): reuse free energy info.")
150 (
"nearest-neighbors,b", b_po::value<std::string>(),
"output (optional): nearest neighbor info.")
151 (
"nearest-neighbors-input,B", b_po::value<std::string>(),
"input (optional): reuse nearest neighbor info.")
153 (
"nthreads,n", b_po::value<int>()->default_value(0),
154 "number of OpenMP threads. default: 0; i.e. use OMP_NUM_THREADS env-variable.")
155 (
"verbose,v", b_po::bool_switch()->default_value(
false),
"verbose mode: print runtime information to STDOUT.")
158 b_po::options_description desc_mpp (std::string(argv[1]).append(
159 "\n\nclustering 1.0: a classification framework for MD data\n" 160 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel" 162 "performs a most probable path (MPP) clustering based on the given lag time." 165 desc_mpp.add_options()
166 (
"help,h", b_po::bool_switch()->default_value(
false),
"show this help.")
167 (
"states,s", b_po::value<std::string>()->required(),
168 "(required): file with state information (i.e. clustered trajectory")
169 (
"free-energy-input,D", b_po::value<std::string>()->required(),
"input (required): reuse free energy info.")
170 (
"lagtime,l", b_po::value<int>()->required(),
"input (required): lagtime in units of frame numbers. Note: Lagtime should be greater than the coring time/ smallest timescale. ")
171 (
"qmin-from", b_po::value<float>()->default_value(0.01,
"0.01"),
"initial Qmin value (default: 0.01).")
172 (
"qmin-to", b_po::value<float>()->default_value(1.0,
"1.00"),
"final Qmin value (default: 1.00).")
173 (
"qmin-step", b_po::value<float>()->default_value(0.01,
"0.01"),
"Qmin stepping (default: 0.01).")
174 (
"concat-nframes", b_po::value<std::size_t>(),
175 "input (parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
176 (
"concat-limits", b_po::value<std::string>(),
177 "input (file): file with sizes of individual (not equally sized)" 178 " sub-trajectories for concatenated trajectory files. e.g.: for a" 179 " concatenated trajectory of three chunks of sizes 100, 50 and 300 frames: '100 50 300'")
180 (
"tprob", b_po::value<std::string>(),
181 "input (file): initial transition probability matrix. -l still needs to be given, but will be ignored.\n" 182 "Format:three space-separated columns 'state_from' 'state_to' 'probability'")
184 (
"output,o", b_po::value<std::string>()->default_value(
"mpp"),
" output (optional): basename for output files (default: 'mpp').")
185 (
"nthreads,n", b_po::value<int>()->default_value(0),
186 "number of OpenMP threads. default: 0; i.e. use OMP_NUM_THREADS env-variable.")
187 (
"verbose,v", b_po::bool_switch()->default_value(
false),
"verbose mode: print runtime information to STDOUT.")
190 b_po::options_description desc_network (std::string(argv[1]).append(
191 "\n\nclustering 1.0: a classification framework for MD data\n" 192 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel" 194 "create a network from screening data." 197 desc_network.add_options()
198 (
"help,h", b_po::bool_switch()->default_value(
false),
"show this help.")
199 (
"minpop,p", b_po::value<std::size_t>()->required(),
200 "(required): minimum population of node to be considered for network.")
202 (
"basename,b", b_po::value<std::string>()->default_value(
"clust"),
203 "(optional): basename of input files (default: clust).")
204 (
"output,o", b_po::value<std::string>()->default_value(
"network"),
205 "(optional): basename of output files (default: network).")
206 (
"min", b_po::value<float>()->default_value(0.1f,
"0.10"),
"(optional): minimum free energy (default: 0.10).")
207 (
"max", b_po::value<float>()->default_value(0.0f,
"0"),
"(optional): maximum free energy (default: 0; i.e. max. available).")
208 (
"step", b_po::value<float>()->default_value(0.1f,
"0.10"),
"(optional): free energy stepping (default: 0.10).")
209 (
"network-html", b_po::bool_switch()->default_value(
false),
"Generate html visualization of fe tree.")
211 (
"verbose,v", b_po::bool_switch()->default_value(
false),
"verbose mode: print runtime information to STDOUT.")
214 b_po::options_description desc_filter (std::string(argv[1]).append(
215 "\n\nclustering 1.0: a classification framework for MD data\n" 216 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel" 218 "filter phase space (e.g. dihedral angles, cartesian coords, etc.) for given state." 221 desc_filter.add_options()
222 (
"help,h", b_po::bool_switch()->default_value(
false),
224 (
"states,s", b_po::value<std::string>()->required(),
225 "(required): file with state information (i.e. clustered trajectory).")
226 (
"coords,c", b_po::value<std::string>(),
227 "file with coordinates (either plain ASCII or GROMACS' xtc).")
228 (
"output,o", b_po::value<std::string>(),
230 (
"state,S", b_po::value<std::size_t>(),
231 "state id of selected state.")
233 (
"list", b_po::bool_switch()->default_value(
false),
234 "list states and their populations")
237 b_po::options_description desc_coring (std::string(argv[1]).append(
238 "\n\nclustering 1.0: a classification framework for MD data\n" 239 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel" 241 "compute boundary corrections for clustering results." 244 desc_coring.add_options()
245 (
"help,h", b_po::bool_switch()->default_value(
false),
247 (
"states,s", b_po::value<std::string>()->required(),
248 "(required): file with state information (i.e. clustered trajectory)")
249 (
"windows,w", b_po::value<std::string>()->required(),
250 "(required): file with window sizes." 251 "format is space-separated lines of\n\n" 252 "STATE_ID WINDOW_SIZE\n\n" 253 "use * as STATE_ID to match all (other) states.\n" 258 "matches 40 frames to state 3, 60 frames to state 4 and 20 frames to all the other states.")
260 (
"output,o", b_po::value<std::string>(),
261 "(optional): cored trajectory")
262 (
"distribution,d", b_po::value<std::string>(),
263 "(optional): write waiting time distributions to file.")
264 (
"cores", b_po::value<std::string>(),
265 "(optional): write core information to file, i.e. trajectory with state name if in core region or -1 if not in core region")
266 (
"concat-nframes", b_po::value<std::size_t>(),
267 "input (optional parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
268 (
"concat-limits", b_po::value<std::string>(),
269 "input (file): file with sizes of individual (not equally sized)" 270 " sub-trajectories for concatenated trajectory files. e.g.: for a" 271 " concatenated trajectory of three chunks of sizes 100, 50 and 300 frames: '100 50 300'")
273 (
"verbose,v", b_po::bool_switch()->default_value(
false),
274 "verbose mode: print runtime information to STDOUT.")
277 b_po::options_description desc_noise (std::string(argv[1]).append(
278 "\n\nclustering 1.0: a classification framework for MD data\n" 279 "Copyright (c) 2015-2019, Florian Sittel and Daniel Nagel" 281 "defining and dynamically reassigning noise for clustering results." 284 desc_noise.add_options()
285 (
"help,h", b_po::bool_switch()->default_value(
false),
287 (
"states,s", b_po::value<std::string>()->required(),
288 "(required): file with state information (i.e. clustered trajectory)")
289 (
"output,o", b_po::value<std::string>()->required(),
290 "(required): noise-reassigned trajectory")
292 (
"basename,b", b_po::value<std::string>()->default_value(
"clust"),
293 "(optional): basename of input files (default: clust) used to determine isolated clusters")
294 (
"cmin,c", b_po::value<float>()->default_value(0.1f,
"0.10"),
"(optional): population (in percent) threshold below which an isolated cluster is assigned as noise.(default: 0.1).")
295 (
"cores", b_po::value<std::string>(),
296 "(optional): write core information to file, i.e. trajectory with state name if in core region or -1 if not in core region")
297 (
"concat-nframes", b_po::value<std::size_t>(),
298 "input (optional parameter): no. of frames per (equally sized) sub-trajectory for concatenated trajectory files.")
299 (
"concat-limits", b_po::value<std::string>(),
300 "input (file): file with sizes of individual (not equally sized)" 301 " sub-trajectories for concatenated trajectory files. e.g.: for a" 302 " concatenated trajectory of three chunks of sizes 100, 50 and 300 frames: '100 50 300'")
304 (
"verbose,v", b_po::bool_switch()->default_value(
false),
305 "verbose mode: print runtime information to STDOUT.")
308 b_po::options_description desc;
317 desc.add(desc_network);
320 desc.add(desc_filter);
323 desc.add(desc_coring);
326 desc.add(desc_noise);
329 std::cerr <<
"error: unknown mode. this should never happen." << std::endl;
333 b_po::store(b_po::command_line_parser(argc, argv).options(desc).run(), args);
335 }
catch (b_po::error& e) {
338 std::cerr <<
"\nerror parsing arguments:\n\n" << e.what() <<
"\n\n" << std::endl;
340 std::cerr << desc << std::endl;
343 if (args[
"help"].as<bool>()) {
344 std::cout << desc << std::endl;
348 if (args.count(
"verbose")) {
352 std::string leading_whitespace(20,
' ');
353 std::string leading_whitespace2nd(20 + (20-strlen(argv[1]))/2,
' ');
355 <<
"~~~ clustering v1.0 ~~~\n" 356 << leading_whitespace2nd <<
"~ " << argv[1] <<
" ~\n\n" 357 <<
"~~~ using for parallization: ";
365 if (args.count(
"nthreads")) {
366 n_threads = args[
"nthreads"].as<
int>();
369 omp_set_num_threads(n_threads);
372 std::ostringstream header;
375 struct tm * timeinfo = localtime(&rawtime);
376 header <<
"# clustering v1.0 - " << argv[1] <<
"\n" 378 <<
"# Created " << asctime(timeinfo)
379 <<
"# by following command:\n#\n# ";
380 std::vector<std::string> arguments(argv, argv + argc);
381 for (std::string& arg_string : arguments){
382 header << arg_string <<
" ";
384 header <<
"\n#\n# Copyright (c) 2015-2019 Florian Sittel and Daniel Nagel\n" 385 <<
"# please cite the corresponding paper, " 386 <<
"see https://github.com/moldyn/clustering\n";
387 args.insert(std::make_pair(
"header", b_po::variable_value(header.str(),
false)));
389 std::map<std::string,float> commentsMap = {{
"clustering_radius", 0.},
390 {
"lumping_radius", 0.},
391 {
"screening_from", 0.},
392 {
"screening_to", 0.},
393 {
"screening_step", 0.},
394 {
"minimal_population", 0.},
396 {
"single_coring_time", 0.}};
397 args.insert(std::make_pair(
"commentsMap", b_po::variable_value(commentsMap,
false)));
423 std::cerr <<
"error: unknown mode. this should never happen." << std::endl;
Most Probable Path Clustering.
bool verbose
global flag: use verbose output?
int main(int argc, char *argv[])
Parses options and executes the corresponding sub-module.
void main(boost::program_options::variables_map args)
user interface and controlling function for density-based geometric clustering.
std::ostream & logger(std::ostream &s)
void main(boost::program_options::variables_map args)
controlling function and user interface for boundary corrections
void main(boost::program_options::variables_map args)
controlling function and user interface for noise assignment
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
Density-based clustering.
void main(boost::program_options::variables_map args)
void main(boost::program_options::variables_map args)
MPP clustering control function and user interface.