density_clustering_common.cpp
1 /*
2 Copyright (c) 2015-2019, Florian Sittel (www.lettis.net) and Daniel Nagel
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
26 #include "logger.hpp"
28 #include <iomanip>
29 
30 #ifdef DC_USE_MPI
31  #include "density_clustering_mpi.hpp"
32 #endif
33 
34 namespace Clustering {
35 namespace Density {
36 
37  std::vector<std::size_t>
38  screening(const std::vector<float>& free_energy
39  , const Neighborhood& nh
40  , const float free_energy_threshold
41  , const float* coords
42  , const std::size_t n_rows
43  , const std::size_t n_cols
44  , const std::vector<std::size_t> initial_clusters
45 #ifdef DC_USE_MPI
46  , const int mpi_n_nodes
47  , const int mpi_node_id
48 #endif
49  ) {
50 #ifdef DC_USE_MPI
51  using namespace Clustering::Density::MPI;
52 #endif
53  std::vector<std::size_t> clustering;
54  std::size_t first_frame_above_threshold;
55  double sigma2;
56  std::vector<FreeEnergy> fe_sorted;
57  std::set<std::size_t> visited_frames;
58  std::size_t distinct_name;
59  // data preparation
60  std::tie(clustering
61  , first_frame_above_threshold
62  , sigma2
63  , fe_sorted
64  , visited_frames
65  , distinct_name) = prepare_initial_clustering(free_energy
66  , nh
67  , free_energy_threshold
68  , n_rows
69  , initial_clusters);
70  Clustering::logger(std::cout) << " " << std::setw(6)
71  << Clustering::Tools::stringprintf("%.2f", free_energy_threshold)
72  << " " << std::setw(9)
73  << Clustering::Tools::stringprintf("%i", first_frame_above_threshold)
74  << std::endl;
75 #ifdef DC_USE_MPI
76  if (mpi_node_id == MAIN_PROCESS) {
77 #endif
78  screening_log(sigma2
79  , first_frame_above_threshold
80  , fe_sorted);
81 #ifdef DC_USE_MPI
82  }
83 #endif
84  // indices inside this loop are in order of sorted(!) free energies
85  bool neighboring_clusters_merged = false;
86  //TODO: this while loop shouldn't be necessary, resp.
87  // will always finish trivially after 2 runs, since nothing will
88  // happen as all frames will have been visitied...
89  while ( ! neighboring_clusters_merged) {
90  neighboring_clusters_merged = true;
91 #ifdef DC_USE_MPI
92  if (mpi_node_id == MAIN_PROCESS) {
93 #endif
94 // logger(std::cout) << "initial merge iteration" << std::endl;
95 #ifdef DC_USE_MPI
96  }
97 #endif
98  for (std::size_t i=0; i < first_frame_above_threshold; ++i) {
99  if (visited_frames.count(i) == 0) {
100  visited_frames.insert(i);
101  // all frames/clusters in local neighborhood should be merged ...
102 #ifdef DC_USE_MPI
104  std::set<std::size_t> local_nh = hdn_mpi(coords,
105  n_cols,
106  fe_sorted,
107  i,
108  first_frame_above_threshold,
109  4*sigma2,
110  mpi_n_nodes,
111  mpi_node_id);
112 #else
113  //TODO use box-assisted search on
114  // fe_sorted coords for 'high_density_neighborhood'
115  std::set<std::size_t> local_nh = high_density_neighborhood(coords,
116  n_cols,
117  fe_sorted,
118  i,
119  first_frame_above_threshold,
120  4*sigma2);
121 #endif
122  neighboring_clusters_merged = lump_initial_clusters(local_nh
123  , distinct_name
124  , clustering
125  , fe_sorted
126  , first_frame_above_threshold)
127  && neighboring_clusters_merged;
128  }
129  } // end for
130  } // end while
131  return normalized_cluster_names(first_frame_above_threshold
132  , clustering
133  , fe_sorted);
134  }
135 
136 } // end namespace Density
137 } // end namespace Clustering
138 
const int MAIN_PROCESS
identify MPI process 0 as main process
std::vector< std::size_t > normalized_cluster_names(std::size_t first_frame_above_threshold, std::vector< std::size_t > clustering, std::vector< FreeEnergy > &fe_sorted)
return clustered trajectory with new, distinct cluster names.
general namespace for clustering package
Definition: coring.cpp:38
MPI implementations of compute intensive functions.
bool lump_initial_clusters(const std::set< std::size_t > &local_nh, std::size_t &distinct_name, std::vector< std::size_t > &clustering, const std::vector< FreeEnergy > &fe_sorted, std::size_t first_frame_above_threshold)
lump clusters based on distance threshold in screening process
Clustering::Tools::Neighborhood Neighborhood
map frame id to neighbors
Define global logger.
std::ostream & logger(std::ostream &s)
Definition: logger.cpp:32
std::tuple< std::vector< std::size_t >, std::size_t, double, std::vector< FreeEnergy >, std::set< std::size_t >, std::size_t > prepare_initial_clustering(const std::vector< float > &free_energy, const Neighborhood &nh, const float free_energy_threshold, const std::size_t n_rows, const std::vector< std::size_t > initial_clusters)
void screening_log(const double sigma2, const std::size_t first_frame_above_threshold, const std::vector< FreeEnergy > &fe_sorted)
log output for screening steps
std::string stringprintf(const std::string &str,...)
printf-version for std::string
Definition: tools.cpp:80
std::set< std::size_t > high_density_neighborhood(const float *coords, const std::size_t n_cols, const std::vector< FreeEnergy > &sorted_fe, const std::size_t i_frame, const std::size_t limit, const float max_dist, const int mpi_n_nodes, const int mpi_node_id)
std::vector< std::size_t > screening(const std::vector< float > &free_energy, const Neighborhood &nh, const float free_energy_threshold, const float *coords, const std::size_t n_rows, const std::size_t n_cols, const std::vector< std::size_t > initial_clusters)