density_clustering.hpp
Go to the documentation of this file.
1 /*
2 Copyright (c) 2015-2019, Florian Sittel (www.lettis.net) and Daniel Nagel
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without modification,
6 are permitted provided that the following conditions are met:
7 
8 1. Redistributions of source code must retain the above copyright notice,
9  this list of conditions and the following disclaimer.
10 
11 2. Redistributions in binary form must reproduce the above copyright notice,
12  this list of conditions and the following disclaimer in the documentation
13  and/or other materials provided with the distribution.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
16 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
18 SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
22 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
23 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
26 #pragma once
27 
28 #include <vector>
29 #include <map>
30 #include <set>
31 #include <array>
32 #include <utility>
33 #include <string>
34 
35 #include <boost/program_options.hpp>
36 
37 #include "tools.hpp"
44 namespace Clustering {
52  namespace Density {
54  using FreeEnergy = std::pair<std::size_t, float>;
60  using Box = std::array<int, 2>;
64  constexpr int BOX_DIFF[9][2] = {{-1, 1}, { 0, 1}, { 1, 1}
65  , {-1, 0}, { 0, 0}, { 1, 0}
66  , {-1,-1}, { 0,-1}, { 1,-1}};
68  const int N_NEIGHBOR_BOXES = 9;
69 // //! encodes box differences in 3D, i.e. if you are at
70 // //! the center box, the 27 different tuples hold the steppings
71 // //! to the 27 spacial neighbors (including the center box itself).
72 // constexpr int BOX_DIFF[27][3] = {{-1, 1,-1}, { 0, 1,-1}, { 1, 1,-1}
73 // , {-1, 0,-1}, { 0, 0,-1}, { 1, 0,-1}
74 // , {-1,-1,-1}, { 0,-1,-1}, { 1,-1,-1}
75 // , {-1, 1, 0}, { 0, 1, 0}, { 1, 1, 0}
76 // , {-1, 0, 0}, { 0, 0, 0}, { 1, 0, 0}
77 // , {-1,-1, 0}, { 0,-1, 0}, { 1,-1, 0}
78 // , {-1, 1, 1}, { 0, 1, 1}, { 1, 1, 1}
79 // , {-1, 0, 1}, { 0, 0, 1}, { 1, 0, 1}
80 // , {-1,-1, 1}, { 0,-1, 1}, { 1,-1, 1}};
81 // //! number of neigbor boxes in cubic 3D grid (including center box).
82 // const int N_NEIGHBOR_BOXES = 27;
85  struct BoxGrid {
87  std::vector<int> n_boxes;
89  std::vector<Box> assigned_box;
91  std::map<Box, std::vector<int>> boxes;
92  };
95  constexpr Box
96  neighbor_box(const Box center, const int i_neighbor);
99  BoxGrid
100  compute_box_grid(const float* coords,
101  const std::size_t n_rows,
102  const std::size_t n_cols,
103  const float radius);
106  bool
107  is_valid_box(const Box box,
108  const BoxGrid& grid);
110  std::vector<std::size_t>
111  calculate_populations(const float* coords,
112  const std::size_t n_rows,
113  const std::size_t n_cols,
114  const float radius);
118  std::map<float, std::vector<std::size_t>>
119  calculate_populations(const float* coords,
120  const std::size_t n_rows,
121  const std::size_t n_cols,
122  const std::vector<float> radii);
125  std::vector<float>
126  calculate_free_energies(const std::vector<std::size_t>& pops);
129  std::vector<FreeEnergy>
130  sorted_free_energies(const std::vector<float>& fe);
133  std::tuple<Neighborhood, Neighborhood>
134  nearest_neighbors(const float* coords,
135  const std::size_t n_rows,
136  const std::size_t n_cols,
137  const std::vector<float>& free_energy);
139  void
140  screening_log(const double sigma2
141  , const std::size_t first_frame_above_threshold
142  , const std::vector<FreeEnergy>& fe_sorted);
145  std::tuple<std::vector<std::size_t>
146  , std::size_t
147  , double
148  , std::vector<FreeEnergy>
149  , std::set<std::size_t>
150  , std::size_t>
151  prepare_initial_clustering(const std::vector<float>& free_energy
152  , const Neighborhood& nh
153  , const float free_energy_threshold
154  , const std::size_t n_rows
155  , const std::vector<std::size_t> initial_clusters);
157  std::vector<std::size_t>
158  normalized_cluster_names(std::size_t first_frame_above_threshold
159  , std::vector<std::size_t> clustering
160  , std::vector<FreeEnergy>& fe_sorted);
162  bool
163  lump_initial_clusters(const std::set<std::size_t>& local_nh
164  , std::size_t& distinct_name
165  , std::vector<std::size_t>& clustering
166  , const std::vector<FreeEnergy>& fe_sorted
167  , std::size_t first_frame_above_threshold);
171  std::set<std::size_t>
172  high_density_neighborhood(const float* coords,
173  const std::size_t n_cols,
174  const std::vector<FreeEnergy>& sorted_fe,
175  const std::size_t i_frame,
176  const std::size_t limit,
177  const float max_dist);
181  double
182  compute_sigma2(const Neighborhood& nh);
184  bool
185  compare2DVector(const std::pair<std::size_t,std::size_t>& p1,
186  const std::pair<std::size_t,std::size_t>& p2);
188  bool
189  has2digits(float val);
191  std::vector<std::size_t>
192  sorted_cluster_names (std::vector<std::size_t> clustering);
199  std::vector<std::size_t>
200  assign_low_density_frames(const std::vector<std::size_t>& initial_clustering,
201  const Neighborhood& nh_high_dens,
202  const std::vector<float>& free_energy);
217  void
218  main(boost::program_options::variables_map args);
219  } // end namespace Density
220 } // end namespace Clustering
221 
std::vector< std::size_t > assign_low_density_frames(const std::vector< std::size_t > &initial_clustering, const Neighborhood &nh_high_dens, const std::vector< float > &free_energy)
std::vector< std::size_t > normalized_cluster_names(std::size_t first_frame_above_threshold, std::vector< std::size_t > clustering, std::vector< FreeEnergy > &fe_sorted)
return clustered trajectory with new, distinct cluster names.
general namespace for clustering package
Definition: coring.cpp:38
Tools mainly for IO and some other functions.
double compute_sigma2(const Neighborhood &nh)
const int N_NEIGHBOR_BOXES
number of neigbor boxes in 2D grid (including center box).
bool is_valid_box(const Box box, const BoxGrid &grid)
std::set< std::size_t > high_density_neighborhood(const float *coords, const std::size_t n_cols, const std::vector< FreeEnergy > &sorted_fe, const std::size_t i_frame, const std::size_t limit, const float max_dist)
bool lump_initial_clusters(const std::set< std::size_t > &local_nh, std::size_t &distinct_name, std::vector< std::size_t > &clustering, const std::vector< FreeEnergy > &fe_sorted, std::size_t first_frame_above_threshold)
lump clusters based on distance threshold in screening process
void main(boost::program_options::variables_map args)
user interface and controlling function for density-based geometric clustering.
Clustering::Tools::Neighborhood Neighborhood
map frame id to neighbors
std::vector< int > n_boxes
total number of boxes
std::map< Box, std::vector< int > > boxes
the boxes with a list of assigned frame ids
std::tuple< Neighborhood, Neighborhood > nearest_neighbors(const float *coords, const std::size_t n_rows, const std::size_t n_cols, const std::vector< float > &free_energy)
constexpr int BOX_DIFF[9][2]
std::pair< std::size_t, float > Neighbor
matches neighbor&#39;s frame id to distance
Definition: tools.hpp:64
std::map< std::size_t, Clustering::Tools::Neighbor > Neighborhood
map frame id to neighbors
Definition: tools.hpp:66
std::tuple< std::vector< std::size_t >, std::size_t, double, std::vector< FreeEnergy >, std::set< std::size_t >, std::size_t > prepare_initial_clustering(const std::vector< float > &free_energy, const Neighborhood &nh, const float free_energy_threshold, const std::size_t n_rows, const std::vector< std::size_t > initial_clusters)
BoxGrid compute_box_grid(const float *coords, const std::size_t n_rows, const std::size_t n_cols, const float radius)
std::vector< std::size_t > calculate_populations(const float *coords, const std::size_t n_rows, const std::size_t n_cols, const float radius)
calculate population of n-dimensional hypersphere per frame for one fixed radius. ...
constexpr Box neighbor_box(const Box center, const int i_neighbor)
void screening_log(const double sigma2, const std::size_t first_frame_above_threshold, const std::vector< FreeEnergy > &fe_sorted)
log output for screening steps
std::vector< float > calculate_free_energies(const std::vector< std::size_t > &pops)
std::vector< FreeEnergy > sorted_free_energies(const std::vector< float > &fe)
bool has2digits(float val)
check if float has at maximum two digits
std::pair< std::size_t, float > FreeEnergy
matches frame id to free energy
std::array< int, 2 > Box
encodes 2D box for box-assisted search algorithm
Clustering::Tools::Neighbor Neighbor
matches neighbor&#39;s frame id to distance
std::vector< Box > assigned_box
matching frame id to the frame&#39;s assigned box
bool compare2DVector(const std::pair< std::size_t, std::size_t > &p1, const std::pair< std::size_t, std::size_t > &p2)
compare two two dimensional pairs by their second entry
std::vector< std::size_t > sorted_cluster_names(std::vector< std::size_t > clustering)
sorts the cluster by decreasing population and renames them from 1..N