30 #include "embedded_cytoscape.hpp" 34 #include <unordered_set> 37 #include <boost/program_options.hpp> 38 #include <boost/filesystem.hpp> 47 int HORIZONTAL_SPACING = 10;
48 int VERTICAL_SPACING = 50;
57 std::size_t POP_MIN = 0;
58 std::size_t POP_MAX = 0;
67 std::map<std::size_t, Node> children;
70 int _subtree_width = 0;
73 Node(std::size_t _id,
float _fe, std::size_t _pop);
74 Node* find_parent_of(std::size_t search_id);
75 void set_pos(
int x,
int y);
77 void print_subtree(std::ostream& os);
78 void print_node_and_subtree(std::ostream& os);
81 constexpr
bool fuzzy_equal(
float a,
float b,
float prec) {
82 return (a <= b + prec) && (a >= b - prec);
87 std::ostream& operator<<(std::ostream& os,
const Node& n) {
95 os <<
Clustering::Tools::stringprintf(
"{group:'nodes',id:'n%d',position:{x:%d,y:%d},data:{id:'n%d',pop:%d,fe:%f,info:'%d: fe=%0.2f, pop=%d',logpop:%0.2f}},\n",
96 n.id, n.pos_x, n.pos_y, n.id, n.pop, n.fe, n.id, n.fe, n.pop, log_pop);
98 for (
auto& id_child: n.children) {
99 std::size_t cid = id_child.first;
107 Node::Node(std::size_t _id,
115 Node* Node::find_parent_of(std::size_t search_id) {
116 if (this->children.count(search_id)) {
119 for (
auto& id_child: children) {
120 Node* cc = id_child.second.find_parent_of(search_id);
129 void Node::set_pos(
int x,
int y) {
132 std::vector<int> width_children;
134 for (
auto& id_child: children) {
135 width_children.push_back(id_child.second.subtree_width());
136 total_width += id_child.second.subtree_width();
140 int cur_x = (x-0.5*total_width);
141 for (
auto& id_child: children) {
142 int stw = id_child.second.subtree_width();
143 id_child.second.set_pos(cur_x + 0.5*stw, y + VERTICAL_SPACING);
148 int Node::subtree_width() {
151 if ( ! this->_subtree_width) {
152 int self_width = 10 + 2*HORIZONTAL_SPACING;
153 if (children.empty()) {
154 this->_subtree_width = self_width;
157 for (
auto& id_child: children) {
158 sum += id_child.second.subtree_width();
160 if (sum > self_width) {
161 this->_subtree_width = sum;
163 this->_subtree_width = self_width;
167 return this->_subtree_width;
170 void Node::print_subtree(std::ostream& os) {
171 for (
auto& id_child: children) {
172 id_child.second.print_node_and_subtree(os);
176 void Node::print_node_and_subtree(std::ostream& os) {
177 os << (*this) << std::endl;
178 this->print_subtree(os);
183 save_network_links(std::string fname, std::map<std::size_t, std::size_t> network,
184 std::string header_comment, std::map<std::string,float> commentsMap) {
185 fname.append(
"_links.dat");
188 header_comment.append(
"#\n# Name of the cluster connected to the name in next " 189 "higher free energy level\n# Named by the remapped clusters.\n#\n" 190 "# cluster_name(fe+step) cluster_name(fe)\n");
191 Clustering::Tools::write_map<std::size_t, std::size_t>(fname, network, header_comment,
true);
195 save_node_info(std::string fname,
196 std::map<std::size_t, float> free_energies,
197 std::map<std::size_t, std::size_t> pops,
198 std::string header_comment,
199 std::map<std::string,float> commentsMap) {
200 fname.append(
"_nodes.dat");
203 header_comment.append(
"#\n# nodes\n");
204 header_comment.append(
"#\n# Name of all clusters at a given free energies (fe) " 205 "with the corresponding populations pop.\n" 206 "# id(cluster) fe pop\n");
207 std::ofstream ofs(fname);
209 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
212 ofs << header_comment;
213 for (
auto node_pop: pops) {
214 std::size_t key = node_pop.first;
215 ofs << key <<
" " << free_energies[key] <<
" " << node_pop.second <<
"\n";
220 std::set<std::size_t>
221 compute_and_save_leaves(std::string fname,
222 std::map<std::size_t, std::size_t> network,
223 std::string header_comment,
224 std::map<std::string,float> commentsMap) {
225 fname.append(
"_leaves.dat");
227 std::set<std::size_t> leaves;
228 std::set<std::size_t> not_leaves;
229 for (
auto from_to: network) {
230 std::size_t src = from_to.first;
231 std::size_t target = from_to.second;
232 not_leaves.insert(target);
233 if (not_leaves.count(src)) {
239 std::vector<std::size_t> leaves_vec( leaves.begin(), leaves.end() );
241 header_comment.append(
"#\n# All network leaves, i.e. nodes (microstates) without child\n" 242 "# nodes at a lower free energy level. These microstates represent\n" 243 "# the minima of their local basins.\n#\n" 246 Clustering::Tools::write_single_column<std::size_t>(fname, leaves_vec, header_comment,
false);
251 save_traj_of_leaves(std::string fname,
252 std::set<std::size_t> leaves,
256 std::string remapped_name,
258 std::string header_comment,
259 std::map<std::string,float> commentsMap) {
260 fname.append(
"_end_node_traj.dat");
261 Clustering::logger(std::cout) <<
" saving end-node trajectory in: " << fname << std::endl;
262 std::vector<std::size_t> traj(n_rows);
263 const float prec = d_step / 10.0f;
264 for (
float d=d_min; ! fuzzy_equal(d, d_max+d_step, prec); d += d_step) {
267 for (std::size_t i=0; i < n_rows; ++i) {
268 if (leaves.count(cl_now[i])) {
274 header_comment.append(
"#\n# All frames beloning to a leaf node are marked with\n" 275 "# the custer id. All others with zero.\n");
276 header_comment.append(
"#\n# state/cluster id frames are assigned to\n");
277 Clustering::Tools::write_single_column<std::size_t>(fname, traj, header_comment);
281 save_network_to_html(std::string fname,
282 std::map<std::size_t, std::size_t> network,
283 std::map<std::size_t, float> free_energies,
284 std::map<std::size_t, std::size_t> pops) {
285 Clustering::logger(std::cout) <<
"\n~~~ computing network visualization" << std::endl;
287 FE_MAX = std::max_element(free_energies.begin(),
289 [](std::pair<std::size_t, float> fe1,
290 std::pair<std::size_t, float> fe2) ->
bool {
291 return fe1.second < fe2.second;
293 FE_MIN = std::min_element(free_energies.begin(),
295 [](std::pair<std::size_t, float> fe1,
296 std::pair<std::size_t, float> fe2) ->
bool {
297 return fe1.second < fe2.second;
299 POP_MAX = std::max_element(pops.begin(),
301 [](std::pair<std::size_t, std::size_t> p1,
302 std::pair<std::size_t, std::size_t> p2) ->
bool {
303 return p1.second < p2.second;
305 POP_MIN = std::min_element(pops.begin(),
307 [](std::pair<std::size_t, std::size_t> p1,
308 std::pair<std::size_t, std::size_t> p2) ->
bool {
309 return p1.second < p2.second;
316 for (
auto from_to: network) {
319 std::size_t i_from = from_to.first;
320 std::size_t i_to = from_to.second;
322 Node* parent_to = fake_root.find_parent_of(i_to);
324 fake_root.children[i_to] = {i_to, free_energies[i_to], pops[i_to]};
325 parent_to = &fake_root;
327 Node* parent_from = fake_root.find_parent_of(i_from);
330 parent_to->children[i_to].children[i_from] = parent_from->children[i_from];
331 parent_from->children.erase(i_from);
334 parent_to->children[i_to].children[i_from] = {i_from, free_energies[i_from], pops[i_from]};
339 fname.append(
"_visualization.html");
340 std::ofstream ofs(fname);
342 std::cerr <<
"error: cannot open file '" << fname <<
"' for writing." << std::endl;
345 float LOG_POP_MIN, LOG_POP_MAX;
349 LOG_POP_MIN = log(POP_MIN);
354 LOG_POP_MAX = log(POP_MAX);
356 ofs << Clustering::Network::viewer_header
358 <<
"style: cytoscape.stylesheet().selector('node').css({" 363 <<
".selector('edge').css({'opacity': '1.0', 'width': '5', 'target-arrow-shape': 'triangle'})" 364 <<
".selector(':selected').css({'content': 'data(info)', 'font-size': 24, 'color': '#00ff00'})" 366 <<
", elements: [\n";
367 fake_root.set_pos(0, 0);
368 fake_root.print_subtree(ofs);
370 ofs << Clustering::Network::viewer_footer;
377 namespace NetworkBuilder {
380 main(boost::program_options::variables_map args) {
381 namespace b_fs = boost::filesystem;
384 omp_set_num_threads(2);
388 float d_min = args[
"min"].as<
float>();
389 float d_max = args[
"max"].as<
float>();
390 float d_step = args[
"step"].as<
float>();
391 std::string basename = args[
"basename"].as<std::string>();
392 std::string basename_output = args[
"output"].as<std::string>();
393 basename.append(
".%0.2f");
394 std::string remapped_name =
"remapped_" + basename;
395 std::size_t minpop = args[
"minpop"].as<std::size_t>();
396 bool network_html = args[
"network-html"].as<
bool>();
397 std::string header_comment = args[
"header"].as<std::string>();
398 std::map<std::string,float> commentsMap = args[
"commentsMap"].as<std::map<std::string,float>>();
400 std::map<std::size_t, std::size_t> network;
401 std::map<std::size_t, std::size_t> pops;
402 std::map<std::size_t, float> free_energies;
406 if ( ! b_fs::exists(fname_next)) {
407 std::cerr <<
"error: file does not exist: " << fname_next
408 <<
" check basename (-b) and --min/--max/--step" << std::endl;
413 std::vector<std::size_t> cl_now;
415 std::size_t n_rows = cl_next.size();
419 const float prec = d_step / 10.0f;
422 d_max = std::numeric_limits<float>::max();
427 Clustering::logger(std::cout) <<
"~~~ remapping cluster files and generating network" << std::endl;
428 for (d=d_min; ! fuzzy_equal(d, d_max, prec) && b_fs::exists(fname_next); d += d_step) {
433 #pragma omp parallel sections 444 if (b_fs::exists(fname_next)) {
446 max_id = *std::max_element(cl_now.begin(), cl_now.end());
447 for (std::size_t i=0; i < n_rows; ++i) {
448 if (cl_next[i] != 0) {
449 cl_next[i] += max_id;
450 if (cl_now[i] != 0) {
451 network[cl_now[i]] = cl_next[i];
453 free_energies[cl_now[i]] = d;
464 commentsMap[
"minimal_population"] = minpop;
467 << minpop << std::endl;
468 std::unordered_set<std::size_t> removals;
469 auto pop_it = pops.begin();
471 while (pop_it != pops.end()) {
472 if (pop_it->second < minpop) {
473 removals.insert(pop_it->first);
474 pops.erase(pop_it++);
480 auto net_it = network.begin();
481 while (net_it != network.end()) {
482 std::size_t a = net_it->first;
483 std::size_t b = net_it->second;
484 if (removals.count(a) > 0 || removals.count(b) > 0) {
485 network.erase(net_it++);
493 save_network_links(basename_output, network, header_comment, commentsMap);
495 save_node_info(basename_output, free_energies, pops, header_comment, commentsMap);
497 std::set<std::size_t> leaves = compute_and_save_leaves(basename_output,
498 network, header_comment, commentsMap);
501 save_traj_of_leaves(basename_output, leaves,
502 d_min, d_max, d_step, remapped_name, n_rows, header_comment, commentsMap);
505 save_network_to_html(basename_output, network, free_energies, pops);
bool verbose
global flag: use verbose output?
general namespace for clustering package
int main(int argc, char *argv[])
Parses option and execute corresponding sub-module.
std::ostream & logger(std::ostream &s)