licornea_tools
duplicates.cc
Go to the documentation of this file.
1 #include "../lib/args.h"
2 #include "../lib/dataset.h"
3 #include "../lib/filesystem.h"
4 #include <fstream>
5 #include <algorithm>
6 #include <cstdlib>
7 #include <sstream>
8 #include <vector>
9 #include <string>
10 
11 using namespace tlz;
12 
13 bool files_are_equal(const std::string& filename1, const std::string& filename2) {
14  if(file_size(filename1) != file_size(filename2)) return false;
15 
16  std::ifstream file1(filename1, std::ifstream::binary);
17  std::ifstream file2(filename2, std::ifstream::binary);
18 
19  std::istreambuf_iterator<char> begin1(file1);
20  std::istreambuf_iterator<char> begin2(file2);
21  bool equal = std::equal(begin1, std::istreambuf_iterator<char>(), begin2);
22 
23  return equal;
24 }
25 
26 int main(int argc, const char* argv[]) {
27  get_args(argc, argv, "dataset_parameters.json +x/-x/+y/-y [bad_files.txt] [dataset_group]");
28  dataset datas = dataset_arg();
29  std::string mode = enum_arg({ "+x", "-x", "+y", "-y" });
30  std::string bad_files_filename = out_filename_opt_arg("");
31  std::string dataset_group_name = string_opt_arg("");
32 
33  dataset_group datag = datas.group(dataset_group_name);
34 
35  int duplicates_count = 0;
36  std::vector<std::string> bad_files;
37 
38  auto cmp = [&](const view_index& prev, const view_index& cur) {
39  dataset_view view_prev = datag.view(prev);
40  dataset_view view_cur = datag.view(cur);
41 
42  std::string prev_image_filename = view_prev.image_filename();
43  std::string cur_image_filename = view_cur.image_filename();
44  if(file_exists(prev_image_filename) && file_exists(cur_image_filename) && files_are_equal(prev_image_filename, cur_image_filename)) {
45  duplicates_count++;
46  bad_files.push_back(cur_image_filename);
47  std::cout << "same images: " << prev << " and " << cur << std::endl;
48  }
49 
50  std::string prev_depth_filename = view_prev.depth_filename();
51  std::string cur_depth_filename = view_cur.depth_filename();
52  if(file_exists(prev_depth_filename) && file_exists(cur_depth_filename) && files_are_equal(prev_depth_filename, cur_depth_filename)) {
53  duplicates_count++;
54  bad_files.push_back(cur_depth_filename);
55  std::cout << "same depths: " << prev << " and " << cur << std::endl;
56  }
57  };
58 
59  if(mode == "+x") {
60  for(int y = datas.y_min(); y <= datas.y_max(); y += datas.y_step()) {
61  for(int x = datas.x_min() + datas.x_step(); x <= datas.x_max(); x += datas.x_step()) {
62  view_index ind_prev(x - datas.x_step(), y), ind(x, y);
63  cmp(ind_prev, ind);
64  }
65  }
66 
67  } else if(mode == "-x") {
68  for(int y = datas.y_min(); y <= datas.y_max(); y += datas.y_step()) {
69  for(int x = datas.x_max() - datas.x_step(); x >= datas.x_min(); x -= datas.x_step()) {
70  view_index ind_prev(x + datas.x_step(), y), ind(x, y);
71  cmp(ind_prev, ind);
72  }
73  }
74 
75  } else if(mode == "+y") {
76  // TODO
77 
78  } else if(mode == "-y") {
79  // TODO
80 
81  }
82 
83  std::cout << "\nfound " << duplicates_count << " duplicates of out " << 2*datas.x_count()*datas.y_count() << " images" << std::endl;
84 
85  if(! bad_files_filename.empty()) {
86  std::ofstream stream(bad_files_filename);
87  for(const std::string& filename : bad_files) stream << filename << "\n";
88  stream.close();
89  }
90 }
int y_count() const
Definition: dataset.cc:221
bool file_exists(const std::string &filename)
int x_step() const
Definition: dataset.cc:173
std::string image_filename() const
Definition: dataset.cc:64
int main(int argc, const char *argv[])
Definition: duplicates.cc:26
int x_max() const
Definition: dataset.cc:169
int x_min() const
Definition: dataset.cc:165
std::string enum_arg(const std::vector< std::string > &options)
Definition: args.cc:154
dataset_view view(int x) const
Definition: dataset.cc:105
std::size_t file_size(const std::string &filename)
std::string depth_filename() const
Definition: dataset.cc:68
std::string out_filename_opt_arg(const std::string &def)
Definition: args.cc:110
dataset dataset_arg()
Definition: dataset.cc:297
int y_step() const
Definition: dataset.cc:206
int x_count() const
Definition: dataset.cc:182
std::string mode
int y_max() const
Definition: dataset.cc:201
bool files_are_equal(const std::string &filename1, const std::string &filename2)
Definition: duplicates.cc:13
int y_min() const
Definition: dataset.cc:196
dataset_group group(const std::string &grp) const
Definition: dataset.cc:265
std::string string_opt_arg(const std::string &def="")
Definition: args.h:36
void get_args(int argc, const char *argv[], const std::string &usage)
Definition: args.cc:49