Skip to content

Commit

Permalink
Added main.cpp file. It's functional. Files are compared by file size…
Browse files Browse the repository at this point in the history
… only.
  • Loading branch information
rvasin committed Jan 14, 2022
1 parent 5cbeef7 commit 5039ceb
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,22 @@
# dubdog
An ultimate CLI duplicate file finder and remover!

This is a demo project to demonstrate my skills in C++11/14/17/20.
I intentionally put all the code into a single main.cpp file so that it is easy to review by simply scrolling through the page.

Although it was created as a demo project, it can still be genuinely useful for finding and removing duplicate files.

Build it with
g++ main.cpp -o dubdog

Usage:
dubdog path extensions

extensions - a comma-separated list of file extensions.

For example when running on Windows you may run:
dubdog 'C:\Books' pdf,djvu,epub,fb2

The current version compares files only by size (not by MD5 or a similar message digest).
Thus, be careful when using it on real files.

107 changes: 107 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <regex>
#include <string>
#include <system_error>
#include <thread>
#include <vector>

using namespace std;


/// A group of files that all have the same size and are therefore
/// treated as duplicate candidates.
struct DupList
{
    /// File size shared by every entry in `lst`. Initialized to zero so a
    /// default-constructed group never exposes an indeterminate value.
    std::uintmax_t fsize = 0;
    /// Paths of the files recorded for this size.
    std::vector<std::string> lst;
};

int main(int argc, char *argv[])
{
if (argc < 3) {
cout << "Usage: " << endl;
cout << " dupdog path masks" << endl;
cout << "Example:" << endl;
cout << " dupbog C:\\books pdf;djvu;epub;fb2" << endl;
return 0;
}
cout << "Scanning for duplicate files..." << endl;

string FilePath = argv[1];

char *exts = argv[2];
char *ext = strtok(exts, ",");
string masks = "";
while (ext) {
if (masks.size() > 0) masks += "|";
masks += "\\."+string(ext);
ext = strtok(nullptr, ",");
}

regex mask {"("+masks+")"};
string ExtMask;

map<uintmax_t,shared_ptr<DupList>> candidates;

using rdi = std::filesystem::recursive_directory_iterator;
for (const auto& entry : rdi(FilePath)) {
string ext = entry.path().extension().string();
if (entry.is_regular_file() && regex_match(ext, mask)) {
auto& dup = candidates[entry.file_size()];
if (dup == nullptr) {
shared_ptr<DupList> newdup(new DupList());
candidates[entry.file_size()] = newdup;
dup = candidates[entry.file_size()];
}
dup->fsize = entry.file_size();
dup->lst.push_back(entry.path().string());

}
}

cout << "Printing duplicates:" << endl;
int no = 1;
vector<string> flist;
for (const auto& [fsize, dup] : candidates) {
if (dup->lst.size() > 1) {
cout << "size: " << fsize << endl;
for (const auto& f : dup->lst) {
cout << "#" << no << " " << f << endl;
flist.push_back(f);
no++;
}
}
}
cout << "Which files do you want to remove? Write down files numbers separated by space and press Enter" << endl;
cout << "Or just press Enter if you don't want to remove any files." << endl;
string filenumbers;
cout << ">";
std::getline(std::cin, filenumbers);

if (filenumbers.size()>0) {
char *fn = new char[filenumbers.length() + 1];
strcpy(fn, filenumbers.c_str());
char *num;
num = strtok(fn," ");
int i;
while (num) {
i = stoi(num) ;
cout << "Removing file: #" << i << " " << flist[i-1] << endl;
try {
filesystem::remove(flist[i-1]);
}
catch (...) {
cout << "Cannot remove file" << endl;
}
num = strtok(nullptr," ");
}
delete [] fn;
}
cout << "Done!" << endl;

return 0;
}

0 comments on commit 5039ceb

Please sign in to comment.