forked from trizen/perl-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fdf-filename
executable file
·114 lines (89 loc) · 2.42 KB
/
fdf-filename
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/perl
# Daniel "Trizen" Șuteu
# License: GPLv3
# Date: 21 June 2012
# https://github.com/trizen
# Find and list probable duplicates (files of equal size with similar names) in one or more paths
#
## WARNING! For strict duplicates, use the 'fdf' script:
# https://github.com/trizen/perl-scripts/blob/master/Finders/fdf
#
use 5.005;
use strict;
use warnings;
use File::Find qw(find);
use File::Basename qw(basename);
use Getopt::Std qw(getopts);
# Only the arguments that name existing directories are scanned; option
# switches and anything else that is not a directory are dropped here.
my @dirs = grep { -d } @ARGV;

# Usage text, shown when no directory is given or an option is not recognised.
my $usage = <<"HELP";
usage: $0 [options] /my/path [...]
Options:
-f : keep only the first duplicated file
-l : keep only the last duplicated file
HELP

die $usage if !@dirs;

# getopts() removes the switches it recognises from @ARGV and returns false
# when it meets an unknown one. Stop with the usage text in that case instead
# of silently carrying on with a half-understood command line.
my %opts;
getopts("fl", \%opts) or die $usage;
# Decide whether two file names are similar enough to count as the same name.
#
# The names match when they are identical, or when some run of characters
# taken from the shorter name, at least half as long as the longer name
# (half rounded up), occurs somewhere inside the longer name.
#
# Arguments: two strings, in either order.
#
# Returns:
#    0  the names are identical or share such a run
#    1  no such run exists
#   -1  the shorter name is shorter than the required run length, so the
#       names cannot match
sub compare_strings ($$) {
    my ($name1, $name2) = @_;

    return 0 if $name1 eq $name2;

    # Make $name1 the shorter of the two names.
    if (length($name1) > length($name2)) {
        ($name2, $name1) = ($name1, $name2);
    }

    my $len1 = length($name1);
    my $len2 = length($name2);

    # Minimum length of the shared run: half of the longer name, rounded up.
    my $min = int(0.5 + $len2 / 2);
    return -1 if $min > $len1;

    # Testing windows of exactly $min characters is sufficient: whenever a
    # longer run starting at the same offset occurs in $name2, its first
    # $min characters occur there as well.
    foreach my $start (0 .. $len1 - $min) {
        return 0 if index($name2, substr($name1, $start, $min)) != -1;
    }

    return 1;
}
# Walk the given directories and report groups of files that look like
# duplicates of each other.
#
# Usage:  find_duplicated_files { ...callback... } @directories;
#
# Two files belong to the same group when they have the same size and
# compare_strings() returns 0 for their base names. Only regular files
# (symbolic links excluded) of at least 4KB are considered.
#
# The callback is invoked once per group with the group's paths, sorted as
# strings, in @_. Every group holds at least two paths. Groups are reported
# by ascending file size and, within one size, in the order in which their
# first member was found.
#
# Returns nothing.
sub find_duplicated_files (&@) {
    my $code = shift;

    my %files;    # file size => paths of the files having that size
    my %seen;     # paths that were already collected

    find(
        {
            no_chdir => 1,
            wanted   => sub {

                # When lstat fails (for instance because the entry vanished
                # during the walk) there is nothing to measure; skipping it
                # also avoids testing an undefined size below.
                lstat($_) or return;

                return if ((-s _) < 4 * 1024);    # skips files smaller than 4KB
                return if !(-f _) || (-l _);

                # A path can be reached more than once when the same
                # directory, or a directory and one of its subdirectories,
                # is listed in the arguments. Collecting it twice would make
                # the file a "duplicate" of itself, and a caller that deletes
                # duplicates would then remove its only copy.
                return if $seen{$_}++;

                push @{$files{-s _}}, $_;
            },
        },
        @_
    );

    foreach my $size (sort { $a <=> $b } keys %files) {
        my @pending = @{$files{$size}};

        # Take the first remaining file as the parent, pull every later file
        # whose name matches it into the parent's group, and carry on with
        # the files that did not match.
        while (@pending > 1) {
            my $parent      = shift @pending;
            my $parent_name = basename($parent);

            my (@dups, @rest);
            foreach my $candidate (@pending) {
                if (compare_strings($parent_name, basename($candidate)) == 0) {
                    push @dups, $candidate;
                }
                else {
                    push @rest, $candidate;
                }
            }

            $code->(sort { $a cmp $b } $parent, @dups) if @dups;
            @pending = @rest;
        }
    }

    return;
}
# Main program: print every group of duplicates followed by a separator line
# and, when -f or -l was given, delete all files of the group except the
# first or the last one respectively. If both switches are set, -f wins.
{
    # Print every list element on its own line and end each print with a
    # newline.
    local $, = "\n";
    local $\ = "\n";

    find_duplicated_files {
        my @group = @_;
        return if !@group;

        print @group, "-" x 80;

        # Files to remove: everything but the first (-f) or the last (-l)
        # path of the sorted group; nothing without either switch.
        my @doomed =
            $opts{f} ? @group[1 .. $#group]
          : $opts{l} ? @group[0 .. $#group - 1]
          :            ();

        foreach my $file (@doomed) {

            # Name the file in the message so a failure can be traced.
            unlink $file or warn "[error]: Can't delete '$file': $!\n";
        }
    }
    @dirs;
}