forked from trizen/perl-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lzt-fast.pl
executable file
·82 lines (61 loc) · 2.29 KB
/
lzt-fast.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl
# Author: Daniel "Trizen" Șuteu
# License: GPLv3
# Date: 26 April 2015
# Website: https://github.com/trizen
# A very good and very fast compression algorithm. (concept only)
use 5.010;
use strict;
use warnings;
sub lzt_compress {
my ($str) = @_;
my $k = 0; # must be zero
my $min = 4; # the minimum length of a substring
my $max = 15; # the maximum length of a substring
my $i = 0; # iterator (0 to length(str)-1)
my $remember = 0; # remember mode
my $memo = ''; # short-term memory
my @dups; # array of duplicated substrings with positions
my @cache; # cache of substrings
my %dict; # dictionary of substrings
foreach my $c (split(//, $str)) {
if (not $remember and exists $dict{$c}) {
$remember = 1; # activate the remember mode
}
$cache[$_] .= $c for ($k .. $i); # create the substrings
# If remember mode is one, do some checks.
if ($remember) {
# Check to see if $memo + the current character exists in the dictionary
if (exists $dict{$memo . $c}) {
## say "found in cache [$i]: $memo$c";
}
# If it doesn't exists, then the $memo is the largest
# duplicated substring in the dictionary at this point.
else {
$remember = 0; # turn-off remember mode
if (length($memo) >= $min) { # check for the minimum length of the word
push @dups, [$dict{$memo}, length($memo), $memo, $i - length($memo)];
}
# $memo has been stored. Now, clear the memory.
$memo = '';
}
# Remember one more character
$memo .= $c;
}
# Increment the iterator
$i++;
# Create the dictionary from the cache of substrings
foreach my $item (@cache) {
exists($dict{$item})
|| ($dict{$item} = $i - length($item));
}
# Update the minimum length
++$k if (($i - $k) >= $max);
}
return \@dups;
}
#
## Usage
#
my $str = @ARGV ? do { local $/; <> } : "TOBEORNOTTOBEORTOBEORNOT#";
say '[', join(', ', @{$_}), ']' for @{lzt_compress($str)};