-
Notifications
You must be signed in to change notification settings - Fork 2
/
dedup.go
51 lines (42 loc) · 1.21 KB
/
dedup.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
package dedup
import (
"crypto/sha512"
"hash"
"io"
"github.com/amoghe/dedup/codec"
)
// Deduplicator performs deduplication of an input stream.
//
// Instances are normally created with NewDeduplicator, which populates every
// field below. Do consumes the input and writes the encoded result; PrintStats
// reports what the tracker recorded.
type Deduplicator struct {
	// segmenter is handed the input stream in Do, together with a callback
	// that is invoked with each segment's bytes.
	segmenter *Segmenter
	// tracker is told about every segment seen by Do and reports back the
	// segment's ID and frequency; it also backs PrintStats.
	tracker *SegmentTracker
	// seghasher is the hash state used by Do when tracking a segment
	// (SHA-512 when built by NewDeduplicator).
	seghasher hash.Hash
}
// NewDeduplicator builds a ready-to-use Deduplicator.
//
// winsz and mask are passed through unchanged as the WindowSize and Mask of
// the Segmenter the Deduplicator owns. Each Deduplicator gets its own
// SegmentTracker and its own SHA-512 hash state.
func NewDeduplicator(winsz, mask uint64) *Deduplicator {
	return &Deduplicator{
		segmenter: &Segmenter{WindowSize: winsz, Mask: mask},
		tracker:   NewSegmentTracker(),
		seghasher: sha512.New(),
	}
}
// Do runs the deduplication of the specified input stream.
//
// input is handed to the segmenter, which invokes a callback with each
// segment's bytes. For every segment the callback:
//
//  1. computes the segment's digest with d.seghasher,
//  2. registers the segment and digest with the tracker, and
//  3. writes one codec.Message to output through a codec.NewGobWriter:
//     a MessageDef carrying the segment's ID and bytes when the tracker
//     reports a frequency of at most 1 (first sighting), otherwise a
//     MessageRef carrying only the ID.
//
// The returned error is whatever SegmentFile returns; presumably that
// includes any error returned by the callback (hashing or message write
// failure) — confirm against Segmenter.SegmentFile.
func (d *Deduplicator) Do(input io.Reader, output io.Writer) error {
	writer := codec.NewGobWriter(output)

	handler := func(seg []byte) error {
		// hash.Hash.Sum(b) does NOT hash b: it appends the digest of the
		// bytes written so far to b. Calling Sum(seg) on a hasher that was
		// never written to therefore yields seg followed by a constant, and
		// the append may scribble over spare capacity in seg's backing
		// array. Feed the segment through Write and ask for the digest in a
		// fresh slice instead.
		d.seghasher.Reset()
		if _, err := d.seghasher.Write(seg); err != nil {
			return err
		}
		digest := d.seghasher.Sum(nil)

		stat := d.tracker.Track(seg, digest)

		// Default to a reference; upgrade to a full definition the first
		// time the tracker sees this segment.
		msg := codec.Message{Type: codec.MessageRef, RefID: stat.ID}
		if stat.Freq <= 1 {
			msg = codec.Message{Type: codec.MessageDef, DefID: stat.ID, DefBytes: seg}
		}
		return writer.Write(&msg)
	}

	return d.segmenter.SegmentFile(input, handler)
}
// PrintStats writes the segment tracker's statistics to out.
//
// It delegates directly to the tracker's PrintStats and returns that call's
// error unchanged; the content and format of the output are defined by the
// tracker, not here.
func (d *Deduplicator) PrintStats(out io.Writer) error {
	return d.tracker.PrintStats(out)
}