Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exclude well-known empty entities #27

Merged
merged 1 commit into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions denylist.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ const maxHeaderSize = 1 << 20 // 1MiB per the spec
const maxLineSize = 2 << 20 // 2MiB per the spec
const currentVersion = 1

// SafeCids is a map of known, inoffensive CIDs that correspond to
// empty blocks or empty directories. Blocking these can break applications, so
// they are ignored (with a warning) when they appear on a denylist.
//
// Each key is a well-known CID and each value is a short human-readable
// description of what that CID represents.
var SafeCids = map[cid.Cid]string{
cid.MustParse("QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn"): "empty unixfs directory",
cid.MustParse("bafyaabakaieac"): "empty unixfs directory inlined",
cid.MustParse("bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku"): "empty block",
cid.MustParse("bafkqaaa"): "empty block inlined",
cid.MustParse("QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH"): "empty block dag-pb",
cid.MustParse("bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua"): "empty block dag-cbor",
cid.MustParse("baguqeeraiqjw7i2vwntyuekgvulpp2det2kpwt6cd7tx5ayqybqpmhfk76fa"): "empty block dag-json",
}

// DenylistHeader represents the header of a Denylist file.
type DenylistHeader struct {
Version int
Expand Down Expand Up @@ -422,6 +435,14 @@ func (dl *Denylist) parseLine(line string, number uint64) error {
if err != nil {
return fmt.Errorf("error extracting cid %s (%s:%d): %w", cidStr, dl.Filename, number, err)
}

// Blocking these by mistake can break some applications (by
// "some" we mean Kubo).
if _, ok := SafeCids[c]; ok {
logger.Warnf("Ignored: %s corresponds to a known empty folder or block and will not be blocked", c)
return nil
}

e.Multihash = c.Hash()

blockedPath, err := NewBlockedPath(subPath)
Expand Down
17 changes: 17 additions & 0 deletions tester/test.deny
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,20 @@ author: "@hsanjuan"
# But not /path2
//gW813G35CnLsy7gRYYHuf63hrz71U1xoLFDVeV7actx6oX

# rule14
# These are known cids corresponding to empty blocks/folders
# Even if they appear here, they should not be blocked
# empty unixfs directory
/ipfs/QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn
# empty unixfs directory inlined
/ipfs/bafyaabakaieac
# empty block
/ipfs/bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku
# empty block inlined
/ipfs/bafkqaaa
# empty block dag-pb
/ipfs/QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH
# empty block dag-cbor
/ipfs/bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua
# empty block dag-json
/ipfs/baguqeeraiqjw7i2vwntyuekgvulpp2det2kpwt6cd7tx5ayqybqpmhfk76fa
38 changes: 25 additions & 13 deletions tester/tester.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,22 +133,34 @@ hints:

func (s *Suite) testCID() error {
// rule1
c1 := cid.MustParse("bafybeihvvulpp4evxj7x7armbqcyg6uezzuig6jp3lktpbovlqfkuqeuoq")
c2 := cid.MustParse("bafkreihvvulpp4evxj7x7armbqcyg6uezzuig6jp3lktpbovlqfkuqeuoq")
c3 := cid.MustParse("QmesfgDQ3q6prBy2Kg2gKbW4MAGuWiRP2DVuGA5MZSERLo")

if !s.b.IsCidBlocked(c1) {
return errors.New("testCID: c1 should be blocked (rule1)")
}

if !s.b.IsCidBlocked(c2) {
return errors.New("testCID: c2 should be blocked (rule1)")
blockedCids := []cid.Cid{
cid.MustParse("bafybeihvvulpp4evxj7x7armbqcyg6uezzuig6jp3lktpbovlqfkuqeuoq"),
cid.MustParse("bafkreihvvulpp4evxj7x7armbqcyg6uezzuig6jp3lktpbovlqfkuqeuoq"),
cid.MustParse("QmesfgDQ3q6prBy2Kg2gKbW4MAGuWiRP2DVuGA5MZSERLo"),
}

// rule14
allowCids := []cid.Cid{
cid.MustParse("QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn"),
cid.MustParse("bafyaabakaieac"),
cid.MustParse("bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku"),
cid.MustParse("bafkqaaa"),
cid.MustParse("QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH"),
cid.MustParse("bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua"),
cid.MustParse("baguqeeraiqjw7i2vwntyuekgvulpp2det2kpwt6cd7tx5ayqybqpmhfk76fa"),
Comment on lines +143 to +150
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: I'm not referencing nopfs.SafeCids here so that the testing module remains fully independent from nopfs, as perhaps one day it can test other implementations.

}

for _, c := range blockedCids {
if !s.b.IsCidBlocked(c) {
return fmt.Errorf("testCID: %s should be blocked (rule1)", c)
}
}

if !s.b.IsCidBlocked(c3) {
return errors.New("testCID: c3 should be blocked (rule1)")
for _, c := range allowCids {
if s.b.IsCidBlocked(c) {
return fmt.Errorf("testCID: %s should NOT be blocked (rule14)", c)
}
}

return nil
}

Expand Down