Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion core/commands/dag/dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,10 @@ var DagStatCmd = &cmds.Command{
'ipfs dag stat' fetches a DAG and returns various statistics about it.
Statistics include size and number of blocks.

Note: This command skips duplicate blocks in reporting both size and the number of blocks
Note: Duplicate blocks are identified by content hash (multihash) to reflect
actual disk usage. Identical data referenced via different CIDs is counted
once. 'dag export' uses CID-based keying and may include the same data
multiple times if referenced by different CIDs.
`,
},
Arguments: []cmds.Argument{
Expand Down
14 changes: 9 additions & 5 deletions core/commands/dag/stat.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ import (

mdag "github.com/ipfs/boxo/ipld/merkledag"
"github.com/ipfs/boxo/ipld/merkledag/traverse"
cid "github.com/ipfs/go-cid"
cmds "github.com/ipfs/go-ipfs-cmds"
mh "github.com/multiformats/go-multihash"

"github.com/ipfs/kubo/core/commands/cmdenv"
"github.com/ipfs/kubo/core/commands/cmdutils"
"github.com/ipfs/kubo/core/commands/e"
Expand All @@ -26,7 +27,10 @@ func dagStat(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment)
}
nodeGetter := mdag.NewSession(req.Context, api.Dag())

cidSet := cid.NewSet()
// Use multihash set for deduplication to reflect actual storage.
// Since Kubo v0.12.0, blocks are stored by multihash, so identical
// data with different CIDs (e.g., CIDv0 vs CIDv1) is stored once.
mhSet := mh.NewSet()
dagStatSummary := &DagStatSummary{DagStatsArray: []*DagStat{}}
for _, a := range req.Arguments {
p, err := cmdutils.PathOrCidPath(a)
Expand Down Expand Up @@ -54,11 +58,11 @@ func dagStat(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment)
currentNodeSize := uint64(len(current.Node.RawData()))
dagstats.Size += currentNodeSize
dagstats.NumBlocks++
if !cidSet.Has(current.Node.Cid()) {
// Visit returns true if this multihash was not seen before
if mhSet.Visit(current.Node.Cid().Hash()) {
dagStatSummary.incrementTotalSize(currentNodeSize)
}
dagStatSummary.incrementRedundantSize(currentNodeSize)
cidSet.Add(current.Node.Cid())
if progressive {
if err := res.Emit(dagStatSummary); err != nil {
return err
Expand All @@ -74,7 +78,7 @@ func dagStat(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment)
}
}

dagStatSummary.UniqueBlocks = cidSet.Len()
dagStatSummary.UniqueBlocks = mhSet.Len()
dagStatSummary.calculateSummary()

if err := res.Emit(dagStatSummary); err != nil {
Expand Down
7 changes: 6 additions & 1 deletion docs/changelogs/v0.40.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ This release was brought to you by the [Shipyard](https://ipshipyard.com/) team.
- [Overview](#overview)
- [🔦 Highlights](#-highlights)
- [Routing V1 HTTP API now exposed by default](#routing-v1-http-api-now-exposed-by-default)
- [Track total size when adding pins](#track-total-size-when-adding-pins]
- [Track total size when adding pins](#track-total-size-when-adding-pins)
- [Fixed `ipfs dag stat` block counting](#fixed-ipfs-dag-stat-block-counting)
- [📝 Changelog](#-changelog)
- [👨‍👩‍👧‍👦 Contributors](#-contributors)

Expand All @@ -32,6 +33,10 @@ Example output:
Fetched/Processed 336 nodes (83 MB)
```

#### Fixed `ipfs dag stat` block counting

Since Kubo v0.12.0, blocks are stored by multihash, so the same data is stored only once regardless of which CID references it. The `dag stat` command now reflects actual storage by deduplicating blocks by content hash (e.g., data referenced via both CIDv0 and CIDv1 is counted once). See `ipfs dag stat --help` for more details.

### 📝 Changelog

### 👨‍👩‍👧‍👦 Contributors
21 changes: 21 additions & 0 deletions test/cli/dag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,27 @@ func TestDag(t *testing.T) {
stat := node.RunIPFS("dag", "stat", "--progress=false", node1Cid, node2Cid)
assert.Equal(t, content, stat.Stdout.Bytes())
})

t.Run("dag stat deduplicates by multihash", func(t *testing.T) {
t.Parallel()
node := harness.NewT(t).NewNode().Init().StartDaemon()

// Add content and get CIDv0 with dag-pb (not raw leaves)
cidV0 := node.IPFSAddStr("hello world", "--cid-version=0", "--raw-leaves=false")

// Convert to CIDv1 (same multihash, different CID)
cidV1 := node.IPFS("cid", "format", "-v", "1", "-b", "base32", cidV0).Stdout.Trimmed()

// Run dag stat with both CIDs - should deduplicate by multihash
stat := node.RunIPFS("dag", "stat", "--progress=false", "--enc=json", cidV0, cidV1)
var data Data
err := json.Unmarshal(stat.Stdout.Bytes(), &data)
require.NoError(t, err)

// Same block referenced via CIDv0 and CIDv1 should be counted once
assert.Equal(t, 1, data.UniqueBlocks, "same data via different CIDs should be 1 unique block")
assert.Equal(t, 2.0, data.Ratio, "ratio should be 2.0 (2 refs to 1 block)")
})
}

func TestDagImportFastProvide(t *testing.T) {
Expand Down
Loading