Skip to content

Commit

Permalink
Bugfix/parquet inventory failure (#1979)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnnyaug authored May 23, 2021
1 parent eefe10c commit 36eb6ae
Showing 1 changed file with 18 additions and 7 deletions.
25 changes: 18 additions & 7 deletions pkg/cloud/aws/s3inventory/parquet_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,31 @@ func (p *ParquetInventoryFileReader) Close() error {
return p.PFile.Close()
}

func (p *ParquetInventoryFileReader) getKeyColumnStatistics() *parquet.Statistics {
for i, c := range p.Footer.RowGroups[0].Columns {
if c.MetaData.PathInSchema[len(c.GetMetaData().GetPathInSchema())-1] == "Key" {
return p.Footer.RowGroups[0].Columns[i].GetMetaData().GetStatistics()
func (p *ParquetInventoryFileReader) getKeyColumnStatistics(rowGroupIdx int) *parquet.Statistics {
columns := p.Footer.RowGroups[rowGroupIdx].Columns
for _, c := range columns {
metaData := c.GetMetaData()
if metaData.GetPathInSchema()[len(metaData.GetPathInSchema())-1] == "Key" {
return metaData.GetStatistics()
}
}
return p.Footer.RowGroups[0].Columns[1].GetMetaData().GetStatistics()
return columns[1].GetMetaData().GetStatistics()
}

// FirstObjectKey returns the minimum Key value recorded in the statistics of
// the first row group.
//
// The Min statistic is used when it is non-empty; otherwise MinValue is
// returned (NOTE(review): presumably these map to the deprecated and current
// Parquet statistics fields respectively — confirm).
func (p *ParquetInventoryFileReader) FirstObjectKey() string {
	statistics := p.getKeyColumnStatistics(0)
	if len(statistics.GetMin()) > 0 {
		return string(statistics.GetMin())
	}
	return string(statistics.GetMinValue())
}

// LastObjectKey returns the maximum Key value recorded in the statistics of
// the last row group.
//
// The Max statistic is used when it is non-empty; otherwise MaxValue is
// returned (NOTE(review): presumably these map to the deprecated and current
// Parquet statistics fields respectively — confirm).
//
// Indexing panics if the footer contains no row groups.
func (p *ParquetInventoryFileReader) LastObjectKey() string {
	statistics := p.getKeyColumnStatistics(len(p.Footer.RowGroups) - 1)
	if len(statistics.GetMax()) > 0 {
		return string(statistics.GetMax())
	}
	// Fall back to the max-side statistic; returning MinValue here would report
	// the smallest key of the row group as the last key.
	return string(statistics.GetMaxValue())
}

func (p *ParquetInventoryFileReader) Read(n int) ([]*InventoryObject, error) {
Expand Down

0 comments on commit 36eb6ae

Please sign in to comment.