-
Notifications
You must be signed in to change notification settings - Fork 499
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
services/horizon: Reap history object tables when ingestion is idle (#…
…4518) While Horizon removes history data when the `--history-retention-count` flag is set, it doesn't clear lookup historical tables. Lookup tables are `[id, key name]` pairs that allow setting pointers to keys in historical tables, thus saving disk space. This data can occupy a vast space on disk and is never used when old historical data is deleted. This commit adds code responsible for clearing orphaned rows in lookup historical tables. Orphaned rows can appear when old data is removed by the reaper. The new code is separate from the existing reaper code (see "Alternative solutions" below) and activates after each ledger if there are no more ledgers to ingest in the backend. This has two advantages: it does not slow down catchup, and it works only when ingestion is idle, which shouldn't affect ingestion at all. To ensure performance is not affected, the `ReapLookupTables` method is called with a context that has a 5-second timeout, which means that if it does not finish the work in the specified time it will simply be cancelled. The solution here requires new indexes added in c2d52f0 (without them, finding the rows to delete is slow). For each lookup table, we check the number of occurrences of a given lookup ID in all the tables in which the lookup table is used. If no occurrences are found, the row is removed from the lookup table. Rows are removed in batches of 10000 rows (can be modified in the future). The cursor is updated when a table is processed, so after the next ledger ingestion the next chunk of rows is checked. When the cursor reaches the end of a table it is reset back to 0. This ensures that all the orphaned rows are removed eventually (some rows can be skipped because new rows are added to lookup tables by ingestion and some are removed by the reaper, so `offset` does not always skip to the place it should to cover the entire table).
#### Alternative solutions

While working on this I tried to implement @fons's idea from #4396, which was to remove, before clearing historical data, the rows that are not present in other ranges. There is a general problem with this solution. The lookup tables are actively used by ingestion, which means that if rows are deleted while ingestion reads a given row it can create inconsistent data. We could modify the reaper to acquire the ingestion lock, but if there are many ledgers to remove it can affect ingestion. We could also write a query that finds and removes all the orphaned rows, but it's too slow to be executed between ingestion of two consecutive ledgers.
- Loading branch information
Showing
9 changed files
with
389 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package history_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stellar/go/services/horizon/internal/db2/history" | ||
"github.com/stellar/go/services/horizon/internal/ledger" | ||
"github.com/stellar/go/services/horizon/internal/reap" | ||
"github.com/stellar/go/services/horizon/internal/test" | ||
) | ||
|
||
// TestReapLookupTables runs DeleteUnretainedHistory with RetentionCount = 1 followed by | ||
// ReapLookupTables, and asserts that the row counts of history_claimable_balances and | ||
// history_liquidity_pools drop from 1 to 0 and that a zero offset is returned for each table. | ||
func TestReapLookupTables(t *testing.T) { | ||
tt := test.Start(t) | ||
defer tt.Finish() | ||
ledgerState := &ledger.State{} | ||
// Presumably loads the "kahuna" fixture and returns its ledger status -- confirm against test.T.Scenario. | ||
ledgerState.SetStatus(tt.Scenario("kahuna")) | ||
|
||
db := tt.HorizonSession() | ||
|
||
// NOTE(review): the first argument looks like the retention count; it is overridden via | ||
// sys.RetentionCount below before any reaping happens. | ||
sys := reap.New(0, db, ledgerState) | ||
|
||
var ( | ||
prevLedgers, curLedgers int | ||
prevClaimableBalances, curClaimableBalances int | ||
prevLiquidityPools, curLiquidityPools int | ||
) | ||
|
||
// Prev: row counts captured before anything is reaped. | ||
{ | ||
err := db.GetRaw(tt.Ctx, &prevLedgers, `SELECT COUNT(*) FROM history_ledgers`) | ||
tt.Require.NoError(err) | ||
err = db.GetRaw(tt.Ctx, &prevClaimableBalances, `SELECT COUNT(*) FROM history_claimable_balances`) | ||
tt.Require.NoError(err) | ||
err = db.GetRaw(tt.Ctx, &prevLiquidityPools, `SELECT COUNT(*) FROM history_liquidity_pools`) | ||
tt.Require.NoError(err) | ||
} | ||
|
||
// Refresh the ledger status, then delete unretained history with RetentionCount = 1 | ||
// (the assertions below expect a single ledger to remain). | ||
ledgerState.SetStatus(tt.LoadLedgerStatus()) | ||
sys.RetentionCount = 1 | ||
err := sys.DeleteUnretainedHistory(tt.Ctx) | ||
tt.Require.NoError(err) | ||
|
||
q := &history.Q{tt.HorizonSession()} | ||
|
||
// ReapLookupTables is run between an explicit Begin and Commit on q. | ||
err = q.Begin() | ||
tt.Require.NoError(err) | ||
|
||
// nil is passed as the second argument -- presumably "no previously saved offsets"; | ||
// TODO confirm against ReapLookupTables. | ||
newOffsets, err := q.ReapLookupTables(tt.Ctx, nil) | ||
tt.Require.NoError(err) | ||
|
||
err = q.Commit() | ||
tt.Require.NoError(err) | ||
|
||
// Cur: the same row counts, captured after both reaping steps. | ||
{ | ||
err := db.GetRaw(tt.Ctx, &curLedgers, `SELECT COUNT(*) FROM history_ledgers`) | ||
tt.Require.NoError(err) | ||
err = db.GetRaw(tt.Ctx, &curClaimableBalances, `SELECT COUNT(*) FROM history_claimable_balances`) | ||
tt.Require.NoError(err) | ||
err = db.GetRaw(tt.Ctx, &curLiquidityPools, `SELECT COUNT(*) FROM history_liquidity_pools`) | ||
tt.Require.NoError(err) | ||
} | ||
|
||
// The "prev" expectations are tied to the fixture contents: 61 ledgers, one claimable | ||
// balance row and one liquidity pool row. | ||
tt.Assert.Equal(61, prevLedgers, "prevLedgers") | ||
tt.Assert.Equal(1, curLedgers, "curLedgers") | ||
tt.Assert.Equal(1, prevClaimableBalances, "prevClaimableBalances") | ||
tt.Assert.Equal(0, curClaimableBalances, "curClaimableBalances") | ||
tt.Assert.Equal(1, prevLiquidityPools, "prevLiquidityPools") | ||
tt.Assert.Equal(0, curLiquidityPools, "curLiquidityPools") | ||
|
||
// Exactly two offsets are expected, keyed by lookup table name, and both must be 0. | ||
tt.Assert.Len(newOffsets, 2) | ||
tt.Assert.Equal(int64(0), newOffsets["history_claimable_balances"]) | ||
tt.Assert.Equal(int64(0), newOffsets["history_liquidity_pools"]) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.