-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: added size based retention policy #2098
Changes from 5 commits
0b7e863
9fedc9f
675687c
d7140dd
c026cc9
5022d3e
70781ad
3a71923
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,98 @@ | ||||||
# exception tracking for the code below; the raises list is spelled differently before Nim 1.4 | ||||||
when (NimMajor, NimMinor) < (1, 4): | ||||||
  {.push raises: [Defect].} | ||||||
else: | ||||||
  {.push raises: [].} | ||||||
|
||||||
import | ||||||
std/times, | ||||||
stew/results, | ||||||
chronicles, | ||||||
chronos, | ||||||
os | ||||||
import | ||||||
../driver, | ||||||
../retention_policy | ||||||
|
||||||
# chronicles log topics for the log statements in this scope | ||||||
logScope: | ||||||
  topics = "waku archive retention_policy" | ||||||
|
||||||
# default size limit is 30 Gb, expressed in megabytes (30 * 1024) | ||||||
const DefaultRetentionSize*: float = 30_720 | ||||||
|
||||||
# fraction of the stored messages kept on each deletion pass, | ||||||
# i.e. the remaining 20% of the messages are removed from the database | ||||||
const DeleteLimit = 0.80 | ||||||
|
||||||
type | ||||||
  # SizeRetentionPolicy implements auto delete as follows: | ||||||
  # - sizeLimit is the size in megabytes (Mb) the database can grow up to | ||||||
  # - to reduce the size of the database, rows (messages) are removed | ||||||
  # - DeleteLimit is the fraction of the messages that is kept on each deletion pass | ||||||
  # - when the database size crosses sizeLimit, only that fraction of the messages is kept; | ||||||
  #   the rest of the outdated messages are deleted using deleteOldestMessagesNotWithinLimit(), | ||||||
  # - after the deletion, the fragmented space is reclaimed using the vacuum process. | ||||||
  SizeRetentionPolicy* = ref object of RetentionPolicy | ||||||
    sizeLimit: float | ||||||
|
||||||
proc init*(T: type SizeRetentionPolicy, size=DefaultRetentionSize): T = | ||||||
  ## Creates a SizeRetentionPolicy whose sizeLimit is `size` | ||||||
  ## (DefaultRetentionSize when the argument is omitted). | ||||||
  SizeRetentionPolicy( | ||||||
    sizeLimit: size | ||||||
  ) | ||||||
|
||||||
method execute*(p: SizeRetentionPolicy, | ||||||
                driver: ArchiveDriver): | ||||||
                Future[RetentionPolicyResult[void]] {.async.} = | ||||||
  ## When the database size exceeds p.sizeLimit, repeatedly trims the stored messages | ||||||
  ## (each pass keeps DeleteLimit, i.e. 80%, of the current message count) and vacuums, | ||||||
  ## until the measured size no longer exceeds the limit. | ||||||
  ## Returns ok() once the size is within the limit, or err(...) as soon as a driver call fails. | ||||||
|
||||||
  # to get the size of the database, both the page count and the page size are required | ||||||
  # get page count in "messages" database | ||||||
  var pageCount = (await driver.getPagesCount()).valueOr: | ||||||
    return err("failed to get Pages count: " & $error) | ||||||
|
||||||
  # get page size of database; the integer division by 1024 presumably converts bytes to kilobytes | ||||||
  # NOTE(review): a successfully read page size below 1024 also becomes 0 here, and the check below | ||||||
  # would then read `.error` from a successful result - confirm page sizes are always >= 1024 | ||||||
  var pageSizeRes = await driver.getPagesSize() | ||||||
  var pageSize: int64 = int64(pageSizeRes.valueOr(0) div 1024) | ||||||
|
||||||
  if pageSize == 0: | ||||||
    return err("failed to get Page size: " & pageSizeRes.error) | ||||||
|
||||||
  # database size in megabytes (Mb) | ||||||
  var totalSizeOfDB: float = float(pageSize * pageCount)/1024.0 | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (and elsewhere) |
||||||
|
||||||
  # nothing to do while the current database size is still below the db size limit | ||||||
  if totalSizeOfDB < p.sizeLimit: | ||||||
    return ok() | ||||||
|
||||||
  # keep deleting until the current db size falls within size limit | ||||||
  # NOTE(review): the loop has no iteration bound; it only ends when the re-measured size | ||||||
  # no longer exceeds sizeLimit or when a driver call returns an error | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see in this function a pattern that is usually solved with a Since nim doesnt have "do while" I would suggest: while true:
...(get page get count)
var totalSizeOfDB: float = float(pageSize * pageCount)/1024.0
if totalSizeOfDB > p.sizeLimit:
break
let res = await driver.deleteOldestMessagesNotWithinLimit(limit=pageDeleteWindow)
if res.isErr():
return err("deleting oldest messages failed: " & res.error) So with something like this you just need 1 single call to |
||||||
  while totalSizeOfDB > p.sizeLimit: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic looks great to me! However, given the possible issue of stepping into an infinite loop, I think we should have a PR in the near future where we prevent that possible blocking issue. For example, if the measured There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I do not like loops in the code, what I can imagine we can do is, since we have a max size/threshold of the database, we can have a lower threshold of the database as well, then inside the loop, we introduce a notation, i.e. max number of times the loop should run so that the size of the database will reduce to the lowest, Here: For eg., in the case of 30720 MBs(30 GB) as max size, 0.1 MBs as a lower threshold, and 0.80 (80% of the db pages retained) as a fraction of the database retained upon each iteration. We get 57 iterations. SO if we use this then there is a guarantee that the loop will not cross 57 times at max. This way there is a good chance that the forever loop is not encountered. WDYT? @Ivansete-status @alrevuelta There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I vote to follow the simplest approach. However it is fine for this PR for now and we can revisit that in a separate PR in the near future. |
||||||
    # to shred/delete messages, get the total row/message count | ||||||
    let numMessagesRes = await driver.getMessagesCount() | ||||||
    if numMessagesRes.isErr(): | ||||||
      return err("failed to get messages count: " & numMessagesRes.error) | ||||||
    let numMessages = numMessagesRes.value | ||||||
|
||||||
    # 80% of the total messages are to be kept, delete others | ||||||
    # (despite its name, pageDeleteWindow is a number of messages, not of pages) | ||||||
    let pageDeleteWindow = int(float(numMessages) * DeleteLimit) | ||||||
|
||||||
    let res = await driver.deleteOldestMessagesNotWithinLimit(limit=pageDeleteWindow) | ||||||
    if res.isErr(): | ||||||
        return err("deleting oldest messages failed: " & res.error) | ||||||
|
||||||
    # vacuum so that the space of the deleted pages is defragmented and storage is saved | ||||||
    # this will resize the database | ||||||
    let resVaccum = await driver.performVacuum() | ||||||
    if resVaccum.isErr(): | ||||||
      return err("vacuumming failed: " & resVaccum.error) | ||||||
|
||||||
    # get the db size again for the loop condition check | ||||||
    pageCount = (await driver.getPagesCount()).valueOr: | ||||||
      return err("failed to get Pages count: " & $error) | ||||||
|
||||||
    pageSizeRes = await driver.getPagesSize() | ||||||
    pageSize = int64(pageSizeRes.valueOr(0) div 1024) | ||||||
|
||||||
    if pageSize == 0: | ||||||
      return err("failed to get Page size: " & pageSizeRes.error) | ||||||
|
||||||
    totalSizeOfDB = float(pageSize * pageCount)/1024.0 | ||||||
|
||||||
  return ok() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unless you have a good reason to modify a variable later on, it should be assigned once with `let`. A good rule of thumb is to always use `let`, unless you are forced to create a variable that needs to be modifiable.