Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/backend/access/heap/heapam.c
Original file line number Diff line number Diff line change
Expand Up @@ -8945,6 +8945,17 @@ heap_xlog_visible(XLogReaderState *record)

PageSetAllVisible(page);

/*
* NEON: despite the comment above, we need to update the page LSN here.
* See discussion at hackers: https://www.postgresql.org/message-id/flat/039076d4f6cdd871691686361f83cb8a6913a86a.camel%40j-davis.com#101ba42b004f9988e3d54fce26fb3462
* For Neon this assignment is critical because otherwise the last written LSN tracked at compute doesn't
* match the page LSN assigned by WAL-redo and, as a result, the prefetched page is rejected.
*
* It is fixed in upstream in https://github.com/neondatabase/postgres/commit/7bf713dd2d0739fbcd4103971ed69c17ebe677ea
* but until it is merged we still need to carry a patch here.
*/
PageSetLSN(page, lsn);

MarkBufferDirty(buffer);
}
else if (action == BLK_RESTORED)
Expand Down
26 changes: 9 additions & 17 deletions src/backend/access/transam/xlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ typedef struct LastWrittenLsnCacheEntry


/*
* Cache of last written LSN for each relation chunk (hash bucket).
* Cache of last written LSN for each relation page.
* Also, to provide a request LSN for smgrnblocks and smgrexists, there is a pseudokey=InvalidBlockId which stores the LSN of the last
* relation metadata update.
* Size of the cache is limited by GUC variable lastWrittenLsnCacheSize ("lsn_cache_size"),
Expand Down Expand Up @@ -606,8 +606,6 @@ static WALInsertLockPadded *WALInsertLocks = NULL;
*/
static ControlFileData *ControlFile = NULL;

#define LAST_WRITTEN_LSN_CACHE_BUCKET 1024 /* blocks = 8Mb */

/*
* Calculate the amount of space left on the page after 'endptr'. Beware
* multiple evaluation!
Expand Down Expand Up @@ -6100,7 +6098,7 @@ GetInsertRecPtr(void)
* It returns an upper bound for the last written LSN of a given page,
* either from a cached last written LSN or a global maximum last written LSN.
* If rnode is InvalidOid then we calculate maximum among all cached LSN and maxLastWrittenLsn.
* If cache is large enough ,iterting through all hash items may be rather expensive.
* If cache is large enough, iterating through all hash items may be rather expensive.
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
*/
XLogRecPtr
Expand All @@ -6119,7 +6117,7 @@ GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
BufferTag key;
key.rnode = rnode;
key.forkNum = forknum;
key.blockNum = blkno / LAST_WRITTEN_LSN_CACHE_BUCKET;
key.blockNum = blkno;
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
if (entry != NULL)
lsn = entry->lsn;
Expand All @@ -6143,9 +6141,9 @@ GetLastWrittenLSN(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno)
/*
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
* We maintain cache of last written LSNs with limited size and LRU replacement
* policy. To reduce cache size we store max LSN not for each page, but for
* bucket (1024 blocks). This cache allows to use old LSN when
* requesting pages of unchanged or appended relations.
* policy. Keeping the last written LSN for each page allows using an old LSN when
* requesting pages of unchanged or appended relations. It is also critical for
* efficient prefetch during massive update operations (like vacuum or remove).
*
* rnode.relNode can be InvalidOid, in this case maxLastWrittenLsn is updated.
* SetLastWrittenLsn with dummy rnode is used by createdb and dbase_redo functions.
Expand All @@ -6167,19 +6165,13 @@ SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileNode rnode, ForkNumber for
LastWrittenLsnCacheEntry* entry;
BufferTag key;
bool found;
BlockNumber bucket;
BlockNumber start_bucket; /* inclusive */
BlockNumber end_bucket; /* exclusive */

start_bucket = from / LAST_WRITTEN_LSN_CACHE_BUCKET;
end_bucket = from == REL_METADATA_PSEUDO_BLOCKNO
? start_bucket + 1 : (from + n_blocks + LAST_WRITTEN_LSN_CACHE_BUCKET - 1) / LAST_WRITTEN_LSN_CACHE_BUCKET;
BlockNumber i;

key.rnode = rnode;
key.forkNum = forknum;
for (bucket = start_bucket; bucket < end_bucket; bucket++)
for (i = 0; i < n_blocks; i++)
{
key.blockNum = bucket;
key.blockNum = from + i;
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
if (found)
{
Expand Down
4 changes: 2 additions & 2 deletions src/backend/utils/misc/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2452,11 +2452,11 @@ static struct config_int ConfigureNamesInt[] =

{
{"lsn_cache_size", PGC_POSTMASTER, UNGROUPED,
gettext_noop("Size of las written LSN cache used by Neon."),
gettext_noop("Size of last written LSN cache used by Neon."),
NULL
},
&lastWrittenLsnCacheSize,
1024, 10, 1000000, /* 1024 is enough to hold 10GB database with 8Mb bucket */
128*1024, 1024, INT_MAX,
NULL, NULL, NULL
},

Expand Down