Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[refer #111] Persist logical replication files in WAL #306

Merged
merged 9 commits into from
Oct 18, 2023
39 changes: 38 additions & 1 deletion src/backend/access/heap/rewriteheap.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/message.h"
#include "replication/slot.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
Expand Down Expand Up @@ -785,6 +786,36 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
* ------------------------------------------------------------------------
*/

/*
 * NEON: we need to persist mapping file in WAL
 *
 * Emits a logical message (LogLogicalMessage with its last argument false)
 * whose prefix is "neon-file:<path>".
 *
 * path - path of the mapping file; used only to build the message prefix
 *        and for log output.
 * fd   - raw descriptor of the file, opened for reading, or a negative value
 *        to record that the file was removed (an empty message is logged).
 *
 * When fd is valid the whole file is read from offset 0 and logged as the
 * message payload.  The descriptor's file position is left at end of file.
 * Raises ERROR if the size cannot be determined, the file cannot be rewound,
 * or the contents cannot be read completely.
 */
static void
wallog_mapping_file(char const* path, int fd)
{
	/* Leave room for the "neon-file:" tag so a MAXPGPATH-long path still fits. */
	char		prefix[MAXPGPATH + sizeof("neon-file:")];
	off_t		size;
	size_t		total;
	char	   *buf;

	snprintf(prefix, sizeof(prefix), "neon-file:%s", path);

	if (fd < 0)
	{
		elog(DEBUG1, "neon: deleting contents of rewrite file %s", path);
		/* unlink file */
		LogLogicalMessage(prefix, NULL, 0, false);
		return;
	}

	size = lseek(fd, 0, SEEK_END);
	if (size < 0)
		elog(ERROR, "Failed to get size of mapping file: %m");

	/* off_t is not necessarily long: widen explicitly to match the format. */
	elog(DEBUG1, "neon: writing contents of rewrite file %s, size %lld",
		 path, (long long) size);

	if (lseek(fd, 0, SEEK_SET) < 0)
		elog(ERROR, "Failed to rewind mapping file: %m");

	buf = palloc((size_t) size);

	/* read() may legally return fewer bytes than requested; keep going. */
	total = 0;
	while (total < (size_t) size)
	{
		ssize_t		nread = read(fd, buf + total, (size_t) size - total);

		if (nread < 0)
			elog(ERROR, "Failed to read mapping file: %m");
		if (nread == 0)
			elog(ERROR, "Failed to read mapping file: unexpected end of file after %zu of %lld bytes",
				 total, (long long) size);
		total += (size_t) nread;
	}

	LogLogicalMessage(prefix, buf, (size_t) size, false);
	pfree(buf);
}

/*
* Do preparations for logging logical mappings during a rewrite if
* necessary. If we detect that we don't need to log anything we'll prevent
Expand Down Expand Up @@ -920,6 +951,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
errmsg("could not write to file \"%s\", wrote %d of %d: %m", src->path,
written, len)));
src->off += len;
wallog_mapping_file(src->path, FileGetRawDesc(src->vfd));

XLogBeginInsert();
XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
Expand Down Expand Up @@ -1006,7 +1038,7 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
src->off = 0;
memcpy(src->path, path, sizeof(path));
src->vfd = PathNameOpenFile(path,
O_CREAT | O_EXCL | O_WRONLY | PG_BINARY);
O_CREAT | O_EXCL | O_RDWR | PG_BINARY);
if (src->vfd < 0)
ereport(ERROR,
(errcode_for_file_access(),
Expand Down Expand Up @@ -1172,6 +1204,8 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();

wallog_mapping_file(path, fd);

if (CloseTransientFile(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
Expand Down Expand Up @@ -1247,6 +1281,7 @@ CheckPointLogicalRewriteHeap(void)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not remove file \"%s\": %m", path)));
wallog_mapping_file(path, -1);
}
else
{
Expand Down Expand Up @@ -1275,6 +1310,8 @@ CheckPointLogicalRewriteHeap(void)
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();

wallog_mapping_file(path, fd);

if (CloseTransientFile(fd) != 0)
ereport(ERROR,
(errcode_for_file_access(),
Expand Down
35 changes: 30 additions & 5 deletions src/backend/access/transam/xlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ static void CreateEndOfRecoveryRecord(void);
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn,
XLogRecPtr missingContrecPtr,
TimeLineID newTLI);
static void PreCheckPointGuts(int flags);
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags);
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo);
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void);
Expand Down Expand Up @@ -6684,6 +6685,11 @@ CreateCheckPoint(int flags)
*/
SyncPreCheckpoint();

/*
* NEON: perform checkpoint actions that require writing to the WAL before we determine the REDO pointer.
*/
PreCheckPointGuts(flags);

/*
* Use a critical section to force system panic if we have trouble.
*/
Expand Down Expand Up @@ -7191,6 +7197,28 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
return recptr;
}

/*
 * Run the replication-related checkpoint routines, in this fixed order:
 * relation map, replication slots, snapbuild, logical rewrite heap and
 * replication origin.
 *
 * Called from PreCheckPointGuts() for shutdown checkpoints and from
 * CheckPointGuts() for all other checkpoints, so it runs exactly once per
 * checkpoint.
 */
static void
CheckPointReplicationState(void)
{
CheckPointRelationMap();
CheckPointReplicationSlots();
CheckPointSnapBuild();
CheckPointLogicalRewriteHeap();
CheckPointReplicationOrigin();
}

/*
 * NEON: logical records are used to persist information about slots,
 * origins, the relation map, etc.  Writing them inside a shutdown checkpoint
 * makes Postgres panic with "concurrent write-ahead log activity while
 * database system is shutting down", so for shutdown checkpoints that work
 * is done here, before the checkpoint REDO position is determined.
 *
 * For any other kind of checkpoint this function does nothing.
 */
static void
PreCheckPointGuts(int flags)
{
	/* Only shutdown checkpoints need the early pass. */
	if ((flags & CHECKPOINT_IS_SHUTDOWN) == 0)
		return;

	CheckPointReplicationState();
}

/*
* Flush all data in shared memory to disk, and fsync
*
Expand All @@ -7200,11 +7228,8 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
static void
CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
{
CheckPointRelationMap();
CheckPointReplicationSlots();
CheckPointSnapBuild();
CheckPointLogicalRewriteHeap();
CheckPointReplicationOrigin();
if (!(flags & CHECKPOINT_IS_SHUTDOWN))
CheckPointReplicationState();

/* Write out all dirty data in SLRUs and the main buffer pool */
TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
Expand Down
19 changes: 19 additions & 0 deletions src/backend/replication/logical/origin.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
#include "nodes/execnodes.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/message.h"
#include "replication/origin.h"
#include "storage/condition_variable.h"
#include "storage/copydir.h"
Expand Down Expand Up @@ -562,10 +563,14 @@ CheckPointReplicationOrigin(void)
int i;
uint32 magic = REPLICATION_STATE_MAGIC;
pg_crc32c crc;
char *buf;
size_t chkp_size;

if (max_replication_slots == 0)
return;

buf = palloc(sizeof(magic) + max_replication_slots*sizeof(ReplicationStateOnDisk) + sizeof(crc));

INIT_CRC32C(crc);

/* make sure no old temp file is remaining */
Expand Down Expand Up @@ -599,6 +604,9 @@ CheckPointReplicationOrigin(void)
errmsg("could not write to file \"%s\": %m",
tmppath)));
}
memcpy(buf, &magic, sizeof magic);
chkp_size = sizeof(magic);

COMP_CRC32C(crc, &magic, sizeof(magic));

/* prevent concurrent creations/drops */
Expand Down Expand Up @@ -641,6 +649,8 @@ CheckPointReplicationOrigin(void)
errmsg("could not write to file \"%s\": %m",
tmppath)));
}
memcpy(buf + chkp_size, &disk_state, sizeof(disk_state));
chkp_size += sizeof(disk_state);

COMP_CRC32C(crc, &disk_state, sizeof(disk_state));
}
Expand All @@ -660,6 +670,15 @@ CheckPointReplicationOrigin(void)
errmsg("could not write to file \"%s\": %m",
tmppath)));
}
if (chkp_size != sizeof(magic)) /* has some valid origins */
{
memcpy(buf + chkp_size, &crc, sizeof crc);
chkp_size += sizeof(crc);

/* NEON specific: persist snapshot in storage using logical message */
LogLogicalMessage("neon-file:pg_logical/replorigin_checkpoint", buf, chkp_size, false);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes in this file are actually redundant, as replication origins serve the reverse purpose -- tracking the progress of replaying a logical stream, not serving it. But they should not do any harm either (and would be necessary if we want Neon to be a logical subscriber).

}
pfree(buf);

if (CloseTransientFile(tmpfd) != 0)
ereport(PANIC,
Expand Down
11 changes: 11 additions & 0 deletions src/backend/replication/logical/snapbuild.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/message.h"
#include "replication/reorderbuffer.h"
#include "replication/snapbuild.h"
#include "storage/block.h" /* debugging output */
Expand Down Expand Up @@ -1599,6 +1600,7 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
int fd;
char tmppath[MAXPGPATH];
char path[MAXPGPATH];
char prefix[MAXPGPATH];
int ret;
struct stat stat_buf;
Size sz;
Expand Down Expand Up @@ -1721,6 +1723,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", tmppath)));

/* NEON specific: persist snapshot in storage using logical message */
snprintf(prefix, sizeof(prefix), "neon-file:%s", path);
LogLogicalMessage(prefix, (char*)ondisk, needed_length, false);

errno = 0;
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
if ((write(fd, ondisk, needed_length)) != needed_length)
Expand Down Expand Up @@ -2027,6 +2033,7 @@ CheckPointSnapBuild(void)
DIR *snap_dir;
struct dirent *snap_de;
char path[MAXPGPATH + 21];
char prefix[MAXPGPATH + 31];

/*
* We start off with a minimum of the last redo pointer. No new
Expand Down Expand Up @@ -2085,6 +2092,10 @@ CheckPointSnapBuild(void)
{
elog(DEBUG1, "removing snapbuild snapshot %s", path);

/* NEON specific: delete file from storage using logical message */
snprintf(prefix, sizeof(prefix), "neon-file:%s", path);
LogLogicalMessage(prefix, NULL, 0, false);

/*
* It's not particularly harmful, though strange, if we can't
* remove the file here. Don't prevent the checkpoint from
Expand Down
19 changes: 19 additions & 0 deletions src/backend/replication/slot.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/slot.h"
#include "replication/message.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/proc.h"
Expand Down Expand Up @@ -683,6 +684,15 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
sprintf(path, "pg_replslot/%s", NameStr(slot->data.name));
sprintf(tmppath, "pg_replslot/%s.tmp", NameStr(slot->data.name));

if (SlotIsLogical(slot))
{
/* NEON specific: delete slot from storage using logical message */
char prefix[MAXPGPATH];
snprintf(prefix, sizeof(prefix), "neon-file:%s/state", path);
elog(LOG, "Drop replication slot %s", path);
LogLogicalMessage(prefix, NULL, 0, false);
}

/*
* Rename the slot directory on disk, so that we'll no longer recognize
* this as a valid slot. Note that if this fails, we've got to mark the
Expand Down Expand Up @@ -1649,6 +1659,15 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
ReplicationSlotOnDiskChecksummedSize);
FIN_CRC32C(cp.checksum);

if (SlotIsLogical(slot) && cp.slotdata.restart_lsn != InvalidXLogRecPtr)
{
/* NEON specific: persist slot in storage using logical message */
char prefix[MAXPGPATH];
snprintf(prefix, sizeof(prefix), "neon-file:%s", path);
elog(LOG, "Save replication slot at %s restart_lsn=%X/%X", path, LSN_FORMAT_ARGS(cp.slotdata.restart_lsn));
LogLogicalMessage(prefix, (char*)&cp, sizeof cp, false);
}

errno = 0;
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_WRITE);
if ((write(fd, &cp, sizeof(cp))) != sizeof(cp))
Expand Down