Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor nbtsort-xx.c #110

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions lib/nbtree/nbtsort-10.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ typedef struct BTPageState
OffsetNumber btps_lastoff; /* last item offset loaded */
uint32 btps_level; /* tree level (0 = leaf) */
Size btps_full; /* "full" if less than this much free space */
struct BTPageState *btps_next; /* link to parent level, if any */
struct BTPageState *btps_next; /* link to parent level, if any */
} BTPageState;

/*
Expand All @@ -122,8 +122,8 @@ typedef struct BTWriteState
Relation heap;
Relation index;
bool btws_use_wal; /* dump pages to WAL? */
BlockNumber btws_pages_alloced; /* # pages allocated */
BlockNumber btws_pages_written; /* # pages written out */
BlockNumber btws_pages_alloced; /* # pages allocated */
BlockNumber btws_pages_written; /* # pages written out */
Page btws_zeropage; /* workspace for filling zeroes */
} BTWriteState;

Expand Down Expand Up @@ -208,7 +208,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
ShowUsage("BTREE BUILD (Spool) STATISTICS");
ResetUsage();
}
#endif /* BTREE_BUILD_STATS */
#endif /* BTREE_BUILD_STATS */

tuplesort_performsort(btspool->sortstate);
if (btspool2)
Expand Down Expand Up @@ -345,7 +345,7 @@ _bt_pagestate(BTWriteState *wstate, uint32 level)
state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
else
state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
BTREE_DEFAULT_FILLFACTOR);
BTREE_DEFAULT_FILLFACTOR);
/* no parent level, yet */
state->btps_next = NULL;

Expand Down Expand Up @@ -485,12 +485,12 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
if (itupsz > BTMaxItemSize(npage))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
itupsz, BTMaxItemSize(npage),
RelationGetRelationName(wstate->index)),
errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
"Consider a function index of an MD5 hash of the value, "
"or use full text indexing."),
errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
itupsz, BTMaxItemSize(npage),
RelationGetRelationName(wstate->index)),
errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
"Consider a function index of an MD5 hash of the value, "
"or use full text indexing."),
errtableconstraint(wstate->heap,
RelationGetRelationName(wstate->index))));

Expand Down Expand Up @@ -566,7 +566,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)

oopaque->btpo_next = nblkno;
nopaque->btpo_prev = oblkno;
nopaque->btpo_next = P_NONE; /* redundant */
nopaque->btpo_next = P_NONE; /* redundant */
}

/*
Expand Down Expand Up @@ -695,7 +695,7 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
*/

/* the preparation of merge */
itup = tuplesort_getindextuple(btspool->sortstate,true);
itup = tuplesort_getindextuple(btspool->sortstate, true);
itup2 = tuplesort_getindextuple(btspool2->sortstate, true);
indexScanKey = _bt_mkscankey_nodata(wstate->index);

Expand Down
112 changes: 67 additions & 45 deletions lib/nbtree/nbtsort-11.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,6 @@
* its parent level. When we have only one page on a level, it must be
* the root -- it can be attached to the btree metapage and we are done.
*
* This code is moderately slow (~10% slower) compared to the regular
* btree (insertion) build code on sorted or well-clustered data. On
* random data, however, the insertion build code is unusable -- the
* difference on a 60MB heap is a factor of 15 because the random
* probes into the btree thrash the buffer pool. (NOTE: the above
* "10%" estimate is probably obsolete, since it refers to an old and
* not very good external sort implementation that used to exist in
* this module. tuplesort.c is almost certainly faster.)
*
* It is not wise to pack the pages entirely full, since then *any*
* insertion would cause a split (and not only of the leaf page; the need
* for a split would cascade right up the tree). The steady-state load
Expand Down Expand Up @@ -73,6 +64,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/smgr.h"
Expand All @@ -87,6 +79,7 @@
#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002)
#define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003)
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004)
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000005)

/*
* DISABLE_LEADER_PARTICIPATION disables the leader's participation in
Expand Down Expand Up @@ -201,6 +194,7 @@ typedef struct BTLeader
Sharedsort *sharedsort;
Sharedsort *sharedsort2;
Snapshot snapshot;
BufferUsage *bufferusage;
} BTLeader;

/*
Expand Down Expand Up @@ -496,34 +490,6 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate,
return reltuples;
}

/*
 * _bt_spoolinit() -- build a new spool and start its tuplesort.
 *
 * Returns a freshly palloc0'd BTSpool that records the given heap, index
 * and isunique flag, and whose sortstate comes from
 * tuplesort_begin_index_btree().  "isdead" only influences how much sort
 * memory the spool is given (see below).
 */
static BTSpool *
_bt_spoolinit(Relation heap, Relation index, bool isunique, bool isdead)
{
	BTSpool    *spool;
	int			sortmem_kb;

	/*
	 * Pick the sort memory budget.  Index creation is sped up by sizing the
	 * sort area as maintenance_work_mem instead of work_mem; that should be
	 * OK because one backend cannot run several index creations in parallel.
	 * Building a unique index needs two BTSpool objects, though, and the
	 * second one (holding dead tuples) is not expected to get very full, so
	 * it is given only work_mem.
	 */
	if (isdead)
		sortmem_kb = work_mem;
	else
		sortmem_kb = maintenance_work_mem;

	/* Zero-filled allocation; remaining fields are set explicitly below */
	spool = (BTSpool *) palloc0(sizeof(BTSpool));
	spool->heap = heap;
	spool->index = index;
	spool->isunique = isunique;
	spool->sortstate = tuplesort_begin_index_btree(heap, index, isunique,
												   sortmem_kb, NULL, false);

	return spool;
}

/*
* clean up a spool structure and its substructures.
*/
Expand Down Expand Up @@ -1277,8 +1243,8 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Sharedsort *sharedsort2;
BTSpool *btspool = buildstate->spool;
BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader));
BufferUsage *bufferusage;
bool leaderparticipates = true;
char *sharedquery;
int querylen;

#ifdef DISABLE_LEADER_PARTICIPATION
Expand All @@ -1293,6 +1259,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
Assert(request > 0);
pcxt = CreateParallelContext("postgres", "_bt_parallel_build_main",
request, true);

scantuplesortstates = leaderparticipates ? request + 1 : request;

/*
Expand Down Expand Up @@ -1328,14 +1295,40 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
shm_toc_estimate_keys(&pcxt->estimator, 3);
}

/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
querylen = strlen(debug_query_string);
shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
/*
* Estimate space for BufferUsage -- PARALLEL_KEY_BUFFER_USAGE.
*
* If there are no extensions loaded that care, we could skip this. We
* have no way of knowing whether anyone's looking at pgBufferUsage, so do
* it unconditionally.
*/
shm_toc_estimate_chunk(&pcxt->estimator,
mul_size(sizeof(BufferUsage), pcxt->nworkers));
shm_toc_estimate_keys(&pcxt->estimator, 1);

/* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
if (debug_query_string)
{
querylen = strlen(debug_query_string);
shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
shm_toc_estimate_keys(&pcxt->estimator, 1);
}
else
querylen = 0; /* keep compiler quiet */

/* Everyone's had a chance to ask for space, so now create the DSM */
InitializeParallelDSM(pcxt);

/* If no DSM segment was available, back out (do serial build) */
if (pcxt->seg == NULL)
{
if (IsMVCCSnapshot(snapshot))
UnregisterSnapshot(snapshot);
DestroyParallelContext(pcxt);
ExitParallelMode();
return;
}

/* Store shared build state, for which we reserved space */
btshared = (BTShared *) shm_toc_allocate(pcxt->toc, estbtshared);
/* Initialize immutable state */
Expand Down Expand Up @@ -1383,9 +1376,19 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
}

/* Store query string for workers */
sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
memcpy(sharedquery, debug_query_string, querylen + 1);
shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
if (debug_query_string)
{
char *sharedquery;

sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
memcpy(sharedquery, debug_query_string, querylen + 1);
shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
}

/* Allocate space for each worker's BufferUsage; no need to initialize */
bufferusage = shm_toc_allocate(pcxt->toc,
mul_size(sizeof(BufferUsage), pcxt->nworkers));
shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);

/* Launch workers, saving status for leader/caller */
LaunchParallelWorkers(pcxt);
Expand All @@ -1397,6 +1400,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
btleader->sharedsort = sharedsort;
btleader->sharedsort2 = sharedsort2;
btleader->snapshot = snapshot;
btleader->bufferusage = bufferusage;

/* If no workers were successfully launched, back out (do serial build) */
if (pcxt->nworkers_launched == 0)
Expand Down Expand Up @@ -1425,8 +1429,18 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request)
static void
_bt_end_parallel(BTLeader *btleader)
{
int i;

/* Shutdown worker processes */
WaitForParallelWorkersToFinish(btleader->pcxt);

/*
* Next, accumulate buffer usage. (This must wait for the workers to
* finish, or we might get incomplete data.)
*/
for (i = 0; i < btleader->pcxt->nworkers_launched; i++)
InstrAccumParallelQuery(&btleader->bufferusage[i]);

/* Free last reference to MVCC snapshot, if one was used */
if (IsMVCCSnapshot(btleader->snapshot))
UnregisterSnapshot(btleader->snapshot);
Expand Down Expand Up @@ -1563,6 +1577,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
Relation indexRel;
LOCKMODE heapLockmode;
LOCKMODE indexLockmode;
BufferUsage *bufferusage;
int sortmem;

#ifdef BTREE_BUILD_STATS
Expand All @@ -1571,7 +1586,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
#endif /* BTREE_BUILD_STATS */

/* Set debug_query_string for individual workers first */
sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, false);
sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
debug_query_string = sharedquery;

/* Report the query string from leader */
Expand Down Expand Up @@ -1624,11 +1639,18 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc)
tuplesort_attach_shared(sharedsort2, seg);
}

/* Prepare to track buffer usage during parallel execution */
InstrStartParallelQuery();

/* Perform sorting of spool, and possibly a spool2 */
sortmem = maintenance_work_mem / btshared->scantuplesortstates;
_bt_parallel_scan_and_sort(btspool, btspool2, btshared, sharedsort,
sharedsort2, sortmem);

/* Report buffer usage during parallel execution */
bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber]);

#ifdef BTREE_BUILD_STATS
if (log_btree_build_stats)
{
Expand Down
Loading