Skip to content
This repository has been archived by the owner on May 24, 2024. It is now read-only.

Commit

Permalink
Collect I/O timing stats for AO/AOCO tables (#15243)
Browse files Browse the repository at this point in the history
Collect I/O timing stats for AO/AOCO tables

Reviewed-by: Huansong Fu <fuhuansong@gmail.com>
Reviewed-by: Ashwin Agrawal <aashwin@vmware.com>
  • Loading branch information
l-wang authored Mar 31, 2023
1 parent c5b15cc commit fc081c7
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 7 deletions.
18 changes: 18 additions & 0 deletions src/backend/cdb/cdbbufferedappend.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include "cdb/cdbappendonlyxlog.h"
#include "cdb/cdbbufferedappend.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/faultinjector.h"
#include "utils/guc.h"

static void BufferedAppendWrite(
Expand Down Expand Up @@ -155,12 +157,28 @@ BufferedAppendWrite(BufferedAppend *bufferedAppend, bool needsWAL)
while (bytesleft > 0)
{
int32 byteswritten;
instr_time io_start,
io_time;

if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);

byteswritten = FileWrite(bufferedAppend->file,
(char *) largeWriteMemory + bytestotal,
bytesleft,
bufferedAppend->largeWritePosition + bytestotal,
WAIT_EVENT_DATA_FILE_WRITE);

SIMPLE_FAULT_INJECTOR("ao_storage_write_after_filewrite");

if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
}

if (byteswritten < 0)
ereport(ERROR,
(errcode_for_file_access(),
Expand Down
30 changes: 25 additions & 5 deletions src/backend/cdb/cdbbufferedread.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "cdb/cdbbufferedread.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/faultinjector.h"
#include "utils/guc.h"

static void BufferedReadIo(
Expand Down Expand Up @@ -174,11 +176,28 @@ BufferedReadIo(
offset = 0;
while (largeReadLen > 0)
{
int actualLen = FileRead(bufferedRead->file,
(char *) largeReadMemory,
largeReadLen,
bufferedRead->fileOff,
WAIT_EVENT_DATA_FILE_READ);
int actualLen;
instr_time io_start,
io_time;

if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);

actualLen = FileRead(bufferedRead->file,
(char *) largeReadMemory,
largeReadLen,
bufferedRead->fileOff,
WAIT_EVENT_DATA_FILE_READ);

SIMPLE_FAULT_INJECTOR("ao_storage_read_after_fileread");

if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
INSTR_TIME_SUBTRACT(io_time, io_start);
pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
}

if (actualLen == 0)
ereport(ERROR, (errcode_for_file_access(),
Expand All @@ -202,6 +221,7 @@ BufferedReadIo(
offset,
actualLen,
bufferedRead->largeReadLen)));

bufferedRead->fileOff += actualLen;

elogif(Debug_appendonly_print_read_block, LOG,
Expand Down
43 changes: 42 additions & 1 deletion src/test/isolation2/input/uao/collected_stats_views.source
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ begin
end; /* in func */
$$ language plpgsql;

-- Test collected heap_blks_* I/O stats for append-optimized tables
-- Setup test table
CREATE TABLE statio_@amname@(a int, b int) USING @amname@;
INSERT INTO statio_@amname@ SELECT 0, i FROM generate_series(1, 100000)i;
-- heap_blks_read should be zero to begin with
1U: SELECT heap_blks_read, heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'statio_@amname@';

INSERT INTO statio_@amname@ SELECT 0, i FROM generate_series(1, 100000)i;
-- heap_blks_read should remain zero after INSERT
SELECT heap_blks_read, heap_blks_hit FROM gp_statio_all_tables WHERE segment_id = 1 AND relname = 'statio_@amname@';

Expand All @@ -36,3 +39,41 @@ SELECT sum(a+b) FROM statio_@amname@;
-- cleanup
DROP TABLE statio_@amname@;
DROP FUNCTION wait_until_heap_blks_read_change_to(table_name text, stat_val_expected bigint);
-- stats should be gone after table is dropped
1U: SELECT heap_blks_read, heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'statio_@amname@';
1Uq:

-- Test collected I/O timing stats for append-optimized tables
1U: SET track_io_timing = on;

-- Setup test table
1U: CREATE TEMP TABLE stat_io_timing(a int, b int) USING @amname@;
1U: INSERT INTO stat_io_timing SELECT 0, i FROM generate_series(1, 100000)i;

-- Sleep for 1s just for once to ensure blk_read_time and blk_write_time in
-- pg_stat_database get updated. The number is selected to be larger than the
-- 1-ms granularity of these two fields.
SELECT gp_inject_fault('ao_storage_read_after_fileread', 'sleep', '', '', '', 1, 1, 1, dbid)
FROM gp_segment_configuration where role = 'p' AND content = 1;
SELECT gp_inject_fault('ao_storage_write_after_filewrite', 'sleep', '', '', '', 1, 1, 1, dbid)
FROM gp_segment_configuration where role = 'p' AND content = 1;

-- start_matchsubs
-- m{I/O Timings: read=\d*\.\d+ write=\d+\.\d+.*}
-- s{I/O Timings: read=\d*\.\d+ write=\d+\.\d+.*}{I/O Timings: read=##.### write=##.###}
-- m{I/O Timings: read=\d*\.\d+.*}
-- s{I/O Timings: read=\d*\.\d+.*}{I/O Timings: read=##.###.*}
-- m/Buffers: shared hit=\d+ read=\d+/
-- s/Buffers: shared hit=\d+ read=\d+/Buffers: shared hit=### read=###/
-- end_matchsubs
-- "I/O Timings" should be available for Append-optimized table, and "Buffers" should also be available due to auxiliary table lookups
1U: EXPLAIN (ANALYZE, BUFFERS, COSTS OFF, TIMING OFF, SUMMARY OFF) INSERT INTO stat_io_timing SELECT * FROM stat_io_timing;

-- blk_read_time and blk_write_time should be non-zero
1U: SELECT datname, blk_read_time > 0 AS has_blk_read_time, blk_write_time > 0 AS has_blk_write_time from pg_stat_database WHERE datname = 'isolation2test';

-- cleanup
SELECT gp_inject_fault('all', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
1U: DROP TABLE stat_io_timing;
1U: RESET track_io_timing;
1Uq:
79 changes: 78 additions & 1 deletion src/test/isolation2/output/uao/collected_stats_views.source
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,19 @@
create or replace function wait_until_heap_blks_read_change_to(table_name text, stat_val_expected bigint) returns text as $$ declare stat_val int; /* in func */ i int; /* in func */ begin i := 0; /* in func */ while i < 1200 loop select heap_blks_read into stat_val from pg_statio_all_tables where relname = table_name; /* in func */ if stat_val = stat_val_expected then /* in func */ return 'OK'; /* in func */ end if; /* in func */ perform pg_sleep(0.1); /* in func */ perform pg_stat_clear_snapshot(); /* in func */ i := i + 1; /* in func */ end loop; /* in func */ return 'Fail'; /* in func */ end; /* in func */ $$ language plpgsql;
CREATE

-- Test collected heap_blks_* I/O stats for append-optimized tables
-- Setup test table
CREATE TABLE statio_@amname@(a int, b int) USING @amname@;
CREATE
-- heap_blks_read should be zero to begin with
1U: SELECT heap_blks_read, heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'statio_@amname@';
heap_blks_read | heap_blks_hit
----------------+---------------
0 | 0
(1 row)

INSERT INTO statio_@amname@ SELECT 0, i FROM generate_series(1, 100000)i;
INSERT 100000

-- heap_blks_read should remain zero after INSERT
SELECT heap_blks_read, heap_blks_hit FROM gp_statio_all_tables WHERE segment_id = 1 AND relname = 'statio_@amname@';
heap_blks_read | heap_blks_hit
Expand All @@ -35,3 +42,73 @@ DROP TABLE statio_@amname@;
DROP
DROP FUNCTION wait_until_heap_blks_read_change_to(table_name text, stat_val_expected bigint);
DROP
-- stats should be gone after table is dropped
1U: SELECT heap_blks_read, heap_blks_hit FROM pg_statio_all_tables WHERE relname = 'statio_@amname@';
heap_blks_read | heap_blks_hit
----------------+---------------
(0 rows)
1Uq: ... <quitting>

-- Test collected I/O timing stats for append-optimized tables
1U: SET track_io_timing = on;
SET

-- Setup test table
1U: CREATE TEMP TABLE stat_io_timing(a int, b int) USING @amname@;
CREATE
1U: INSERT INTO stat_io_timing SELECT 0, i FROM generate_series(1, 100000)i;
INSERT 100000

-- Sleep for 1s just for once to ensure blk_read_time and blk_write_time in
-- pg_stat_database get updated. The number is selected to be larger than the
-- 1-ms granularity of these two fields.
SELECT gp_inject_fault('ao_storage_read_after_fileread', 'sleep', '', '', '', 1, 1, 1, dbid) FROM gp_segment_configuration where role = 'p' AND content = 1;
gp_inject_fault
-----------------
Success:
(1 row)
SELECT gp_inject_fault('ao_storage_write_after_filewrite', 'sleep', '', '', '', 1, 1, 1, dbid) FROM gp_segment_configuration where role = 'p' AND content = 1;
gp_inject_fault
-----------------
Success:
(1 row)

-- start_matchsubs
-- m{I/O Timings: read=\d*\.\d+ write=\d+\.\d+.*}
-- s{I/O Timings: read=\d*\.\d+ write=\d+\.\d+.*}{I/O Timings: read=##.### write=##.###}
-- m{I/O Timings: read=\d*\.\d+.*}
-- s{I/O Timings: read=\d*\.\d+.*}{I/O Timings: read=##.###.*}
-- m/Buffers: shared hit=\d+ read=\d+/
-- s/Buffers: shared hit=\d+ read=\d+/Buffers: shared hit=### read=###/
-- end_matchsubs
-- "I/O Timings" should be available for Append-optimized table, and "Buffers" should also be available due to auxiliary table lookups
1U: EXPLAIN (ANALYZE, BUFFERS, COSTS OFF, TIMING OFF, SUMMARY OFF) INSERT INTO stat_io_timing SELECT * FROM stat_io_timing;
QUERY PLAN
--------------------------------------------------------------------------------
Insert on stat_io_timing (actual rows=0 loops=1)
Buffers: shared hit=14160 read=1
I/O Timings: read=1001.399 write=1001.310
-> Seq Scan on stat_io_timing stat_io_timing_1 (actual rows=100000 loops=1)
Buffers: shared hit=82 read=1
I/O Timings: read=1001.399
Optimizer: Postgres query optimizer
(7 rows)

-- blk_read_time and blk_write_time should be non-zero
1U: SELECT datname, blk_read_time > 0 AS has_blk_read_time, blk_write_time > 0 AS has_blk_write_time from pg_stat_database WHERE datname = 'isolation2test';
datname | has_blk_read_time | has_blk_write_time
----------------+-------------------+--------------------
isolation2test | t | t
(1 row)

-- cleanup
SELECT gp_inject_fault('all', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p';
gp_inject_fault
-----------------
Success:
(1 row)
1U: DROP TABLE stat_io_timing;
DROP
1U: RESET track_io_timing;
RESET
1Uq: ... <quitting>

0 comments on commit fc081c7

Please sign in to comment.