Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add backfill benchmarks #412

Merged
merged 31 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
176d669
Initial stab at a simple backfill benchmark
ryanslade Oct 17, 2024
53b6644
Remove TODO
ryanslade Oct 17, 2024
e04ee36
Add license header
ryanslade Oct 17, 2024
642fe97
Print row/s seeded
ryanslade Oct 17, 2024
4af35f5
Add some comments
ryanslade Oct 18, 2024
76bf742
Casing tweak
ryanslade Oct 18, 2024
9ce9a87
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 18, 2024
d08bd95
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 18, 2024
19601cc
Run benchmarks on push to main
ryanslade Oct 18, 2024
1eeb68c
Temporarily run benchmark action
ryanslade Oct 18, 2024
d27e958
Run the correct make command
ryanslade Oct 18, 2024
db57605
Remove benchmark from build workflow
ryanslade Oct 18, 2024
f5e74a5
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 21, 2024
3c9ea47
Fix import path
ryanslade Oct 21, 2024
4d46743
Remove TODO
ryanslade Oct 21, 2024
dbe3cb1
Add benchmark for write amplification
ryanslade Oct 21, 2024
e327d56
Move cleanup code into cleanup function
ryanslade Oct 21, 2024
956e289
Format and switch from assert to require
ryanslade Oct 21, 2024
a54759e
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 21, 2024
01562e9
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 22, 2024
0930d8d
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 22, 2024
f1ab93d
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 23, 2024
df2bd12
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 23, 2024
7f6016a
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 24, 2024
b875daa
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 25, 2024
e41db77
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 25, 2024
cf96799
Don't log seeding rows/s
ryanslade Oct 28, 2024
4552b43
Update permissions
ryanslade Oct 28, 2024
655a7e4
Merge branch 'main' into rs/benchmark-scaffolding
ryanslade Oct 28, 2024
bb7c6db
Makefile comment
ryanslade Oct 29, 2024
656632a
Drop max benchmarked rows to 300k
ryanslade Oct 29, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Benchmark workflow: triggered by every push to the `main` branch.
name: Benchmark
on:
push:
branches:
- main
andrew-farries marked this conversation as resolved.
Show resolved Hide resolved
# Token scopes granted to the jobs in this workflow.
# NOTE(review): the steps in this workflow only check out the code, set up Go
# and run `make bench` — confirm that write access is actually required.
permissions:
contents: write
packages: write
ryanslade marked this conversation as resolved.
Show resolved Hide resolved
# A single `benchmark` job, fanned out over the Postgres versions in the matrix.
jobs:
benchmark:
name: 'benchmark (pg: ${{ matrix.pgVersion }})'
runs-on: ubuntu-latest
strategy:
# Keep the remaining matrix entries running when one Postgres version fails.
fail-fast: false
matrix:
pgVersion: ['14.8', '15.3', '16.4', '17.0' ,'latest']
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
# Take the Go toolchain version from the repository's go.mod.
go-version-file: 'go.mod'

- name: Run benchmarks
run: make bench
env:
# Exposes the matrix entry to `make bench`.
# NOTE(review): presumably read by the test harness to pick the Postgres
# image — confirm in internal/testutils.
POSTGRES_VERSION: ${{ matrix.pgVersion }}
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ jobs:
version: latest
args: release --clean
env:
# We use two github tokens here:
# We use two GitHub tokens here:
# * The actions-bound `GITHUB_TOKEN` with permissions to write packages.
# * The org level `GIT_TOKEN` to be able to publish the brew tap file.
# See: https://goreleaser.com/errors/resource-not-accessible-by-integration/
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@ examples:

test:
go test ./...

# Run the benchmarks in ./internal/benchmarks with verbose output.
# -benchtime=1x runs each benchmark exactly once; the benchmark bodies do not
# loop over b.N, so a time-based benchtime would not be meaningful.
bench:
go test ./internal/benchmarks -v -benchtime=1x -bench .
199 changes: 199 additions & 0 deletions internal/benchmarks/benchmarks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// SPDX-License-Identifier: Apache-2.0

package benchmarks

import (
"context"
"database/sql"
"strconv"
"testing"
"time"

"github.com/lib/pq"
"github.com/oapi-codegen/nullable"
"github.com/stretchr/testify/require"

"github.com/xataio/pgroll/internal/testutils"
"github.com/xataio/pgroll/pkg/migrations"
"github.com/xataio/pgroll/pkg/roll"
)

// unitRowsPerSecond is the unit label passed to b.ReportMetric for the
// throughput figures reported by the benchmarks in this file.
const unitRowsPerSecond = "rows/s"

// rowCounts lists the table sizes (number of seeded rows) that every
// benchmark in this file is run against, one sub-benchmark per size.
var rowCounts = []int{10_000, 100_000, 1_000_000}

// TestMain hands control of this package's test run to the shared entry point
// in testutils.
// NOTE(review): SharedTestMain presumably provisions the Postgres container
// the benchmarks connect to — confirm in internal/testutils.
func TestMain(m *testing.M) {
	testutils.SharedTestMain(m)
}

// BenchmarkBackfill measures the throughput of a backfilling migration.
//
// For every size in rowCounts it runs a sub-benchmark (named after the row
// count) that seeds a fresh `users` table via setupInitialTable and then times
// starting and completing migAlterColumn. The result is reported as rows/s.
//
// The body does not loop over b.N: each sub-benchmark invocation performs the
// migration once.
func BenchmarkBackfill(b *testing.B) {
	var (
		ctx    = context.Background()
		schema = testutils.TestSchema()
		opts   []roll.Option
	)

	for _, rowCount := range rowCounts {
		name := strconv.Itoa(rowCount)

		b.Run(name, func(b *testing.B) {
			body := func(mig *roll.Roll, db *sql.DB) {
				b.Cleanup(func() {
					require.NoError(b, mig.Close())
				})

				// Seeding is setup work: discard whatever time it consumed.
				setupInitialTable(b, ctx, schema, mig, db, rowCount)
				b.ResetTimer()

				// Timed section: start and complete the migration (the backfill).
				b.StartTimer()
				require.NoError(b, mig.Start(ctx, &migAlterColumn))
				require.NoError(b, mig.Complete(ctx))
				b.StopTimer()

				// The timer is stopped, so a single reading serves both the
				// log line and the throughput metric.
				elapsed := b.Elapsed()
				b.Logf("Backfilled %d rows in %s", rowCount, elapsed)
				b.ReportMetric(float64(rowCount)/elapsed.Seconds(), unitRowsPerSecond)
			}

			testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, schema, opts, body)
		})
	}
}

// BenchmarkWriteAmplification benchmarks the difference between updating all
// rows with and without an update trigger in place.
//
//   - NoTrigger: the table is seeded and every row is updated directly.
//   - WithTrigger: migAlterColumn is started (but not yet completed) before the
//     update, so the update runs while the migration is in progress.
//
// Each variant runs one sub-benchmark per size in rowCounts and reports rows/s.
// Only the UPDATE statement is timed; seeding and migration start/complete are
// excluded. The body does not loop over b.N.
func BenchmarkWriteAmplification(b *testing.B) {
	ctx := context.Background()
	testSchema := testutils.TestSchema()
	var opts []roll.Option

	// assertRowCount checks that exactly rowCount rows carry the updated name.
	// Failures are reported on tb — the sub-benchmark that owns the check —
	// rather than on the enclosing parent benchmark.
	assertRowCount := func(tb testing.TB, db *sql.DB, rowCount int) {
		tb.Helper()

		var count int
		err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM users WHERE name = 'person'").Scan(&count)
		require.NoError(tb, err)
		require.Equal(tb, rowCount, count)
	}

	// updateAllRows is the timed section shared by both variants: it resets
	// the timer, updates the name in all rows and reports the throughput.
	updateAllRows := func(b *testing.B, db *sql.DB, rowCount int) {
		b.Helper()

		b.ResetTimer()

		b.StartTimer()
		_, err := db.ExecContext(ctx, `UPDATE users SET name = 'person'`)
		require.NoError(b, err)
		b.StopTimer()

		rowsPerSecond := float64(rowCount) / b.Elapsed().Seconds()
		b.ReportMetric(rowsPerSecond, unitRowsPerSecond)
	}

	b.Run("NoTrigger", func(b *testing.B) {
		for _, rowCount := range rowCounts {
			b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
				testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
					setupInitialTable(b, ctx, testSchema, mig, db, rowCount)
					b.Cleanup(func() {
						require.NoError(b, mig.Close())
						assertRowCount(b, db, rowCount)
					})

					updateAllRows(b, db, rowCount)
				})
			})
		}
	})

	b.Run("WithTrigger", func(b *testing.B) {
		for _, rowCount := range rowCounts {
			b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
				testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
					setupInitialTable(b, ctx, testSchema, mig, db, rowCount)

					// Start the migration so the update below runs while it
					// is in progress.
					require.NoError(b, mig.Start(ctx, &migAlterColumn))
					b.Cleanup(func() {
						// Finish the migration outside the timed section.
						require.NoError(b, mig.Complete(ctx))
						require.NoError(b, mig.Close())
						assertRowCount(b, db, rowCount)
					})

					updateAllRows(b, db, rowCount)
				})
			})
		}
	})
}

func setupInitialTable(tb testing.TB, ctx context.Context, testSchema string, mig *roll.Roll, db *sql.DB, rowCount int) {
tb.Helper()

seed := func(tb testing.TB, rowCount int, db *sql.DB) {
seedStart := time.Now()
defer func() {
elapsed := time.Since(seedStart)
rowsPerSecond := float64(rowCount) / elapsed.Seconds()
tb.Logf("Seeded %d rows in %s (%.f rows/s)", rowCount, elapsed, rowsPerSecond)
ryanslade marked this conversation as resolved.
Show resolved Hide resolved
}()

tx, err := db.Begin()
require.NoError(tb, err)
defer tx.Rollback()

stmt, err := tx.PrepareContext(ctx, pq.CopyInSchema(testSchema, "users", "name"))
require.NoError(tb, err)

for i := 0; i < rowCount; i++ {
_, err = stmt.ExecContext(ctx, nil)
require.NoError(tb, err)
}

_, err = stmt.ExecContext(ctx)
require.NoError(tb, err)
require.NoError(tb, tx.Commit())
}

// Setup
require.NoError(tb, mig.Start(ctx, &migCreateTable))
require.NoError(tb, mig.Complete(ctx))
seed(tb, rowCount, db)
}

// migCreateTable creates the `users` table the benchmarks run against: a
// serial primary key `id` plus a nullable, non-unique varchar(255) `name`.
var migCreateTable = migrations.Migration{
	Name: "01_create_table",
	Operations: migrations.Operations{
		&migrations.OpCreateTable{
			Name: "users",
			Columns: []migrations.Column{
				{Name: "id", Type: "serial", Pk: ptr(true)},
				{Name: "name", Type: "varchar(255)", Nullable: ptr(true), Unique: ptr(false)},
			},
		},
	},
}

// migAlterColumn makes `users.name` NOT NULL and attaches a column comment.
// Existing rows whose name is NULL are backfilled with `placeholder` by the
// Up expression; non-NULL names are kept as they are.
var migAlterColumn = migrations.Migration{
	Name: "02_alter_column",
	Operations: migrations.Operations{
		&migrations.OpAlterColumn{
			Table:    "users",
			Column:   "name",
			Nullable: ptr(false),
			Comment:  nullable.NewNullableWithValue("the name of the user"),
			Up:       "(SELECT CASE WHEN name IS NULL THEN 'placeholder' ELSE name END)",
			// NOTE(review): `user_name` is not a column defined by
			// migCreateTable — confirm this Down expression is intended.
			Down: "user_name",
		},
	},
}

// ptr returns a pointer to a fresh copy of x. It exists so that literals can
// be used for optional (pointer-typed) fields, e.g. ptr(true).
func ptr[T any](x T) *T {
	p := new(T)
	*p = x
	return p
}
4 changes: 2 additions & 2 deletions internal/testutils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func WithUninitializedState(t *testing.T, fn func(*state.State)) {
fn(st)
}

func WithMigratorInSchemaAndConnectionToContainerWithOptions(t *testing.T, schema string, opts []roll.Option, fn func(mig *roll.Roll, db *sql.DB)) {
func WithMigratorInSchemaAndConnectionToContainerWithOptions(t testing.TB, schema string, opts []roll.Option, fn func(mig *roll.Roll, db *sql.DB)) {
t.Helper()
ctx := context.Background()

Expand Down Expand Up @@ -236,7 +236,7 @@ func WithMigratorAndConnectionToContainerWithOptions(t *testing.T, opts []roll.O
// - a connection to the new database
// - the connection string to the new database
// - the name of the new database
func setupTestDatabase(t *testing.T) (*sql.DB, string, string) {
func setupTestDatabase(t testing.TB) (*sql.DB, string, string) {
t.Helper()
ctx := context.Background()

Expand Down