From f7b632cc11de0aef6aa6a7f6fb31b610e5732060 Mon Sep 17 00:00:00 2001 From: Gusted Date: Fri, 18 Aug 2023 04:39:23 +0200 Subject: [PATCH 1/5] add slow SQL query warning - Databases are one of the most important parts of Forgejo, every interaction uses the database in one way or another. Therefore, it is important to maintain the database and recognize when the server is not doing well with the database. There already is the option to log *every* SQL query along with its execution time, but monitoring becomes impractical for larger instances and takes up unnecessary storage in the logs. - Add a QoL enhancement that allows instance administrators to specify a threshold value beyond which query execution time is logged as a warning in the xorm logger. The default value is a conservative five seconds to avoid this becoming a source of spam in the logs. - The use case for this patch is that with an instance the size of Codeberg, monitoring SQL logs is not very fruitful and most of them are uninteresting. Recently, in the context of persistent deadlock issues (https://codeberg.org/forgejo/forgejo/issues/220), I have noticed that certain queries hold locks on tables like comment and issue for several seconds. This patch helps to identify which queries these are and when they happen. - Added unit test. (cherry picked from commit 9cf501f1af4cd870221cef6af489618785b71186) --- models/db/engine.go | 28 +++++++++++++++++++++++++++ models/db/engine_test.go | 38 +++++++++++++++++++++++++++++++++++++ modules/setting/database.go | 2 ++ 3 files changed, 68 insertions(+) diff --git a/models/db/engine.go b/models/db/engine.go index 182d8cd993696..9aec903780313 100755 --- a/models/db/engine.go +++ b/models/db/engine.go @@ -11,10 +11,13 @@ import ( "io" "reflect" "strings" + "time" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "xorm.io/xorm" + "xorm.io/xorm/contexts" "xorm.io/xorm/names" "xorm.io/xorm/schemas" @@ -146,6 +149,13 @@ func InitEngine(ctx context.Context) error { xormEngine.SetConnMaxLifetime(setting.Database.ConnMaxLifetime) xormEngine.SetDefaultContext(ctx) + if setting.Database.SlowQueryTreshold > 0 { + xormEngine.AddHook(&SlowQueryHook{ + Treshold: setting.Database.SlowQueryTreshold, + Logger: log.GetLogger("xorm"), + }) + } + SetDefaultEngine(ctx, xormEngine) return nil } @@ -299,3 +309,21 @@ func SetLogSQL(ctx context.Context, on bool) { sess.Engine().ShowSQL(on) } } + +type SlowQueryHook struct { + Treshold time.Duration + Logger log.Logger +} + +var _ contexts.Hook = &SlowQueryHook{} + +func (SlowQueryHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) { + return c.Ctx, nil +} + +func (h *SlowQueryHook) AfterProcess(c *contexts.ContextHook) error { + if c.ExecuteTime >= h.Treshold { + h.Logger.Log(8, log.WARN, "[Slow SQL Query] %s %v - %v", c.SQL, c.Args, c.ExecuteTime) + } + return nil +} diff --git a/models/db/engine_test.go b/models/db/engine_test.go index e3dbfbdc24205..fbc40102dac43 100644 --- a/models/db/engine_test.go +++ b/models/db/engine_test.go @@ -6,15 +6,19 @@ package db_test import ( "path/filepath" "testing" + "time" "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" _ "code.gitea.io/gitea/cmd" // for TestPrimaryKeys "github.com/stretchr/testify/assert" + "xorm.io/xorm" ) func TestDumpDatabase(t *testing.T) { @@ -84,3 +88,37 @@ func TestPrimaryKeys(t *testing.T) { } } } + +func TestSlowQuery(t *testing.T) { + lc, cleanup := test.NewLogChecker("slow-query") + lc.StopMark("[Slow SQL Query]") + defer cleanup() + + e := db.GetEngine(db.DefaultContext) + engine, ok := e.(*xorm.Engine) + assert.True(t, ok) + + // It's not possible to clean this up with XORM, but it's luckily not harmful + // to leave around. + engine.AddHook(&db.SlowQueryHook{ + Treshold: time.Second * 10, + Logger: log.GetLogger("slow-query"), + }) + + // NOOP query. + e.Exec("SELECT 1 WHERE false;") + + _, stopped := lc.Check(100 * time.Millisecond) + assert.False(t, stopped) + + engine.AddHook(&db.SlowQueryHook{ + Treshold: 0, // Every query should be logged. + Logger: log.GetLogger("slow-query"), + }) + + // NOOP query. + e.Exec("SELECT 1 WHERE false;") + + _, stopped = lc.Check(100 * time.Millisecond) + assert.True(t, stopped) +} diff --git a/modules/setting/database.go b/modules/setting/database.go index 709655368c67b..219c96e4d7190 100644 --- a/modules/setting/database.go +++ b/modules/setting/database.go @@ -44,6 +44,7 @@ var ( ConnMaxLifetime time.Duration IterateBufferSize int AutoMigration bool + SlowQueryTreshold time.Duration }{ Timeout: 500, IterateBufferSize: 50, @@ -86,6 +87,7 @@ func loadDBSetting(rootCfg ConfigProvider) { Database.DBConnectRetries = sec.Key("DB_RETRIES").MustInt(10) Database.DBConnectBackoff = sec.Key("DB_RETRY_BACKOFF").MustDuration(3 * time.Second) Database.AutoMigration = sec.Key("AUTO_MIGRATION").MustBool(true) + Database.SlowQueryTreshold = sec.Key("SLOW_QUERY_TRESHOLD").MustDuration(5 * time.Second) } // DBConnStr returns database connection string From 546841008389def033c6d9bfdcef1bcacc4b9ec1 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Mon, 16 Oct 2023 13:18:49 +0200 Subject: [PATCH 2/5] Remove tests from "add slow SQL query warning" This partially reverts commit f7b632cc11de0aef6aa6a7f6fb31b610e5732060. --- models/db/engine_test.go | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/models/db/engine_test.go b/models/db/engine_test.go index fbc40102dac43..e3dbfbdc24205 100644 --- a/models/db/engine_test.go +++ b/models/db/engine_test.go @@ -6,19 +6,15 @@ package db_test import ( "path/filepath" "testing" - "time" "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/test" _ "code.gitea.io/gitea/cmd" // for TestPrimaryKeys "github.com/stretchr/testify/assert" - "xorm.io/xorm" ) func TestDumpDatabase(t *testing.T) { @@ -88,37 +84,3 @@ func TestPrimaryKeys(t *testing.T) { } } } - -func TestSlowQuery(t *testing.T) { - lc, cleanup := test.NewLogChecker("slow-query") - lc.StopMark("[Slow SQL Query]") - defer cleanup() - - e := db.GetEngine(db.DefaultContext) - engine, ok := e.(*xorm.Engine) - assert.True(t, ok) - - // It's not possible to clean this up with XORM, but it's luckily not harmful - // to leave around. - engine.AddHook(&db.SlowQueryHook{ - Treshold: time.Second * 10, - Logger: log.GetLogger("slow-query"), - }) - - // NOOP query. - e.Exec("SELECT 1 WHERE false;") - - _, stopped := lc.Check(100 * time.Millisecond) - assert.False(t, stopped) - - engine.AddHook(&db.SlowQueryHook{ - Treshold: 0, // Every query should be logged. - Logger: log.GetLogger("slow-query"), - }) - - // NOOP query. - e.Exec("SELECT 1 WHERE false;") - - _, stopped = lc.Check(100 * time.Millisecond) - assert.True(t, stopped) -} From 41acb653b049eca31c877c57c93903a73766dbc7 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Mon, 16 Oct 2023 13:20:13 +0200 Subject: [PATCH 3/5] explain why 8 --- models/db/engine.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/db/engine.go b/models/db/engine.go index 9aec903780313..c148501734ef4 100755 --- a/models/db/engine.go +++ b/models/db/engine.go @@ -323,6 +323,9 @@ func (SlowQueryHook) BeforeProcess(c *contexts.ContextHook) (context.Context, er func (h *SlowQueryHook) AfterProcess(c *contexts.ContextHook) error { if c.ExecuteTime >= h.Treshold { + // 8 is the amount of skips passed to runtime.Caller, so that in the log the correct function + // is being displayed (the function that ultimately wants to execute the query in the code) + // instead of the function of the slow query hook being called. h.Logger.Log(8, log.WARN, "[Slow SQL Query] %s %v - %v", c.SQL, c.Args, c.ExecuteTime) } return nil From 438fee919eabaa426dfe782c33bf1f60ed347822 Mon Sep 17 00:00:00 2001 From: Earl Warren Date: Tue, 10 Oct 2023 13:47:38 +0200 Subject: [PATCH 4/5] document the setting --- custom/conf/app.example.ini | 4 ++++ docs/content/administration/config-cheat-sheet.en-us.md | 1 + 2 files changed, 5 insertions(+) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 8cd5e6c144fad..eaa0f5bf82ecc 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -409,6 +409,10 @@ USER = root ;; ;; Whether execute database models migrations automatically ;AUTO_MIGRATION = true +;; +;; Threshold value (in seconds) beyond which query execution time is logged as a warning in the xorm logger +;; +;SLOW_QUERY_TRESHOLD = 5s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index 16f8caf22fd5a..9dd01f8f1f814 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -453,6 +453,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a - `MAX_IDLE_CONNS` **2**: Max idle database connections on connection pool, default is 2 - this will be capped to `MAX_OPEN_CONNS`. - `CONN_MAX_LIFETIME` **0 or 3s**: Sets the maximum amount of time a DB connection may be reused - default is 0, meaning there is no limit (except on MySQL where it is 3s - see #6804 & #7071). - `AUTO_MIGRATION` **true**: Whether execute database models migrations automatically. +- `SLOW_QUERY_TRESHOLD` **5s**: Threshold value in seconds beyond which query execution time is logged as a warning in the xorm logger. Please see #8540 & #8273 for further discussion of the appropriate values for `MAX_OPEN_CONNS`, `MAX_IDLE_CONNS` & `CONN_MAX_LIFETIME` and their relation to port exhaustion. From cc5ccf4f1fc35c35823d5d994a29c0a452e44217 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 23 Feb 2024 01:31:51 +0100 Subject: [PATCH 5/5] fix misspell --- custom/conf/app.example.ini | 2 +- .../config-cheat-sheet.en-us.md | 2 +- models/db/engine.go | 12 ++--- modules/setting/database.go | 44 +++++++++---------- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index bc29c37e4dd93..dc9e8a23a1366 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -415,7 +415,7 @@ USER = root ;; ;; Threshold value (in seconds) beyond which query execution time is logged as a warning in the xorm logger ;; -;SLOW_QUERY_TRESHOLD = 5s +;SLOW_QUERY_THRESHOLD = 5s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/administration/config-cheat-sheet.en-us.md b/docs/content/administration/config-cheat-sheet.en-us.md index 57bbc787184c5..91f55b631f935 100644 --- a/docs/content/administration/config-cheat-sheet.en-us.md +++ b/docs/content/administration/config-cheat-sheet.en-us.md @@ -458,7 +458,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a - `MAX_IDLE_CONNS` **2**: Max idle database connections on connection pool, default is 2 - this will be capped to `MAX_OPEN_CONNS`. - `CONN_MAX_LIFETIME` **0 or 3s**: Sets the maximum amount of time a DB connection may be reused - default is 0, meaning there is no limit (except on MySQL where it is 3s - see #6804 & #7071). - `AUTO_MIGRATION` **true**: Whether execute database models migrations automatically. -- `SLOW_QUERY_TRESHOLD` **5s**: Threshold value in seconds beyond which query execution time is logged as a warning in the xorm logger. +- `SLOW_QUERY_THRESHOLD` **5s**: Threshold value in seconds beyond which query execution time is logged as a warning in the xorm logger. [^1]: It may be necessary to specify a hostport even when listening on a unix socket, as the port is part of the socket name. see [#24552](https://github.com/go-gitea/gitea/issues/24552#issuecomment-1681649367) for additional details. diff --git a/models/db/engine.go b/models/db/engine.go index 8a2ea5d74f4b6..2a2743e927985 100755 --- a/models/db/engine.go +++ b/models/db/engine.go @@ -146,10 +146,10 @@ func InitEngine(ctx context.Context) error { xormEngine.SetConnMaxLifetime(setting.Database.ConnMaxLifetime) xormEngine.SetDefaultContext(ctx) - if setting.Database.SlowQueryTreshold > 0 { + if setting.Database.SlowQueryThreshold > 0 { xormEngine.AddHook(&SlowQueryHook{ - Treshold: setting.Database.SlowQueryTreshold, - Logger: log.GetLogger("xorm"), + Threshold: setting.Database.SlowQueryThreshold, + Logger: log.GetLogger("xorm"), }) } @@ -310,8 +310,8 @@ func SetLogSQL(ctx context.Context, on bool) { } type SlowQueryHook struct { - Treshold time.Duration - Logger log.Logger + Threshold time.Duration + Logger log.Logger } var _ contexts.Hook = &SlowQueryHook{} @@ -321,7 +321,7 @@ func (SlowQueryHook) BeforeProcess(c *contexts.ContextHook) (context.Context, er } func (h *SlowQueryHook) AfterProcess(c *contexts.ContextHook) error { - if c.ExecuteTime >= h.Treshold { + if c.ExecuteTime >= h.Threshold { // 8 is the amount of skips passed to runtime.Caller, so that in the log the correct function // is being displayed (the function that ultimately wants to execute the query in the code) // instead of the function of the slow query hook being called. diff --git a/modules/setting/database.go b/modules/setting/database.go index 54551b44bb73f..1a4bf648058ff 100644 --- a/modules/setting/database.go +++ b/modules/setting/database.go @@ -25,27 +25,27 @@ var ( // Database holds the database settings Database = struct { - Type DatabaseType - Host string - Name string - User string - Passwd string - Schema string - SSLMode string - Path string - LogSQL bool - MysqlCharset string - CharsetCollation string - Timeout int // seconds - SQLiteJournalMode string - DBConnectRetries int - DBConnectBackoff time.Duration - MaxIdleConns int - MaxOpenConns int - ConnMaxLifetime time.Duration - IterateBufferSize int - AutoMigration bool - SlowQueryTreshold time.Duration + Type DatabaseType + Host string + Name string + User string + Passwd string + Schema string + SSLMode string + Path string + LogSQL bool + MysqlCharset string + CharsetCollation string + Timeout int // seconds + SQLiteJournalMode string + DBConnectRetries int + DBConnectBackoff time.Duration + MaxIdleConns int + MaxOpenConns int + ConnMaxLifetime time.Duration + IterateBufferSize int + AutoMigration bool + SlowQueryThreshold time.Duration }{ Timeout: 500, IterateBufferSize: 50, @@ -88,7 +88,7 @@ func loadDBSetting(rootCfg ConfigProvider) { Database.DBConnectRetries = sec.Key("DB_RETRIES").MustInt(10) Database.DBConnectBackoff = sec.Key("DB_RETRY_BACKOFF").MustDuration(3 * time.Second) Database.AutoMigration = sec.Key("AUTO_MIGRATION").MustBool(true) - Database.SlowQueryTreshold = sec.Key("SLOW_QUERY_TRESHOLD").MustDuration(5 * time.Second) + Database.SlowQueryThreshold = sec.Key("SLOW_QUERY_THRESHOLD").MustDuration(5 * time.Second) } // DBConnStr returns database connection string