From 03bf7fe0023acc65e005c7f8339715b924faba3e Mon Sep 17 00:00:00 2001 From: Sergei Parshev Date: Mon, 13 May 2024 17:13:00 -0400 Subject: [PATCH 1/5] Added pprof into the node API to debug memory leaks --- docs/openapi.yaml | 46 +++++++++++++++++++++++++++++++++++++++ lib/openapi/api/api_v1.go | 34 +++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/docs/openapi.yaml b/docs/openapi.yaml index dcfca07..4cfd133 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -681,6 +681,52 @@ paths: security: - basic_auth: [] + # In theory not needed if `required: false` work in the next API call, but it doesn't + /api/v1/node/this/profiling/: + get: + summary: Shows pprof index page + description: + Shows debug information about heap, goroutines, symbols etc. Very helpful in figuring out + the memory issues and to understand the internals of the Fish Node execution. + operationId: NodeThisProfilingIndexGet + tags: + - Node + responses: + '200': + description: Successful operation + '400': + description: Bad parameter or conditions + '401': + $ref: '#/components/responses/UnauthorizedError' + security: + - basic_auth: [] + + /api/v1/node/this/profiling/{handler}: + get: + summary: Gives profiling data from pprof + description: + Shows debug information about heap, goroutines, symbols etc. Very helpful in figuring out + the memory issues and to understand the internals of the Fish Node execution. + operationId: NodeThisProfilingGet + tags: + - Node + parameters: + - name: handler + in: path + description: Which pprof handler to use. If empty - will show index + required: false + schema: + type: string + responses: + '200': + description: Successful operation + '400': + description: Bad parameter or conditions + '401': + $ref: '#/components/responses/UnauthorizedError' + security: + - basic_auth: [] + /api/v1/location/: get: summary: Get list of locations diff --git a/lib/openapi/api/api_v1.go b/lib/openapi/api/api_v1.go index f0dfb7a..7e99197 100644 --- a/lib/openapi/api/api_v1.go +++ b/lib/openapi/api/api_v1.go @@ -15,6 +15,7 @@ package api import ( "fmt" "net/http" + "net/http/pprof" "time" "github.com/google/uuid" @@ -481,6 +482,39 @@ func (e *Processor) NodeThisMaintenanceGet(c echo.Context, params types.NodeThis return c.JSON(http.StatusOK, params) } +func (e *Processor) NodeThisProfilingIndexGet(c echo.Context) error { + return e.NodeThisProfilingGet(c, "") +} + +func (e *Processor) NodeThisProfilingGet(c echo.Context, handler string) error { + user := c.Get("user") + if user.(*types.User).Name != "admin" { + c.JSON(http.StatusBadRequest, H{"message": fmt.Sprintf("Only 'admin' can see profiling info")}) + return fmt.Errorf("Only 'admin' user can see profiling info") + } + + switch handler { + case "": + // Show index if no handler name provided + pprof.Index(c.Response().Writer, c.Request()) + case "allocs", "block", "goroutine", "heap", "mutex", "threadcreate": + // PProf usual handlers + pprof.Handler(handler).ServeHTTP(c.Response(), c.Request()) + case "cmdline": + pprof.Cmdline(c.Response(), c.Request()) + case "profile": + pprof.Profile(c.Response(), c.Request()) + case "symbol": + pprof.Symbol(c.Response(), c.Request()) + case "trace": + pprof.Trace(c.Response(), c.Request()) + default: + return fmt.Errorf("Unable to find requested profiling handler") + } + + return nil +} + func (e *Processor) VoteListGet(c echo.Context, params types.VoteListGetParams) error { user := c.Get("user") if user.(*types.User).Name != "admin" { From 07c1304b801d570b1c9de6e010d27576abe9dbf3 Mon Sep 17 00:00:00 2001 From: Sergei Parshev Date: Wed, 15 May 2024 11:52:17 -0400 Subject: [PATCH 2/5] Clarified note for the added endpoint and made better the error messages --- docs/openapi.yaml | 4 +++- lib/openapi/api/api_v1.go | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 4cfd133..0bd1f82 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -681,7 +681,9 @@ paths: security: - basic_auth: [] - # In theory not needed if `required: false` work in the next API call, but it doesn't + # This /profiling/ endpoint is separate from the /profiling/{handler} because `required: false` + # behaved as expected. Since it is not, /profiling/ will route to a separate method that just + # calls the /profiling/{handler} endpoint with the empty string /api/v1/node/this/profiling/: get: summary: Shows pprof index page diff --git a/lib/openapi/api/api_v1.go b/lib/openapi/api/api_v1.go index 7e99197..4b217bd 100644 --- a/lib/openapi/api/api_v1.go +++ b/lib/openapi/api/api_v1.go @@ -489,8 +489,9 @@ func (e *Processor) NodeThisProfilingIndexGet(c echo.Context) error { func (e *Processor) NodeThisProfilingGet(c echo.Context, handler string) error { user := c.Get("user") if user.(*types.User).Name != "admin" { - c.JSON(http.StatusBadRequest, H{"message": fmt.Sprintf("Only 'admin' can see profiling info")}) - return fmt.Errorf("Only 'admin' user can see profiling info") + message := "Only 'admin' can see profiling info" + c.JSON(http.StatusBadRequest, H{"message": message}) + return fmt.Errorf(message) } switch handler { @@ -509,7 +510,9 @@ func (e *Processor) NodeThisProfilingGet(c echo.Context, handler string) error { case "trace": pprof.Trace(c.Response(), c.Request()) default: - return fmt.Errorf("Unable to find requested profiling handler") + message := "Unable to find requested profiling handler" + c.JSON(http.StatusNotFound, H{"message": message}) + return fmt.Errorf(message) } return nil From 4975dea9c2cec01bb465e429c7f09b3844c4db1e Mon Sep 17 00:00:00 2001 From: Sergei Parshev Date: Wed, 15 May 2024 13:54:23 -0400 Subject: [PATCH 3/5] Added negative to the explanation --- docs/openapi.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 0bd1f82..9893ba2 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -682,8 +682,8 @@ paths: - basic_auth: [] # This /profiling/ endpoint is separate from the /profiling/{handler} because `required: false` - # behaved as expected. Since it is not, /profiling/ will route to a separate method that just - # calls the /profiling/{handler} endpoint with the empty string + # did not behaved as expected. Since it is not, /profiling/ will route to a separate method that + # just calls the /profiling/{handler} endpoint with the empty string /api/v1/node/this/profiling/: get: summary: Shows pprof index page From 889bef58f13762d1ad6ecd844ad22496519732db Mon Sep 17 00:00:00 2001 From: Sergei Parshev Date: Tue, 4 Jun 2024 18:51:07 -0400 Subject: [PATCH 4/5] Added CODEOWNERS & development section in README.md --- CODEOWNERS | 2 ++ README.md | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..28203c6 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,2 @@ +# All the files for now +* @sparshev diff --git a/README.md b/README.md index 2027bf5..9488720 100644 --- a/README.md +++ b/README.md @@ -189,13 +189,30 @@ The election process: Simplify the cluster management, for example adding labels or check the status [#8](https://github.com/adobe/aquarium-fish/issues/8). -## Integration tests +## Development + +Is relatively easy - you change logic, you run `./build.sh` to create a binary, testing it and send +the PR when you think it's perfect enough. That will be great if you can ask in the discussions or +create an issue on GitHub to align with the current direction and the plans. + +### Integration tests To verify that everything works as expected you can run integration tests like that: ```sh $ FISH_PATH=$PWD/aquarium-fish.darwin_amd64 go test -v -failfast -parallel 4 ./tests/... ``` +### Profiling + +Is available through pprof like that: +``` +$ go tool pprof 'https+insecure://:@localhost:8001/api/v1/node/this/profiling/heap' +$ curl -ku ":" 'https://localhost:8001/api/v1/node/this/prof:w +iling/?debug=1' +``` + +Or you can open https://localhost:8001/api/v1/node/this/profiling/ in browser to see the index. + ## API There is a number of ways to communicate with the Fish cluster, and the most important one is API. From cdc25c3342ce42969c50a65e37c2c2c7e561cf9d Mon Sep 17 00:00:00 2001 From: Sergei Parshev Date: Wed, 5 Jun 2024 12:57:41 -0400 Subject: [PATCH 5/5] Fixed mistake in the line --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 9488720..e362aad 100644 --- a/README.md +++ b/README.md @@ -207,8 +207,7 @@ $ FISH_PATH=$PWD/aquarium-fish.darwin_amd64 go test -v -failfast -parallel 4 ./t Is available through pprof like that: ``` $ go tool pprof 'https+insecure://:@localhost:8001/api/v1/node/this/profiling/heap' -$ curl -ku ":" 'https://localhost:8001/api/v1/node/this/prof:w -iling/?debug=1' +$ curl -ku ":" 'https://localhost:8001/api/v1/node/this/profiling/?debug=1' ``` Or you can open https://localhost:8001/api/v1/node/this/profiling/ in browser to see the index.