Skip to content

Commit

Permalink
improve storage alert
Browse files Browse the repository at this point in the history
  • Loading branch information
LexLuthr committed May 7, 2024
1 parent 1b1448a commit e16a789
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 61 deletions.
2 changes: 1 addition & 1 deletion cmd/curio/deps/deps.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (deps *Deps) PopulateRemainingDeps(ctx context.Context, cctx *cli.Context,
}
}

if deps.Cfg == nil {
if deps.DB == nil {
deps.DB, err = MakeDB(cctx)
if err != nil {
return err
Expand Down
41 changes: 32 additions & 9 deletions curiosrc/alertmanager/alerts.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ import (
"strings"

"github.com/BurntSushi/toml"
"github.com/dustin/go-humanize"
"golang.org/x/xerrors"

"github.com/filecoin-project/go-address"
"github.com/filecoin-project/go-state-types/abi"
"github.com/filecoin-project/go-state-types/big"

"github.com/filecoin-project/lotus/node/config"
)
Expand All @@ -20,7 +22,7 @@ import (
// It queries the database for the configuration of each layer and decodes it using the toml.Decode function.
// It then iterates over the addresses in the configuration and curates a list of unique addresses.
// If an address is not found in the chain node, it adds an alert to the alert map.
// If the balance of an address is below 5 Fil, it adds an alert to the alert map.
// If the balance of an address is below MinimumWalletBalance, it adds an alert to the alert map.
// If there are any errors encountered during the process, the err field of the alert map is populated.
func balanceCheck(al *alerts) {
Name := "Balance Check"
Expand Down Expand Up @@ -267,22 +269,43 @@ func permanentStorageCheck(al *alerts) {
return
}

var totalRequiredSpace int64
type sm struct {
s sector
size int64
}

sectorMap := make(map[sm]bool)

for _, sec := range sectors {
space := int64(0)
sec := sec
sectorSize, err := sec.Proof.SectorSize()
if err != nil {
totalRequiredSpace += int64(64 << 30)
space = int64(64<<30)*2 + int64(200<<20) // Assume 64 GiB sector
} else {
space = int64(sectorSize)*2 + int64(200<<20) // sealed + unsealed + cache
}

key := sm{s: sec, size: space}

sectorMap[key] = false

for _, strg := range storages {
if space > strg.Available {
strg.Available -= space
sectorMap[key] = true
}
}
totalRequiredSpace += int64(sectorSize)
}

var totalAvailableSpace int64
for _, storage := range storages {
totalAvailableSpace += storage.Available
missingSpace := big.NewInt(0)
for sec, accounted := range sectorMap {
if !accounted {
big.Add(missingSpace, big.NewInt(sec.size))
}
}

if totalAvailableSpace < totalRequiredSpace {
al.alertMap[Name].alertString = fmt.Sprintf("Insufficient storage space for sealing sectors. Required: %d bytes, Available: %d bytes", totalRequiredSpace, totalAvailableSpace)
if missingSpace.GreaterThan(big.NewInt(0)) {
al.alertMap[Name].alertString = fmt.Sprintf("Insufficient storage space for sealing sectors. Additional %s required.", humanize.Bytes(missingSpace.Uint64()))
}
}
19 changes: 12 additions & 7 deletions curiosrc/alertmanager/task_alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,6 @@ type pdPayload struct {
CustomDetails interface{} `json:"custom_details,omitempty"`
}

// pdData is the JSON request body that sendAlert marshals and POSTs to the
// configured PagerDuty event URL.
type pdData struct {
// RoutingKey is populated by sendAlert from the configured PageDutyIntegrationKey.
RoutingKey string `json:"routing_key"`
// EventAction is the event type; sendAlert sets it to "trigger".
EventAction string `json:"event_action"`
// Payload is the event payload, serialized under the "payload" key.
Payload *pdPayload `json:"payload"`
}

type alertFunc func(al *alerts)

var alertFuncs = []alertFunc{
Expand All @@ -87,6 +81,11 @@ func NewAlertTask(api AlertAPI, db *harmonydb.DB, alertingCfg config.CurioAlerti
}

func (a *AlertTask) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) {
if a.cfg.PageDutyIntegrationKey == "" {
log.Warnf("PageDutyIntegrationKey is empty, not sending an alert")
return true, nil
}

ctx := context.Background()

alMap := make(map[string]*alertOut)
Expand Down Expand Up @@ -167,6 +166,12 @@ var _ harmonytask.TaskInterface = &AlertTask{}
// If all retries fail, it returns an error indicating the last network error encountered.
func (a *AlertTask) sendAlert(data *pdPayload) error {

type pdData struct {
RoutingKey string `json:"routing_key"`
EventAction string `json:"event_action"`
Payload *pdPayload `json:"payload"`
}

payload := &pdData{
RoutingKey: a.cfg.PageDutyIntegrationKey,
EventAction: "trigger",
Expand All @@ -178,7 +183,7 @@ func (a *AlertTask) sendAlert(data *pdPayload) error {
return fmt.Errorf("error marshaling JSON: %w", err)
}

req, err := http.NewRequest("POST", a.cfg.PagerDutyEventURL.String(), bytes.NewBuffer(jsonData))
req, err := http.NewRequest("POST", a.cfg.PagerDutyEventURL, bytes.NewBuffer(jsonData))
if err != nil {
return fmt.Errorf("error creating request: %w", err)
}
Expand Down
30 changes: 8 additions & 22 deletions documentation/en/default-curio-config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,14 @@


[Alerting]
# PageDutyIntegrationKey is the integration key for a pager duty service. You can find this unique service
# PagerDutyEventURL is the URL of the PagerDuty.com Events API v2. Events sent to this URL are ultimately
# routed to a PagerDuty.com service and processed.
# The default is sufficient for integration with the standard commercial PagerDuty.com service.
#
# type: string
#PagerDutyEventURL = "https://events.pagerduty.com/v2/enqueue"

# PageDutyIntegrationKey is the integration key for a PagerDuty.com service. You can find this unique service
# identifier in the integration page for the service.
#
# type: string
Expand All @@ -378,24 +385,3 @@
# type: types.FIL
#MinimumWalletBalance = "5 FIL"

[Alerting.PagerDutyEventURL]
#Scheme = "https"

#Opaque = ""

#Host = "events.pagerduty.com"

#Path = "/v2/enqueue"

#RawPath = ""

#OmitHost = false

#ForceQuery = false

#RawQuery = ""

#Fragment = ""

#RawFragment = ""

11 changes: 1 addition & 10 deletions node/config/def.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package config

import (
"encoding"
"net/url"
"os"
"strconv"
"time"
Expand Down Expand Up @@ -329,14 +328,6 @@ const (
ResourceFilteringDisabled = ResourceFilteringStrategy("disabled")
)

// PGDutyURL returns the default PagerDuty Events API v2 endpoint as a parsed URL.
// If the endpoint string cannot be parsed, an empty, non-nil *url.URL is
// returned instead of an error.
func PGDutyURL() *url.URL {
	const endpoint = "https://events.pagerduty.com/v2/enqueue"

	if parsed, err := url.Parse(endpoint); err == nil {
		return parsed
	}
	return &url.URL{}
}

func DefaultCurioConfig() *CurioConfig {
return &CurioConfig{
Subsystems: CurioSubsystemsConfig{
Expand Down Expand Up @@ -378,7 +369,7 @@ func DefaultCurioConfig() *CurioConfig {
MaxQueuePoRep: 0, // default don't use this limit
},
Alerting: CurioAlerting{
PagerDutyEventURL: PGDutyURL(),
PagerDutyEventURL: "https://events.pagerduty.com/v2/enqueue",
PageDutyIntegrationKey: "",
MinimumWalletBalance: types.MustParseFIL("5"),
},
Expand Down
10 changes: 5 additions & 5 deletions node/config/doc_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 5 additions & 7 deletions node/config/types.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package config

import (
"net/url"

"github.com/ipfs/go-cid"

"github.com/filecoin-project/lotus/chain/types"
Expand Down Expand Up @@ -1114,12 +1112,12 @@ type FaultReporterConfig struct {
}

type CurioAlerting struct {
// PagerDutyEventURL is URL for PagerDuty's Events API v2 URL. Events sent to this API URL are ultimately
// routed to a PagerDuty service and processed.
// The default is sufficient for integration with the stock commercial Pager Duty company's service.
PagerDutyEventURL *url.URL
// PagerDutyEventURL is the URL of the PagerDuty.com Events API v2. Events sent to this URL are ultimately
// routed to a PagerDuty.com service and processed.
// The default is sufficient for integration with the standard commercial PagerDuty.com service.
PagerDutyEventURL string

// PageDutyIntegrationKey is the integration key for a pager duty service. You can find this unique service
// PageDutyIntegrationKey is the integration key for a PagerDuty.com service. You can find this unique service
// identifier in the integration page for the service.
PageDutyIntegrationKey string

Expand Down

0 comments on commit e16a789

Please sign in to comment.