diff --git a/config/resources/defaults.yaml b/config/resources/defaults.yaml index 2e5216874..4b4ec51ad 100644 --- a/config/resources/defaults.yaml +++ b/config/resources/defaults.yaml @@ -67,6 +67,16 @@ Cache: LowWatermark: 90 HighWaterMark: 95 BlocksToPrefetch: 0 +Lotman: + EnabledPolicy: "fairshare" + DefaultLotExpirationLifetime: "2016h" + DefaultLotDeletionLifetime: "4032h" + PolicyDefinitions: + - PolicyName: "fairshare" + DivideUnallocated: true + PurgeOrder: ["del", "exp", "opp", "ded"] + DiscoverPrefixes: true + MergeLocalWithDiscovered: false LocalCache: HighWaterMarkPercentage: 95 LowWaterMarkPercentage: 85 diff --git a/docs/parameters.yaml b/docs/parameters.yaml index adaa76eb0..99ad2e696 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -2692,34 +2692,126 @@ type: bool default: false components: ["cache"] --- -name: Lotman.Lots -description: |+ - Declarative configuration for LotMan. This is a list of objects, each of which describes a "lot". Every lot - can be defined with the following: - - - `LotName`: REQUIRED. The name of the lot. This is used to identify the lot in the LotMan database. - - `Owner`: REQUIRED. A string identifying the owner of the lot's data (as opposed to someone who can modify the lot itself). - The Owner field should generally be set to the issue for the lot's namespace path. For example, if the lot - tracks namespace `/foo/bar`, the owner might be set to `https://registry.com/api/v1.0/registry/foo/bar`. - - `Paths`: OPTIONAL. A list of path objects, each of which describes a path that should be managed by the lot. - - `Path`: REQUIRED. The path to be managed by the lot. - - `Recursive`: REQUIRED. A boolean indicating whether the path should be managed recursively. If true, the lot will - manage all files and directories under the specified path. - - `ManagementPolicyAttrs`: REQUIRED. The lot's management policy attributes object. This contains information about resources the lot should - be allocated, and how it should be managed. - - `DedicatedGB`: REQUIRED. The amount of storage, in GB, that should be dedicated to the lot. This means the lot can assume it - always has access to this quantity. - - `OpportunisticGB`: REQUIRED. The amount of opportunistic storage, in GB, the lot should have access to, when storage is available. - - `MaxNumObjects`: REQUIRED. The maximum number of objects a lot is allowed to store. - - `CreationTime`: REQUIRED. A unix timestamp indicating when the lot should begin being considered valid. Times in the future indicate - the lot should not be considered valid until that time. - - `ExpirationTime`: REQUIRED. A unix timestamp indicating when the lot expires. Lots may continue to function after expiration, but lot - data owners should recognize the storage is at-will and may be pre-empted at any time. - - `DeletionTime`: REQUIRED. A unix timestamp indicating when the lot and its associated data should be deleted. - - Note that example configurations can be found in lotman/resources/lots-config.yaml +name: Lotman.PolicyDefinitions +description: |+ + A list of named Lotman purge policy definitions that may be enabled by the cache administrator through setting the `Lotman.EnabledPolicy` + configuration. Each policy definition is an object with the following fields: + - `PolicyName`: The name of the policy. This is used to identify the policy in the `Lotman.EnabledPolicy` configuration. + - `PurgeOrder`: An ordered list of strings indicating the order in which lots should be purged. 
The strings should be one of the following:
+    - `del`: Purge lots that have passed their deletion time.
+    - `exp`: Purge lots that have passed their expiration time.
+    - `opp`: Purge lots that have exceeded their opportunistic storage quota.
+    - `ded`: Purge lots that have exceeded their dedicated storage quota.
+  - `DiscoverPrefixes`: A boolean indicating whether Lotman should automatically discover prefixes from the Director. If true, Lotman will
+    attempt to create lots for all discovered federation prefixes. Locally-defined lots will take precedence over discovered lots if the two have
+    the same name.
+  - `MergeLocalWithDiscovered`: A boolean indicating whether Lotman should merge locally-defined lot configurations with discovered namespaces.
+    Local configuration takes precedence for most lot fields, but the `Paths` and `Parents` fields are additive.
+  - `DivideUnallocated`: A boolean indicating whether Lotman should attempt to make intelligent decisions regarding management policy attributes
+    for lots that have not provided explicit values. These decisions are based on the cache's total storage capacity and the number of lots
+    that have been explicitly configured, and are intended to maximize potential cache utilization. This should be set to "true" in most cases.
+  - `Lots`: A list of lot objects, each of which describes a "lot". Every lot can be defined with the following:
+    - `LotName`: REQUIRED. The name of the lot. This is used to identify the lot in the LotMan database.
+    - `Owner`: REQUIRED. A string identifying the owner of the lot's data (as opposed to someone who can modify the lot itself).
+      The Owner field should generally be set to the issuer for the lot's namespace path. For example, if the lot
+      tracks namespace `/foo/bar`, the owner might be set to `https://registry.com/api/v1.0/registry/foo/bar`.
+    - `Paths`: OPTIONAL. A list of path objects, each of which describes a path that should be managed by the lot.
+      - `Path`: REQUIRED. The path to be managed by the lot.
+      - `Recursive`: REQUIRED. A boolean indicating whether the path should be managed recursively. If true, the lot will
+        manage all files and directories under the specified path.
+    - `ManagementPolicyAttrs`: REQUIRED. The lot's management policy attributes object. This contains information about resources the lot should
+      be allocated, and how it should be managed.
+      - `DedicatedGB`: REQUIRED. The amount of storage, in GB, that should be dedicated to the lot. This means the lot can assume it
+        always has access to this quantity.
+      - `OpportunisticGB`: REQUIRED. The amount of opportunistic storage, in GB, the lot should have access to when storage is available.
+      - `MaxNumObjects`: REQUIRED. The maximum number of objects a lot is allowed to store.
+      - `CreationTime`: REQUIRED. A unix timestamp indicating when the lot should begin being considered valid. Times in the future indicate
+        the lot should not be considered valid until that time.
+      - `ExpirationTime`: REQUIRED. A unix timestamp indicating when the lot expires. Lots may continue to function after expiration, but lot
+        data owners should recognize the storage is at-will and may be pre-empted at any time.
+      - `DeletionTime`: REQUIRED. A unix timestamp indicating when the lot and its associated data should be deleted.
+
+  For example, Lotman could be configured with the "my-policy" policy with the following:
+  ```yaml
+  Lotman:
+    EnabledPolicy: "my-policy"
+    PolicyDefinitions:
+      - PolicyName: "my-policy"
+        DivideUnallocated: true
+        PurgeOrder: ["del", "exp", "opp", "ded"]
+        DiscoverPrefixes: true
+        MergeLocalWithDiscovered: true
+        Lots:
+          - LotName: "/foo/bar"
+            Owner: "https://registry.com/api/v1.0/registry/foo/bar"
+            Paths:
+              - Path: "/foo/bar"
+                Recursive: true
+            ManagementPolicyAttrs:
+              DedicatedGB: 100
+              OpportunisticGB: 100
+              MaxNumObjects: 1000
+              CreationTime: 1614556800
+              ExpirationTime: 1614556800
+              DeletionTime: 1614556800
+          - LotName ...
+  ```
+
+  Additional example configurations can be found in lotman/resources/lots-config.yaml
 For more information about LotMan configuration, see: [https://github.com/pelicanplatform/lotman](https://github.com/pelicanplatform/lotman)
type: object
default: none
components: ["cache"]
+---
+name: Lotman.EnabledPolicy
+description: |+
+  The name of the policy to use with Lotman's purge logic. Policy names are defined in the Lotman.PolicyDefinitions list object.
+  If unset, the "fairshare" policy is used, which evenly divides the cache's space amongst all top-level namespaces discoverable
+  through the Director and purges data in the following order: lots past their deletion time, lots past their expiration time,
+  lots over their opportunistic storage quota, and lots over their dedicated storage quota. The "fairshare" policy is defined as follows:
+  ```yaml
+  Lotman:
+    EnabledPolicy: "fairshare"
+    DefaultLotExpirationLifetime: "2016h"
+    DefaultLotDeletionLifetime: "4032h"
+    PolicyDefinitions:
+      - PolicyName: "fairshare"
+        DivideUnallocated: true
+        PurgeOrder: ["del", "exp", "opp", "ded"]
+        DiscoverPrefixes: true
+        MergeLocalWithDiscovered: false
+  ```
+type: string
+default: "fairshare"
+components: ["cache"]
+---
+name: Lotman.DefaultLotExpirationLifetime
+description: |+
+  The default expiration lifetime for lots that have not provided an explicit expiration time. Valid time units are:
+    - ns for nanoseconds
+    - us (or µs) for microseconds
+    - ms for milliseconds
+    - s for seconds
+    - m for minutes
+    - h for hours
+
+  This value is added to the current time and fed to Lotman as a unix timestamp in milliseconds.
+type: duration
+default: 2016h
+components: ["cache"]
+---
+name: Lotman.DefaultLotDeletionLifetime
+description: |+
+  The default deletion lifetime for lots that have not provided an explicit deletion time. Valid time units are:
+    - ns for nanoseconds
+    - us (or µs) for microseconds
+    - ms for milliseconds
+    - s for seconds
+    - m for minutes
+    - h for hours
+
+  This value is added to the current time and fed to Lotman as a unix timestamp in milliseconds.
+type: duration
+default: 4032h
+components: ["cache"]
diff --git a/launchers/cache_serve.go b/launchers/cache_serve.go
index 6822ee876..3c78d2d77 100644
--- a/launchers/cache_serve.go
+++ b/launchers/cache_serve.go
@@ -74,8 +74,16 @@ func CacheServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group, m
 		log.Debugln("Registering Lotman API")
 		lotman.RegisterLotman(ctx, engine.Group("/"))
 	}
-	// Bind the c library funcs to Go
-	if success := lotman.InitLotman(); !success {
+
+	// Until https://github.com/PelicanPlatform/lotman/issues/24 is closed, we can only really reason over
+	// top-level prefixes because enumerating all object "directories" under a given federation prefix is
+	// infeasible, but is currently the only way to nest namespaces in Lotman such that a sub-namespace
+	// can be associated with a top-level prefix.
+	// To that end, we need to filter out any nested namespaces from the cache server's namespace ads.
+	uniqueTopPrefixes := server_utils.FilterTopLevelPrefixes(cacheServer.GetNamespaceAds())
+
+	// Bind the C library funcs to Go, instantiate lots, set up the Lotman database, etc.
+	if success := lotman.InitLotman(uniqueTopPrefixes); !success {
 		return nil, errors.New("Failed to initialize lotman")
 	}
 }
diff --git a/lotman/lotman.go b/lotman/lotman.go
index 69ac4d302..637b53bfd 100644
--- a/lotman/lotman.go
+++ b/lotman/lotman.go
@@ -29,13 +29,15 @@ import (
 
 	"github.com/gin-gonic/gin"
 	log "github.com/sirupsen/logrus"
+
+	"github.com/pelicanplatform/pelican/server_structs"
 )
 
 func RegisterLotman(ctx context.Context, router *gin.RouterGroup) {
 	log.Warningln("LotMan is not supported on this platform. Skipping...")
 }
 
-func InitLotman() bool {
+func InitLotman(adsFromFed []server_structs.NamespaceAdV2) bool {
 	log.Warningln("LotMan is not supported on this platform. Skipping...")
 	return false
 }
diff --git a/lotman/lotman_linux.go b/lotman/lotman_linux.go
index 1719911e3..2612a7918 100644
--- a/lotman/lotman_linux.go
+++ b/lotman/lotman_linux.go
@@ -29,15 +29,21 @@ import (
 	"fmt"
 	"os"
 	"runtime"
+	"strconv"
+	"strings"
 	"sync"
+	"syscall"
+	"time"
 	"unsafe"
 
 	"github.com/ebitengine/purego"
 	"github.com/pkg/errors"
 	log "github.com/sirupsen/logrus"
+	"github.com/spf13/viper"
 
 	"github.com/pelicanplatform/pelican/config"
 	"github.com/pelicanplatform/pelican/param"
+	"github.com/pelicanplatform/pelican/server_structs"
 )
 
 var (
@@ -74,10 +80,10 @@ type (
 		Value int64
 	}
 
-	LotPaths struct {
+	LotPath struct {
 		Path      string `json:"path" mapstructure:"Path"`
 		Recursive bool   `json:"recursive" mapstructure:"Recursive"`
-		LotName   string `json:"lot_name,omitempty"`
+		LotName   string `json:"lot_name,omitempty"` // Not used when creating lots, but some queries will populate it
 	}
 
 	LotValueMapInt struct {
@@ -137,7 +143,7 @@ type (
 		Parents []string `json:"parents" mapstructure:"Parents"`
 		// While we _could_ expose Children, that complicates things so for now we keep it hidden from the config
 		Children *[]string  `json:"children,omitempty"`
-		Paths    []LotPaths `json:"paths,omitempty" mapstructure:"Paths"`
+		Paths    []LotPath  `json:"paths,omitempty" mapstructure:"Paths"`
 		MPA      *MPA       `json:"management_policy_attrs,omitempty" mapstructure:"ManagementPolicyAttrs"`
 		// Again, these are derived
 		RestrictiveMPA *RestrictiveMPA `json:"restrictive_management_policy_attrs,omitempty"`
@@ -162,6 +168,19 @@ type (
 		Paths *[]PathUpdate `json:"paths,omitempty"`
 		MPA   *MPA          `json:"management_policy_attrs,omitempty"`
 	}
+
+	PurgePolicy struct {
+		PurgeOrder               []string `json:"purge_order"`
+		PolicyName               string   `json:"policy_name"`
+		DiscoverPrefixes         bool     `json:"discover_prefixes"`
+		MergeLocalWithDiscovered bool     `json:"merge_local_with_discovered"`
+		DivideUnallocated        bool     `json:"divide_unallocated"`
+		Lots                     []Lot    `json:"lots"`
+	}
 )
 
+const (
+	bytesInGigabyte = 1000 * 1000 * 1000
+)
+
 // Lotman has a tendency to return an int as 123.0 instead of 123. This struct is used to unmarshal
@@ -314,9 +333,580 @@ func getFederationIssuer() (string, error) {
 	return federationIssuer, nil
 }
 
+// Given MPA1 and MPA2, merge them into a single MPA. If a field is set in MPA1,
+// it will take precedence.
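+// For example (illustrative values only): merging an MPA that sets just DedicatedGB=10
+// into one that sets DedicatedGB=20 and OpportunisticGB=30 yields DedicatedGB=10 and
+// OpportunisticGB=30 -- MPA1 wins wherever both are set, and MPA2 only fills gaps.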
+func mergeMPAs(mpa1, mpa2 *MPA) *MPA { + // Handle nil cases + if mpa1 == nil && mpa2 == nil { + return nil + } + if mpa1 == nil { + return mpa2 + } + if mpa2 == nil { + return mpa1 + } + + // Merge individual fields + mergedMPA := *mpa1 + if mpa1.DedicatedGB == nil { + mergedMPA.DedicatedGB = mpa2.DedicatedGB + } + if mpa1.OpportunisticGB == nil { + mergedMPA.OpportunisticGB = mpa2.OpportunisticGB + } + if mpa1.MaxNumObjects == nil { + mergedMPA.MaxNumObjects = mpa2.MaxNumObjects + } + if mpa1.CreationTime == nil { + mergedMPA.CreationTime = mpa2.CreationTime + } + if mpa1.ExpirationTime == nil { + mergedMPA.ExpirationTime = mpa2.ExpirationTime + } + if mpa1.DeletionTime == nil { + mergedMPA.DeletionTime = mpa2.DeletionTime + } + + return &mergedMPA +} + +// Given lot1 and lot2, merge them into a single lot. If a field is set in lot1, +// it will take precedence. Lots cannot be merged if they have separate names +func mergeLots(lot1, lot2 Lot) (Lot, error) { + if lot1.LotName != lot2.LotName { + return Lot{}, errors.Errorf("cannot merge lots %s and %s because they have different names", lot1.LotName, lot2.LotName) + } + mergedLot := lot1 + + // Prefer lot1's owner + if lot1.Owner == "" { + mergedLot.Owner = lot2.Owner + } + + // Calculate union between the parents -- if this gets us in trouble by introducing cycles, + // lotman will tell us on startup (hopefully...). + parentSet := make(map[string]bool) + for _, parent := range lot1.Parents { + parentSet[parent] = true + } + for _, parent := range lot2.Parents { + if !parentSet[parent] { + mergedLot.Parents = append(mergedLot.Parents, parent) + parentSet[parent] = true + } + } + + // Merge the MPAs + mergedLot.MPA = mergeMPAs(lot1.MPA, lot2.MPA) + + return mergedLot, nil +} + +// Calculate the union of two lot maps. If a lot is present in both maps, merge them. +func mergeLotMaps(map1, map2 map[string]Lot) (map[string]Lot, error) { + result := make(map[string]Lot) + + // Add all entries from map1 to result + for key, value := range map1 { + result[key] = value + } + + // Merge entries from map2 into result + for key, value := range map2 { + if existingValue, exists := result[key]; exists { + mergedLot, err := mergeLots(existingValue, value) + if err != nil { + return result, err + } + result[key] = mergedLot + } else { + result[key] = value + } + } + + return result, nil +} + +// Grab a map of policy definitions from the config file, where the policy +// name is the key and its attributes comprise the value. +func getPolicyMap() (map[string]PurgePolicy, error) { + policyMap := make(map[string]PurgePolicy) + var policies []PurgePolicy + err := viper.UnmarshalKey("Lotman.PolicyDefinitions", &policies) + if err != nil { + return policyMap, errors.Wrap(err, "error unmarshaling Lotman policy definitions") + } + + for _, policy := range policies { + policyMap[policy.PolicyName] = policy + } + + return policyMap, nil +} + +// Given a filesystem path, try to get the amount of total and free disk space. 
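+// Note this is backed by syscall.Statfs, which is part of why this implementation lives
+// in the Linux-only build (lotman_linux.go).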
+func getDiskUsage(path string) (total uint64, free uint64, err error) { + var stat syscall.Statfs_t + + err = syscall.Statfs(path, &stat) + if err != nil { + return 0, 0, err + } + + // Total space is the block size multiplied by the total number of blocks + total = stat.Blocks * uint64(stat.Bsize) + + // Free space is the block size multiplied by the number of free blocks + free = stat.Bfree * uint64(stat.Bsize) + + return total, free, nil +} + +func bytesToGigabytes(bytes uint64) float64 { + return float64(bytes) / bytesInGigabyte +} + +func gigabytesToBytes(gb float64) uint64 { + return uint64(gb * bytesInGigabyte) +} + +// Given the list of lots and the total disk space available to the cache, validate that +// each lot has all necessary creation fields and that their values are within reasonable bounds. +// In particular, we want to make sure that the sum of all lots' dedicatedGB values does not exceed +// the high watermark of the cache, as this would allow the cache to purge data from namespaces +// that are using less than their dedicated quota. +func validateLotsConfig(lots []Lot, totalDiskSpaceB uint64) error { + hwmStr := param.Cache_HighWaterMark.GetString() + hwm, err := convertWatermarkToBytes(hwmStr, totalDiskSpaceB) + if err != nil { + return errors.Wrap(err, "error converting high watermark to byte value for Lotman") + } + + totalDedicatedGB := 0.0 + for _, lot := range lots { + // Skip the root lot, which is a container we use to make sure all lots have a federation-owned parent. + // We don't use the root lot for any other purpose. + if lot.LotName == "root" { + continue + } + // Instead of returning on the first missing field, try to get everything for the entire lot. + // We could also do this for _all_ lots before returning, but that may be an overwhelming error + // message. This way, the user can focus on one lot at a time. 
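+		// e.g. a (hypothetical) lot "foo" missing both its owner and paths would produce:
+		//   "the lot 'foo' is missing required values: [Owner Paths]"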
+ missingValues := make([]string, 0) + if lot.LotName == "" { + return errors.New(fmt.Sprintf("detected a lot with no name: %+v", lot)) + } + errMsg := fmt.Sprintf("the lot '%s' is missing required values:", lot.LotName) + + if lot.Owner == "" { + missingValues = append(missingValues, "Owner") + } + if len(lot.Parents) == 0 { + missingValues = append(missingValues, "Parents") + } else { + for _, parent := range lot.Parents { + if parent == "" { + missingValues = append(missingValues, "Parents") + } + } + } + if len(lot.Paths) == 0 { + // Default lot doesn't need paths, but everybody else does + if lot.LotName != "default" { + missingValues = append(missingValues, "Paths") + } + } else { + for _, path := range lot.Paths { + if path.Path == "" { + missingValues = append(missingValues, "Paths.Path") + } + } + } + + if lot.MPA == nil { + missingValues = append(missingValues, "ManagementPolicyAttrs") + } else { + if lot.MPA.DedicatedGB == nil { + missingValues = append(missingValues, "ManagementPolicyAttrs.DedicatedGB") + } else { + totalDedicatedGB += *lot.MPA.DedicatedGB + } + if lot.MPA.OpportunisticGB == nil { + missingValues = append(missingValues, "ManagementPolicyAttrs.OpportunisticGB") + } + // No checking for MaxNumObjects -- the purge plugin doesn't use it yet + if lot.MPA.CreationTime == nil || lot.MPA.CreationTime.Value == 0 { + missingValues = append(missingValues, "ManagementPolicyAttrs.CreationTime") + } + if lot.MPA.ExpirationTime == nil || lot.MPA.ExpirationTime.Value == 0 { + missingValues = append(missingValues, "ManagementPolicyAttrs.ExpirationTime") + } + if lot.MPA.DeletionTime == nil || lot.MPA.DeletionTime.Value == 0 { + missingValues = append(missingValues, "ManagementPolicyAttrs.DeletionTime") + } + } + + if len(missingValues) > 0 { + return errors.New(fmt.Sprintf("%s %v", errMsg, missingValues)) + } + + // We don't apply validation to the opportunistic GB, as it's not a hard limit and the user + // may wish to do something unexpected. However, the sum of dedicated GB should not exceed the HWM + // or the cache may expose some data to purging when it should be protected. + if totalDedicatedGB > bytesToGigabytes(hwm) { + return errors.New(fmt.Sprintf("the sum of all lots' dedicatedGB values exceeds the high watermark of %s. This would allow the cache to purge namespaces using less than their dedicated quota", hwmStr)) + } + } + + return nil +} + +// HWM and LWM values may be a percentage (e.g. 95) indicating the amount of available disk +// to treat as the watermark, or they may be a suffixed byte value (e.g. 100G). We need this +// information in bytes to calculate the amount of space to allocate to each lot. 
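+// For example, given a 1TB disk: "95" (a percentage) converts to 950GB, while "100g"
+// converts to 100GB regardless of disk size.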
+func convertWatermarkToBytes(value string, totalDiskSpace uint64) (uint64, error) {
+	suffixMultipliers := map[string]uint64{
+		"k": 1000,
+		"m": 1000 * 1000,
+		"g": 1000 * 1000 * 1000,
+		"t": 1000 * 1000 * 1000 * 1000,
+	}
+
+	// Check if the value has a suffix
+	if len(value) > 1 {
+		suffix := strings.ToLower(string(value[len(value)-1]))
+		if multiplier, exists := suffixMultipliers[suffix]; exists {
+			number, err := strconv.ParseUint(value[:len(value)-1], 10, 64)
+			if err != nil {
+				return 0, err
+			}
+			return number * multiplier, nil
+		}
+	}
+
+	// If no suffix, treat as percentage
+	percentage, err := strconv.ParseFloat(strings.TrimSuffix(value, "%"), 64)
+	if err != nil {
+		return 0, err
+	}
+	return uint64((percentage / 100) * float64(totalDiskSpace)), nil
+}
+
+// Divide the remaining space among lots' dedicatedGB values -- we don't ever want to
+// dedicate more space than we have available, as indicated by the HWM of the cache. This is because
+// our model is that as long as a lot stays under its dedicated GB, its data is safe in the cache --
+// if the sum of each lot's dedicated GB exceeds the HWM, the cache may purge data without a single lot
+// exceeding its quota. BAD!
+//
+// Opportunistic space can (and should) be overallocated, so unless explicitly set, each lot will have
+// dedicatedGB + opportunisticGB = HWM. This isn't maxed out to the total disk space, because otherwise
+// no lot could ever exceed its opportunistic storage and we'd lose some of the capabilities to reason about
+// how greedy the lot is.
+func divideRemainingSpace(lotMap *map[string]Lot, totalDiskSpaceB uint64) error {
+	hwmStr := param.Cache_HighWaterMark.GetString()
+	if hwmStr == "" {
+		return errors.New("high watermark is not set in the cache configuration")
+	}
+	hwm, err := convertWatermarkToBytes(hwmStr, totalDiskSpaceB)
+	if err != nil {
+		return errors.Wrap(err, "error converting high watermark to byte value for Lotman")
+	}
+	remainingToHwmB := hwm
+
+	// First iterate through all lots and subtract from our total space any amount
+	// that's already been allocated. Note which lots have an unset value, as we'll
+	// need to return to them.
+	returnToKeys := make([]string, 0, len(*lotMap))
+	for key, lot := range *lotMap {
+		if lot.LotName == "root" {
+			continue
+		}
+		if lot.MPA != nil && lot.MPA.DedicatedGB != nil {
+			remainingToHwmB -= gigabytesToBytes(*lot.MPA.DedicatedGB)
+			if lot.MPA.OpportunisticGB == nil {
+				oGb := bytesToGigabytes(hwm) - *lot.MPA.DedicatedGB
+				lot.MPA.OpportunisticGB = &oGb
+			}
+		} else {
+			returnToKeys = append(returnToKeys, lot.LotName)
+		}
+		(*lotMap)[key] = lot
+	}
+
+	if len(returnToKeys) > 0 {
+		// Now iterate through the lots that need space allocated and assign them the
+		// remaining space
+		spacePerLotRemainingB := remainingToHwmB / uint64(len(returnToKeys))
+		for _, key := range returnToKeys {
+			lot := (*lotMap)[key]
+			if lot.MPA == nil {
+				lot.MPA = &MPA{}
+			}
+			dGb := bytesToGigabytes(spacePerLotRemainingB)
+			oGb := bytesToGigabytes(hwm - spacePerLotRemainingB)
+			lot.MPA.DedicatedGB = &dGb
+			if lot.MPA.OpportunisticGB == nil {
+				lot.MPA.OpportunisticGB = &oGb
+			}
+			lot.MPA.MaxNumObjects = &Int64FromFloat{Value: 0} // Purge plugin doesn't yet use this, set to 0.
+			(*lotMap)[key] = lot
+		}
+	}
+
+	return nil
+}
+
+// Lots have unix millisecond timestamps for creation, expiration, and deletion. If these are not set in the
+// config, we'll set them to the current time.
Expiration and deletion times are set to the default lifetime +func configLotTimestamps(lotMap *map[string]Lot) { + now := time.Now().UnixMilli() + defaultExpiration := now + param.Lotman_DefaultLotExpirationLifetime.GetDuration().Milliseconds() + defaultDeletion := now + param.Lotman_DefaultLotDeletionLifetime.GetDuration().Milliseconds() + + for name, lot := range *lotMap { + if lot.MPA == nil { + lot.MPA = &MPA{} + } + if lot.MPA.CreationTime == nil || lot.MPA.CreationTime.Value == 0 { + lot.MPA.CreationTime = &Int64FromFloat{Value: now} + } + if lot.MPA.ExpirationTime == nil || lot.MPA.ExpirationTime.Value == 0 { + lot.MPA.ExpirationTime = &Int64FromFloat{Value: defaultExpiration} + } + + if lot.MPA.DeletionTime == nil || lot.MPA.DeletionTime.Value == 0 { + lot.MPA.DeletionTime = &Int64FromFloat{Value: defaultDeletion} + } + + (*lotMap)[name] = lot + } +} + +// By default, Lotman should discover namespaces from the Director and try to create the relevant top-level +// lots for those namespaces. This function creates those lots, but they may be merged with local config +// at a later time. +func configLotsFromFedPrefixes(nsAds []server_structs.NamespaceAdV2) (map[string]Lot, error) { + directorLotMap := make(map[string]Lot) + federationIssuer, err := getFederationIssuer() + if err != nil { + return directorLotMap, errors.Wrap(err, "Unable to determine federation issuer which is needed by Lotman to determine lot ownership") + } + if federationIssuer == "" { + return directorLotMap, errors.New("The detected federation issuer, which is needed by Lotman to determine lot/namespace ownership, is empty") + } + for _, nsAd := range nsAds { + // Skip monitoring namespaces + if strings.HasPrefix(nsAd.Path, "/pelican/monitoring") { + continue + } + var issuer string + if len(nsAd.Issuer) > 0 { + issuer = (nsAd.Issuer[0]).IssuerUrl.String() + } else { + issuer = federationIssuer + } + + directorLotMap[nsAd.Path] = Lot{ + LotName: nsAd.Path, + Owner: issuer, // grab the first issuer -- lotman doesn't currently support multiple direct owners + // Assign parent as the root lot at the cache. This lets root edit the lot, but still allows the owner of the namespace to further subdivide + Parents: []string{"root"}, + Paths: []LotPath{ + { + Path: nsAd.Path, + Recursive: true, + }, + }, + } + } + + return directorLotMap, nil +} + +// One limitation in Lotman is that a lot cannot be created unless all of its parents exist. Unfortunately, +// this means we have to sort our lots topologically to ensure that we create them in the correct order. +// Failure to do so appears to result in a segfault in Lotman. 
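+// For example, if lot "/foo/bar" lists parent "root", the returned slice is guaranteed to
+// place "root" before "/foo/bar" so that creation can proceed parent-first.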
+func topoSort(lotMap map[string]Lot) ([]Lot, error) { + sorted := make([]Lot, 0, len(lotMap)) + visited := make(map[string]bool) + + // Recursively visit each lot and its parents, DFS-style + var visit func(string) error + visit = func(name string) error { + if visited[name] { + return nil + } + visited[name] = true + + // Visit all parents first + for _, parent := range lotMap[name].Parents { + if err := visit(parent); err != nil { + return err + } + } + // Adding the leaves of the DFS parent tree to the sorted list + // guarantees that we'll add the parents before the children + sorted = append(sorted, lotMap[name]) + return nil + } + + for name := range lotMap { + if err := visit(name); err != nil { + return nil, err + } + } + + return sorted, nil +} + + +// Initialize the lot configurations based on provided policy, discovered namespaces, +// and available cache space, handling any necessary merges and validations along the way. +func initLots(nsAds []server_structs.NamespaceAdV2) ([]Lot, error) { + var internalLots []Lot + + policies, err := getPolicyMap() + if err != nil { + return internalLots, errors.Wrap(err, "unable to parse lotman configuration") + } + + // Get the configured policy, which defines any lots we may need to handle + // along with merging logic and purge ordering + policyName := param.Lotman_EnabledPolicy.GetString() + if _, exists := policies[policyName]; !exists { + return internalLots, errors.Errorf("enabled policy %s is not defined in the configuration", policyName) + } + policy := policies[policyName] + + discoverPrefixes := policy.DiscoverPrefixes + shouldMerge := policy.MergeLocalWithDiscovered + if shouldMerge && !discoverPrefixes { + return internalLots, errors.New("MergeLocalWithDiscovered is set to true, but DiscoverPrefixes is set to false. This is not a valid configuration") + } + + // policyLotMap will hold the lots defined in the configuration file (if any) provided by the cache admin + policyLotMap := make(map[string]Lot) + for _, lot := range policy.Lots { + policyLotMap[lot.LotName] = lot + } + + var lotMap map[string]Lot + if discoverPrefixes { + directorLotMap, err := configLotsFromFedPrefixes(nsAds) + if err != nil { + return internalLots, errors.Wrap(err, "error configuring lots from federation prefixes") + } + + // Handle potential need to merge discovered namespaces with provided configuration + if shouldMerge { + log.Debug("Merging lot configuration from discovered namespaces with configured lots") + lotMap, err = mergeLotMaps(directorLotMap, policyLotMap) + if err != nil { + return internalLots, errors.Wrap(err, "error merging discovered namespaces with configured lots") + } + log.Tracef("Merged lot configuration: %+v", lotMap) + } else { + lotMap = make(map[string]Lot) + // first set things up with the director lots, then overwrite with the policy lots. + // This allows cache admins to override any discovered lots with their own configuration. 
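+			// e.g. if both maps define a lot named "/foo", the policy's definition of "/foo"
+			// replaces the discovered one wholesale, unlike the merge branch above, which
+			// would have combined the two definitions field-by-field.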
+ for key, value := range directorLotMap { + lotMap[key] = value + } + for key, value := range policyLotMap { + lotMap[key] = value + } + } + } else { + lotMap = policyLotMap + } + + cacheDisks := param.Cache_DataLocations.GetStringSlice() + log.Tracef("Cache data locations being tracked by Lotman: %v", cacheDisks) + var totalDiskSpaceB uint64 + for _, disk := range cacheDisks { + diskSpace, _, err := getDiskUsage(disk) + if err != nil { + return internalLots, errors.Wrapf(err, "error getting disk usage for filesystem path %s", disk) + } + totalDiskSpaceB += diskSpace + } + + // Now guarantee our special "default" and "root" lots if the user hasn't provided them + // in the config. For now, these lots must always exist because they're used to make sure + // all data is tied to a lot (default) and that the requirement of a root lot is satisfied + // without allowing discovered lots to gain rootly status. + federationIssuer, err := getFederationIssuer() + if err != nil { + return internalLots, errors.Wrap(err, "Unable to determine federation issuer which is needed by Lotman to determine lot ownership") + } + if federationIssuer == "" { + return internalLots, errors.New("The detected federation issuer, which is needed by Lotman to determine lot/namespace ownership, is empty") + } + rootDedGB := bytesToGigabytes(totalDiskSpaceB) + zero := float64(0) + if _, exists := lotMap["default"]; !exists { + lotMap["default"] = Lot{ + LotName: "default", + Owner: federationIssuer, + Parents: []string{"default"}, + MPA: &MPA{ + // Set default values to 0 and let potential reallocation happen later. + DedicatedGB: &zero, + OpportunisticGB: &zero, + MaxNumObjects: &Int64FromFloat{Value: 0}, // Purge plugin doesn't yet use this, set to 0. + }, + + } + } + if _, exists := lotMap["root"]; !exists { + lotMap["root"] = Lot{ + LotName: "root", + Owner: federationIssuer, + Parents: []string{"root"}, + + Paths: []LotPath{ + { + Path: "/", + Recursive: false, // setting this to true would prevent any other lot from claiming a path + }, + }, + MPA: &MPA{ + // Max out dedicatedGB so the root lot never purges. All other lots should be tied to their own policies. + DedicatedGB: &rootDedGB, + OpportunisticGB: &zero, + MaxNumObjects: &Int64FromFloat{Value: 0}, // Purge plugin doesn't yet use this, set to 0. 
+			},
+		}
+	}
+
+	log.Tracef("Lotman will split lot disk space quotas amongst the discovered disk space: %vB", totalDiskSpaceB)
+	if policy.DivideUnallocated {
+		log.Traceln("Dividing unallocated space among lots")
+		if err := divideRemainingSpace(&lotMap, totalDiskSpaceB); err != nil {
+			return internalLots, errors.Wrap(err, "error dividing unallocated space among lots")
+		}
+	}
+
+	// Set up lot timestamps (creation, expiration, deletion) if needed
+	configLotTimestamps(&lotMap)
+
+	internalLots, err = topoSort(lotMap)
+	if err != nil {
+		return internalLots, errors.Wrap(err, "error sorting lots prior to instantiation")
+	}
+
+	log.Tracef("Internal lot configuration: %+v", internalLots)
+	err = validateLotsConfig(internalLots, totalDiskSpaceB)
+	if err != nil {
+		return internalLots, errors.Wrap(err, "error validating deduced lot configuration")
+	}
+
+	return internalLots, nil
+}
+
 // Initialize the LotMan library and bind its functions to the global vars
 // We also perform a bit of extra setup such as setting the lotman db location
-func InitLotman() bool {
+func InitLotman(adsFromFed []server_structs.NamespaceAdV2) bool {
 	log.Infof("Initializing LotMan...")
 
 	// dlopen the LotMan library
@@ -358,12 +948,10 @@ func InitLotman() bool {
 		return false
 	}
 
-	defaultInitialized := false
-	rootInitialized := false
-
-	err = param.Lotman_Lots.Unmarshal(&initializedLots)
+	initializedLots, err = initLots(adsFromFed)
 	if err != nil {
-		log.Warningf("Error while unmarshaling Lots from config: %v", err)
+		log.Errorf("Error creating lot config: %v", err)
+		return false
 	}
 
 	federationIssuer, err := getFederationIssuer()
@@ -387,14 +975,13 @@ func InitLotman() bool {
 
 	// Create the basic lots if they don't already exist. We'll make one for default
 	// and one for the root namespace
+	defaultInitialized := false
 	ret = LotmanLotExists("default", &errMsg)
 	if ret < 0 {
 		trimBuf(&errMsg)
 		log.Errorf("Error checking if default lot exists: %s", string(errMsg))
 		return false
 	} else if ret == 0 {
-		// First we try to create the lots that might be configured via Pelican.yaml. If there are none, we'll use
-		// a few default values
 		for _, lot := range initializedLots {
 			if lot.LotName == "default" {
 				log.Debugf("Creating the default lot defined by %v", lot)
@@ -414,56 +1001,18 @@
 			}
 		}
 
-		if !defaultInitialized {
-			// Create the default lot
-			if federationIssuer == "" {
-				log.Errorf("your federation's issuer could not be deduced from your config's federation discovery URL or director URL")
-				return false
-			}
-
-			initDedicatedGB := float64(0)
-			initOpportunisticGB := float64(0)
-			defaultLot := Lot{
-				LotName: "default",
-				// Set the owner to the Federation's discovery url -- under this model, we can treat it like an issuer
-				Owner: federationIssuer,
-				// A self-parent lot indicates superuser status
-				Parents: []string{"default"},
-				MPA: &MPA{
-					DedicatedGB:     &initDedicatedGB,
-					OpportunisticGB: &initOpportunisticGB,
-					MaxNumObjects:   &Int64FromFloat{Value: 0},
-					CreationTime:    &Int64FromFloat{Value: 0},
-					ExpirationTime:  &Int64FromFloat{Value: 0},
-					DeletionTime:    &Int64FromFloat{Value: 0},
-				},
-			}
-
-			log.Debugf("Creating the default lot defined by %v", defaultLot)
-			lotJSON, err := json.Marshal(defaultLot)
-			if err != nil {
-				log.Errorf("Error marshalling default lot JSON: %v", err)
-				return false
-			}
-
-			ret = LotmanAddLot(string(lotJSON), &errMsg)
-			if ret != 0 {
-				trimBuf(&errMsg)
-				log.Errorf("Error creating default lot: %s", string(errMsg))
-				return false
-			}
-		}
-		log.Infof("Created default lot")
+	} else if ret == 1 {
+		defaultInitialized = true
+		log.Infoln("Default lot already exists, skipping creation")
 	}
 
+	rootInitialized := false
 	ret = LotmanLotExists("root", &errMsg)
 	if ret < 0 {
 		trimBuf(&errMsg)
 		log.Errorf("Error checking if root lot exists: %s", string(errMsg))
 		return false
 	} else if ret == 0 {
-		// Try to create the root lot based on what we have in the config
 		for _, lot := range initializedLots {
 			if lot.LotName == "root" {
 				lotJSON, err := json.Marshal(lot)
@@ -482,67 +1031,45 @@
 			}
 		}
 
-		if !rootInitialized {
-			// Create the root lot based on predefined setup
-			if federationIssuer == "" {
-				log.Errorf("your federation's issuer could not be deduced from your config's federation discovery URL or director URL")
-				return false
-			}
-
-			initDedicatedGB := float64(0)
-			initOpportunisticGB := float64(0)
-			rootLot := Lot{
-				LotName: "root",
-				Owner:   federationIssuer,
-				// A self-parent lot indicates superuser status
-				Parents: []string{"root"},
-				Paths: []LotPaths{
-					{
-						Path:      "/",
-						Recursive: false,
-					},
-				},
-				MPA: &MPA{
-					DedicatedGB:     &initDedicatedGB,
-					OpportunisticGB: &initOpportunisticGB,
-					MaxNumObjects:   &Int64FromFloat{Value: 0},
-					CreationTime:    &Int64FromFloat{Value: 0},
-					ExpirationTime:  &Int64FromFloat{Value: 0},
-					DeletionTime:    &Int64FromFloat{Value: 0},
-				},
-			}
-
-			log.Debugf("Creating the root lot defined by %v", rootLot)
-			lotJSON, err := json.Marshal(rootLot)
-			if err != nil {
-				log.Errorf("Error marshalling root lot JSON: %v", err)
-				return false
-			}
-
-			ret = LotmanAddLot(string(lotJSON), &errMsg)
-			if ret != 0 {
-				trimBuf(&errMsg)
-				log.Errorf("Error creating root lot: %s", string(errMsg))
-				return false
-			}
-		}
 		log.Infof("Created root lot")
+	} else if ret == 1 {
+		rootInitialized = true
+		log.Infoln("Root lot already exists, skipping creation")
+	}
+
+	if !defaultInitialized || !rootInitialized {
+		log.Errorln("Failed to create default and/or root lots")
+		return false
+	}
 
 	// Now instantiate any other lots that are in the config
 	for _, lot := range initializedLots {
 		if lot.LotName != "default" && lot.LotName != "root" {
-			lotJSON, err := json.Marshal(lot)
-			if err != nil {
-				log.Errorf("Error marshalling lot JSON for %s: %v", lot.LotName, err)
+			// Don't try to re-create lots that may already exist, as doing so could prevent
+			// the cache from restarting.
+			// TODO: Work out how to handle this case -- we may need to update the lot instead of creating it
+			ret = LotmanLotExists(lot.LotName, &errMsg)
+			if ret < 0 {
+				trimBuf(&errMsg)
+				log.Errorf("Error checking if lot '%s' exists: %s", lot.LotName, string(errMsg))
 				return false
-			}
+			} else if ret == 0 {
+				lotJSON, err := json.Marshal(lot)
+				if err != nil {
+					log.Errorf("Error marshalling lot JSON for %s: %v", lot.LotName, err)
+					return false
+				}
 
-			ret = LotmanAddLot(string(lotJSON), &errMsg)
-			if ret != 0 {
-				trimBuf(&errMsg)
-				log.Errorf("Error creating lot %s: %s", lot.LotName, string(errMsg))
-				log.Infoln("Full lot JSON passed to Lotman for lot creation:", string(lotJSON))
+				ret = LotmanAddLot(string(lotJSON), &errMsg)
+				if ret != 0 {
+					trimBuf(&errMsg)
+					log.Errorf("Error creating lot %s: %s", lot.LotName, string(errMsg))
+					log.Infoln("Full lot JSON passed to Lotman for lot creation:", string(lotJSON))
+					return false
+				}
+			} else if ret == 1 {
+				log.Infof("Lot '%s' already exists, skipping creation", lot.LotName)
+			} else {
+				log.Errorf("Unexpected return value from Lotman for lot '%s' existence check: %d", lot.LotName, ret)
 				return false
 			}
 		}
diff --git a/lotman/lotman_test.go b/lotman/lotman_test.go
index b5a12c504..7041db1a7 100644
--- a/lotman/lotman_test.go
+++ b/lotman/lotman_test.go
@@ -27,51 +27,86 @@ import (
 	"fmt"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"os"
 	"strings"
 	"testing"
+	"time"
 
-	"github.com/pelicanplatform/pelican/server_utils"
 	log "github.com/sirupsen/logrus"
 	"github.com/spf13/viper"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
+	"github.com/pelicanplatform/pelican/config"
+	"github.com/pelicanplatform/pelican/pelican_url"
+	"github.com/pelicanplatform/pelican/server_structs"
+	"github.com/pelicanplatform/pelican/server_utils"
 )
 
 //go:embed resources/lots-config.yaml
 var yamlMockup string
 
+// Helper function for determining policy index from lot config yaml
+func findPolicyIndex(policyName string, policies []PurgePolicy) int {
+	for i, policy := range policies {
+		if policy.PolicyName == policyName {
+			return i
+		}
+	}
+	return -1
+}
+
 // Initialize Lotman
 // If we read from the embedded yaml, we need to override the SHOULD_OVERRIDE keys with the discUrl
 // so that underlying metadata discovery can happen against the mock discovery host
-func setupLotmanFromConf(t *testing.T, readConfig bool, name string, discUrl string) (bool, func()) {
+func setupLotmanFromConf(t *testing.T, readConfig bool, name string, discUrl string, nsAds []server_structs.NamespaceAdV2) (bool, func()) {
 	// Load in our config and handle overriding the SHOULD_OVERRIDE keys with the discUrl
-	// Load in our config
-	if readConfig {
-		viper.SetConfigType("yaml")
-		err := viper.ReadConfig(strings.NewReader(yamlMockup))
-		if err != nil {
-			t.Fatalf("Error reading config: %v", err)
-		}
-
-		// Traverse the settings and modify the "Owner" keys directly in Viper
-		lots := viper.Get("Lotman.Lots").([]interface{})
-		for i, lot := range lots {
-			if lotMap, ok := lot.(map[string]interface{}); ok {
-				if owner, ok := lotMap["owner"].(string); ok && owner == "SHOULD_OVERRIDE" {
-					lotMap["owner"] = discUrl
-					lots[i] = lotMap
-				}
-			}
-		}
-		viper.Set("Lotman.Lots", lots)
-	}
+	// Load in our config
+	viper.Set("Cache.HighWaterMark", "100g")
+
viper.Set("Cache.LowWaterMark", "50g") + viper.Set("Debug", true) + if readConfig { + viper.SetConfigType("yaml") + err := viper.ReadConfig(strings.NewReader(yamlMockup)) + if err != nil { + t.Fatalf("Error reading config: %v", err) + } + + // Grab the policy, figure out which one we're using and override the lot issuers/owners + var policies []PurgePolicy + err = viper.UnmarshalKey("Lotman.PolicyDefinitions", &policies) + require.NoError(t, err) + enabledPolicy := viper.GetString("Lotman.EnabledPolicy") + policyIndex := findPolicyIndex(enabledPolicy, policies) + if policyIndex == -1 { + t.Fatalf("Policy %s not found", enabledPolicy) + } + policy := policies[policyIndex] + + for i, lot := range policy.Lots { + if lot.Owner == "SHOULD_OVERRIDE" { + lot.Owner = discUrl + policy.Lots[i] = lot + } + } + + // Update the policy in viper + policies[policyIndex] = policy + viper.Set("Lotman.PolicyDefinitions", policies) + } else { + // If we're not reading from the embedded yaml, grab the + // default configuration. We need _some_ configuration to work. + viper.Set("ConfigDir", t.TempDir()) + config.InitConfig() + } tmpPathPattern := name + "*" tmpPath, err := os.MkdirTemp("", tmpPathPattern) require.NoError(t, err) viper.Set("Lotman.DbLocation", tmpPath) - success := InitLotman() + success := InitLotman(nsAds) //reset func return success, func() { server_utils.ResetTestState() @@ -80,20 +115,20 @@ func setupLotmanFromConf(t *testing.T, readConfig bool, name string, discUrl str // Create a mock discovery host that returns the servers URL as the value for each pelican-configuration key func getMockDiscoveryHost() *httptest.Server { - return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == "/.well-known/pelican-configuration" { - w.Header().Set("Content-Type", "application/json") - serverURL := r.Host - response := fmt.Sprintf(`{ + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/.well-known/pelican-configuration" { + w.Header().Set("Content-Type", "application/json") + serverURL := r.Host + response := fmt.Sprintf(`{ "director_endpoint": "https://%s/osdf-director.osg-htc.org", "namespace_registration_endpoint": "https://%s/osdf-registry.osg-htc.org", "jwks_uri": "https://%s/osdf/public_signing_key.jwks" }`, serverURL, serverURL, serverURL) - w.Write([]byte(response)) - } else { - http.NotFound(w, r) - } - })) + w.Write([]byte(response)) + } else { + http.NotFound(w, r) + } + })) } // Test the library initializer. NOTE: this also tests CreateLot, which is a part of initialization. 
@@ -102,7 +137,7 @@ func TestLotmanInit(t *testing.T) { t.Run("TestBadInit", func(t *testing.T) { // We haven't set various bits needed to create the lots, like discovery URL - success, cleanup := setupLotmanFromConf(t, false, "LotmanBadInit", "") + success, cleanup := setupLotmanFromConf(t, false, "LotmanBadInit", "", nil) defer cleanup() require.False(t, success) }) @@ -114,7 +149,7 @@ func TestLotmanInit(t *testing.T) { // Lotman uses the discovered URLs/keys to determine some aspects of lot ownership viper.Set("Federation.DiscoveryUrl", server.URL) - success, cleanup := setupLotmanFromConf(t, false, "LotmanGoodInit", server.URL) + success, cleanup := setupLotmanFromConf(t, false, "LotmanGoodInit", server.URL, nil) defer cleanup() require.True(t, success) @@ -159,7 +194,7 @@ func TestLotmanInitFromConfig(t *testing.T) { server_utils.ResetTestState() server := getMockDiscoveryHost() viper.Set("Federation.DiscoveryUrl", server.URL) - success, cleanup := setupLotmanFromConf(t, true, "LotmanInitConf", server.URL) + success, cleanup := setupLotmanFromConf(t, true, "LotmanInitConf", server.URL, nil) defer cleanup() require.True(t, success) @@ -180,8 +215,6 @@ func TestLotmanInitFromConfig(t *testing.T) { require.Equal(t, "default", defaultLot.LotName) require.Equal(t, server.URL, defaultLot.Owner) require.Equal(t, "default", defaultLot.Parents[0]) - require.Equal(t, 100.0, *(defaultLot.MPA.DedicatedGB)) - require.Equal(t, int64(1000), (defaultLot.MPA.MaxNumObjects.Value)) // Now root rootOutput := make([]byte, 4096) @@ -197,8 +230,6 @@ func TestLotmanInitFromConfig(t *testing.T) { require.Equal(t, "root", rootLot.LotName) require.Equal(t, server.URL, rootLot.Owner) require.Equal(t, "root", rootLot.Parents[0]) - require.Equal(t, 1.0, *(rootLot.MPA.DedicatedGB)) - require.Equal(t, int64(10), rootLot.MPA.MaxNumObjects.Value) require.Equal(t, "/", rootLot.Paths[0].Path) require.False(t, rootLot.Paths[0].Recursive) @@ -260,7 +291,7 @@ func TestGetAuthzCallers(t *testing.T) { server_utils.ResetTestState() server := getMockDiscoveryHost() viper.Set("Federation.DiscoveryUrl", server.URL) - success, cleanup := setupLotmanFromConf(t, true, "LotmanGetAuthzCalleres", server.URL) + success, cleanup := setupLotmanFromConf(t, true, "LotmanGetAuthzCalleres", server.URL, nil) defer cleanup() require.True(t, success) @@ -281,7 +312,7 @@ func TestGetLot(t *testing.T) { server_utils.ResetTestState() server := getMockDiscoveryHost() viper.Set("Federation.DiscoveryUrl", server.URL) - success, cleanup := setupLotmanFromConf(t, true, "LotmanGetLot", server.URL) + success, cleanup := setupLotmanFromConf(t, true, "LotmanGetLot", server.URL, nil) defer cleanup() require.True(t, success) @@ -306,7 +337,7 @@ func TestUpdateLot(t *testing.T) { server_utils.ResetTestState() server := getMockDiscoveryHost() viper.Set("Federation.DiscoveryUrl", server.URL) - success, cleanup := setupLotmanFromConf(t, true, "LotmanInitConf", server.URL) + success, cleanup := setupLotmanFromConf(t, true, "LotmanInitConf", server.URL, nil) defer cleanup() require.True(t, success) @@ -346,7 +377,7 @@ func TestDeleteLotsRec(t *testing.T) { server_utils.ResetTestState() server := getMockDiscoveryHost() viper.Set("Federation.DiscoveryUrl", server.URL) - success, cleanup := setupLotmanFromConf(t, true, "LotmanInitConf", server.URL) + success, cleanup := setupLotmanFromConf(t, true, "LotmanInitConf", server.URL, nil) defer cleanup() require.True(t, success) @@ -363,3 +394,602 @@ func TestDeleteLotsRec(t *testing.T) { require.Error(t, err, "Expected 
error for non-existent lot") require.Nil(t, lot) } + +// In any case where two MPA values are both set, the value in MPA1 should win. +func TestMergeMPAs(t *testing.T) { + dedicatedGB1 := 10.0 + maxNumObjects1 := Int64FromFloat{Value: 50} + creationTime1 := Int64FromFloat{Value: 200} + deletionTime1 := Int64FromFloat{Value: 400} + + dedicatedGB2 := 20.0 + opportunisticGB2 := 30.0 + maxNumObjects2 := Int64FromFloat{Value: 100} + expirationTime2 := Int64FromFloat{Value: 300} + + mpa1 := &MPA{ + DedicatedGB: &dedicatedGB1, + MaxNumObjects: &maxNumObjects1, + CreationTime: &creationTime1, + DeletionTime: &deletionTime1, + } + + mpa2 := &MPA{ + DedicatedGB: &dedicatedGB2, + OpportunisticGB: &opportunisticGB2, + MaxNumObjects: &maxNumObjects2, + ExpirationTime: &expirationTime2, + } + + expectedMPA := &MPA{ + DedicatedGB: &dedicatedGB1, + OpportunisticGB: &opportunisticGB2, + MaxNumObjects: &maxNumObjects1, + CreationTime: &creationTime1, + ExpirationTime: &expirationTime2, + DeletionTime: &deletionTime1, + } + + mergedMPA := mergeMPAs(mpa1, mpa2) + require.Equal(t, expectedMPA, mergedMPA) +} + +func TestLotMerging(t *testing.T) { + // Owner should be set to lot1 owner + owner1 := "owner1" + owner2 := "owner2" + + // Parents should be the union of lot1 and lot2 parents + parent1 := "parent1" + parent2 := "parent2" + parent3 := "parent3" + + // MPA should be the MPA of lot1, unless no value is set + dedicatedGB1 := 10.0 + dedicatedGB2 := 20.0 + opportunisticGB2 := 30.0 + + lot1 := Lot{ + LotName: "some-lot", + Owner: owner1, + Parents: []string{parent1, parent2}, + MPA: &MPA{ + DedicatedGB: &dedicatedGB1, + }, + } + + lot2 := Lot{ + LotName: "some-lot", + Owner: owner2, + Parents: []string{parent2, parent3}, + MPA: &MPA{ + DedicatedGB: &dedicatedGB2, + OpportunisticGB: &opportunisticGB2, + }, + } + + expectedMergedLot := Lot{ + LotName: "some-lot", + Owner: owner1, + Parents: []string{parent1, parent2, parent3}, + MPA: &MPA{ + DedicatedGB: &dedicatedGB1, + OpportunisticGB: &opportunisticGB2, + }, + } + + mergedLot, err := mergeLots(lot1, lot2) + require.NoError(t, err) + require.Equal(t, expectedMergedLot.LotName, mergedLot.LotName) + require.Equal(t, expectedMergedLot.Owner, mergedLot.Owner) + require.ElementsMatch(t, expectedMergedLot.Parents, mergedLot.Parents) + require.Equal(t, expectedMergedLot.MPA.DedicatedGB, mergedLot.MPA.DedicatedGB) + require.Equal(t, expectedMergedLot.MPA.OpportunisticGB, mergedLot.MPA.OpportunisticGB) + + // Now test with no MPA set in lot1 + lot1.MPA = nil + expectedMergedLot.MPA.DedicatedGB = &dedicatedGB2 + mergedLot, err = mergeLots(lot1, lot2) + require.NoError(t, err) + require.Equal(t, expectedMergedLot.MPA.DedicatedGB, mergedLot.MPA.DedicatedGB) + + // Make sure we can't merge lots with different names + lot2.LotName = "different-lot" + mergedLot, err = mergeLots(lot1, lot2) + require.Error(t, err) + + // Test merging lot maps -- reset lot2's name so we can merge them + // Here we intentionally assign lot2 to the lot1 key to test that the merge works + // while also adding lot2 as its own key to test that the merge works with multiple lots + lot2.LotName = "some-lot" + lotMap1 := map[string]Lot{ + "lot1": lot1, + } + lotMap2 := map[string]Lot{ + "lot1": lot2, + "lot2": lot2, + } + mergedMaps, err := mergeLotMaps(lotMap1, lotMap2) + require.NoError(t, err) + require.Equal(t, 2, len(mergedMaps)) + require.Equal(t, "some-lot", mergedMaps["lot1"].LotName) + require.Equal(t, expectedMergedLot.Owner, mergedMaps["lot1"].Owner) + require.ElementsMatch(t, 
expectedMergedLot.Parents, mergedMaps["lot1"].Parents) + require.Equal(t, expectedMergedLot.MPA.DedicatedGB, mergedMaps["lot1"].MPA.DedicatedGB) + require.Equal(t, expectedMergedLot.MPA.OpportunisticGB, mergedMaps["lot1"].MPA.OpportunisticGB) +} + +// Read the mockup yaml and make sure we grab the list of policy definitions as expected +func TestGetPolicyMap(t *testing.T) { + server_utils.ResetTestState() + defer server_utils.ResetTestState() + viper.SetConfigType("yaml") + err := viper.ReadConfig(strings.NewReader(yamlMockup)) + if err != nil { + t.Fatalf("Error reading config: %v", err) + } + + policyMap, err := getPolicyMap() + require.NoError(t, err) + require.Equal(t, 1, len(policyMap)) + require.Contains(t, policyMap, "different-policy") + require.Equal(t, "different-policy", viper.GetString("Lotman.EnabledPolicy")) +} + +func TestByteConversions(t *testing.T) { + bytes := uint64(120530000000) // 120.53 GB + + // forward pass + gb := bytesToGigabytes(bytes) + require.Equal(t, 120.53, gb) + + // reverse pass + bytes = gigabytesToBytes(gb) + require.Equal(t, uint64(120530000000), bytes) +} + +// Valid lot configuration not only requires that all fields are present, but also that the sum of all lots' dedicatedGB values +// does not exceed the high watermark. +func TestLotValidation(t *testing.T) { + type testCase struct { + name string + lots []Lot + hwm string + totalDiskSpaceB uint64 + errorStrings []string + } + + createValidLot := func(name string, owner string, parent string, path string, dedicatedGB float64) Lot { + return Lot{ + LotName: name, + Owner: owner, + Parents: []string{parent}, + Paths: []LotPath{ + { + Path: path, + Recursive: false, + }, + }, + MPA: &MPA{ + DedicatedGB: &dedicatedGB, + OpportunisticGB: &dedicatedGB, + CreationTime: &Int64FromFloat{Value: 1}, + ExpirationTime: &Int64FromFloat{Value: 2}, + DeletionTime: &Int64FromFloat{Value: 3}, + }, + } + } + + testCases := []testCase{ + { + name: "Valid lots", + lots: []Lot{ + createValidLot("lot1", "owner1", "root", "/foo/bar", 10.0), + createValidLot("lot2", "owner2", "root", "/foo/baz", 20.0), + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: nil, + }, + { + name: "Missing lot name", + lots: []Lot{ + createValidLot("", "owner1", "root", "/foo/bar", 10.0), + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"detected a lot with no name"}, + }, + { + name: "Missing lot owner", + lots: []Lot{ + createValidLot("lot1", "", "root", "/foo/bar", 10.0), + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"the lot 'lot1' is missing required values", "Owner"}, + }, + { + name: "Missing lot parent", + lots: []Lot{ + createValidLot("lot1", "owner", "", "/foo/bar", 10.0), + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"the lot 'lot1' is missing required values", "Parents"}, + }, + { + name: "Missing lot path", + lots: []Lot{ + createValidLot("lot1", "owner", "root", "", 10.0), + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"the lot 'lot1' is missing required values", "Paths.Path"}, + }, + { + name: "Missing lot MPA", + lots: []Lot{ + { + LotName: "lot1", + Owner: "owner1", + Parents: []string{"root"}, + Paths: []LotPath{ + { + Path: "/foo/bar", + Recursive: false, + }, + }, + }, + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"the lot 'lot1' is missing required values", "ManagementPolicyAttrs"}, + }, + { + name: 
"Missing lot MPA subfield", + lots: []Lot{ + { + LotName: "lot1", + Owner: "owner1", + Parents: []string{"root"}, + Paths: []LotPath{ + { + Path: "/foo/bar", + Recursive: false, + }, + }, + MPA: &MPA{}, + }, + }, + hwm: "30g", + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"the lot 'lot1' is missing required values", "ManagementPolicyAttrs.DedicatedGB"}, + }, + { + name: "Invalid dedGB sum", + lots: []Lot{ + createValidLot("lot1", "owner1", "root", "/foo/bar", 10.0), + createValidLot("lot2", "owner2", "root", "/foo/baz", 20.0), + }, + hwm: "20g", // sum of dedGB should not be greater than hwm + totalDiskSpaceB: gigabytesToBytes(40.0), + errorStrings: []string{"the sum of all lots' dedicatedGB values exceeds the high watermark of 20g."}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + server_utils.ResetTestState() + defer server_utils.ResetTestState() + viper.Set("Cache.HighWaterMark", tc.hwm) + err := validateLotsConfig(tc.lots, tc.totalDiskSpaceB) + if len(tc.errorStrings) > 0 { + require.Error(t, err) + for _, errStr := range tc.errorStrings { + require.Contains(t, err.Error(), errStr) + } + } else { + require.NoError(t, err) + } + }) + } +} + +// Make sure we handle various suffixes and non-suffixed percentages correctly +func TestConvertWatermarkToBytes(t *testing.T) { + type testCase struct { + Name string + Watermark string + Expected uint64 + ErrorExpected bool + } + + totDisk := uint64(1000000000000) // 1TB + testCases := []testCase{ + { + Name: "Valid 'k' suffix", + Watermark: "100k", + Expected: uint64(100000), // 100KB + ErrorExpected: false, + }, + { + Name: "Valid 'm' suffix", + Watermark: "100m", + Expected: uint64(100000000), // 100MB + ErrorExpected: false, + }, + { + Name: "Valid 'g' suffix", + Watermark: "100g", + Expected: uint64(100000000000), // 100GB + ErrorExpected: false, + }, + { + Name: "Valid 't' suffix", + Watermark: "100t", + Expected: uint64(100000000000000), // 100TB + ErrorExpected: false, + }, + { + Name: "No suffix is percentage", + Watermark: "50", + Expected: uint64(500000000000), // 500GB + ErrorExpected: false, + }, + { + Name: "Invalid suffix", + Watermark: "100z", + Expected: uint64(0), + ErrorExpected: true, + }, + { + Name: "Invalid value", + Watermark: "foo", + Expected: uint64(0), + ErrorExpected: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.Name, func(t *testing.T) { + result, err := convertWatermarkToBytes(tc.Watermark, totDisk) + if tc.ErrorExpected { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, fmt.Sprintf("%d", tc.Expected), fmt.Sprintf("%d", result)) + } + }) + } +} + +// If so configured, we'll divide unallocated space between lot's dedicated GB and +// opportunistict GB values. This test ensures the calculations are correct and that +// hardcoded configuration isn't modified. +// I don't test for errors here because the internal functions capable of generating +// errors are tested elsewhere (e.g. 
+
+// If so configured, we'll divide unallocated space between lots' dedicated GB and
+// opportunistic GB values. This test ensures the calculations are correct and that
+// hardcoded configuration isn't modified.
+// I don't test for errors here because the internal functions capable of generating
+// errors are tested elsewhere (e.g. convertWatermarkToBytes).
+func TestDivideRemainingSpace(t *testing.T) {
+	server_utils.ResetTestState()
+	defer server_utils.ResetTestState()
+	dedGB := float64(10.0)
+	oppGB := float64(1.5)
+	lotMap := map[string]Lot{
+		"lot1": {
+			LotName: "lot1",
+			MPA: &MPA{
+				DedicatedGB:     &dedGB,
+				OpportunisticGB: &oppGB,
+			},
+		},
+		"lot2": {
+			LotName: "lot2",
+			MPA:     &MPA{},
+		},
+		"lot3": {
+			LotName: "lot3",
+			MPA: &MPA{
+				DedicatedGB: &dedGB,
+			},
+		},
+		"lot4": {
+			LotName: "lot4",
+			MPA: &MPA{
+				OpportunisticGB: &oppGB, // hardcoded values should be respected
+			},
+		},
+	}
+
+	totalDiskSpaceB := uint64(30000000000) // 30GB
+	viper.Set("Cache.HighWaterMark", "25g")
+	err := divideRemainingSpace(&lotMap, totalDiskSpaceB)
+	require.NoError(t, err)
+	// dedGB divisions should sum to HWM
+	require.Equal(t, 10.0, *lotMap["lot1"].MPA.DedicatedGB)
+	require.Equal(t, 2.5, *lotMap["lot2"].MPA.DedicatedGB)
+	require.Equal(t, 10.0, *lotMap["lot3"].MPA.DedicatedGB)
+	require.Equal(t, 2.5, *lotMap["lot4"].MPA.DedicatedGB)
+	// oppGB should be HWM - dedGB unless hardcoded
+	require.Equal(t, 1.5, *lotMap["lot1"].MPA.OpportunisticGB)
+	require.Equal(t, 22.5, *lotMap["lot2"].MPA.OpportunisticGB)
+	require.Equal(t, 15.0, *lotMap["lot3"].MPA.OpportunisticGB)
+	require.Equal(t, 1.5, *lotMap["lot4"].MPA.OpportunisticGB)
+}
+
+// Pretty straightforward -- make sure we can grab viper config and use it when
+// setting lot timestamps if they're not pre-configured.
+func TestConfigLotTimestamps(t *testing.T) {
+	server_utils.ResetTestState()
+	defer server_utils.ResetTestState()
+	now := time.Now().UnixMilli()
+	viper.Set("Lotman.DefaultLotExpirationLifetime", "24h")
+	viper.Set("Lotman.DefaultLotDeletionLifetime", "48h")
+
+	defaultExpiration := now + 24*60*60*1000 // 24 hours in milliseconds
+	defaultDeletion := now + 48*60*60*1000   // 48 hours in milliseconds
+
+	// Helper function to create a lot with optional timestamps
+	createLot := func(creationTime, expirationTime, deletionTime *Int64FromFloat) Lot {
+		return Lot{
+			MPA: &MPA{
+				CreationTime:   creationTime,
+				ExpirationTime: expirationTime,
+				DeletionTime:   deletionTime,
+			},
+		}
+	}
+
+	// Define the test cases
+	testCases := []struct {
+		name           string
+		lotMap         map[string]Lot
+		expectedLotMap map[string]Lot
+	}{
+		{
+			name: "Lots with missing timestamps",
+			lotMap: map[string]Lot{
+				"lot1": createLot(nil, nil, nil),
+				"lot2": createLot(&Int64FromFloat{Value: 0}, &Int64FromFloat{Value: 0}, &Int64FromFloat{Value: 0}),
+			},
+			expectedLotMap: map[string]Lot{
+				"lot1": createLot(&Int64FromFloat{Value: now}, &Int64FromFloat{Value: defaultExpiration}, &Int64FromFloat{Value: defaultDeletion}),
+				"lot2": createLot(&Int64FromFloat{Value: now}, &Int64FromFloat{Value: defaultExpiration}, &Int64FromFloat{Value: defaultDeletion}),
+			},
+		},
+		{
+			name: "Lots with existing timestamps",
+			lotMap: map[string]Lot{
+				"lot1": createLot(&Int64FromFloat{Value: 1000}, &Int64FromFloat{Value: 2000}, &Int64FromFloat{Value: 3000}),
+			},
+			expectedLotMap: map[string]Lot{
+				"lot1": createLot(&Int64FromFloat{Value: 1000}, &Int64FromFloat{Value: 2000}, &Int64FromFloat{Value: 3000}),
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			configLotTimestamps(&tc.lotMap)
+			assert.Equal(t, tc.expectedLotMap, tc.lotMap)
+		})
+	}
+}
+
+func TestConfigLotsFromFedPrefixes(t *testing.T) {
+	server_utils.ResetTestState()
+	defer server_utils.ResetTestState()
+
+	// Most of these aren't actually used by the test, but set them all to prevent
+	// auto discovery and avoid needing to spin up a separate mock discovery server.
+	fed := pelican_url.FederationDiscovery{
+		DiscoveryEndpoint: "https://dne-discovery.com",
+		DirectorEndpoint:  "https://dne-director.com",
+		RegistryEndpoint:  "https://dne-registry.com",
+		JwksUri:           "https://dne-jwks.com",
+		BrokerEndpoint:    "https://dne-broker.com",
+	}
+	config.SetFederation(fed)
+
+	issuer1Str := "https://issuer1.com"
+	issuer1, _ := url.Parse(issuer1Str)
+	issuer2Str := "https://issuer2.com"
+	issuer2, _ := url.Parse(issuer2Str)
+	testCases := []struct {
+		name                string
+		nsAds               []server_structs.NamespaceAdV2
+		federationIssuerErr error
+		expectedLotMap      map[string]Lot
+		expectedError       string
+	}{
+		{
+			name: "Valid namespaces",
+			nsAds: []server_structs.NamespaceAdV2{
+				{
+					Path: "/namespace1",
+					Issuer: []server_structs.TokenIssuer{
+						{IssuerUrl: *issuer1},
+					},
+				},
+				{
+					Path: "/namespace2",
+					Issuer: []server_structs.TokenIssuer{
+						{IssuerUrl: *issuer2},
+					},
+				},
+			},
+			expectedLotMap: map[string]Lot{
+				"/namespace1": {
+					LotName: "/namespace1",
+					Owner:   issuer1Str,
+					Parents: []string{"root"},
+					Paths: []LotPath{
+						{
+							Path:      "/namespace1",
+							Recursive: true,
+						},
+					},
+				},
+				"/namespace2": {
+					LotName: "/namespace2",
+					Owner:   issuer2Str,
+					Parents: []string{"root"},
+					Paths: []LotPath{
+						{
+							Path:      "/namespace2",
+							Recursive: true,
+						},
+					},
+				},
+			},
+			expectedError: "",
+		},
+		{
+			name: "Skip monitoring namespaces",
+			nsAds: []server_structs.NamespaceAdV2{
+				{
+					Path: "/pelican/monitoring/namespace1",
+					Issuer: []server_structs.TokenIssuer{
+						{IssuerUrl: *issuer1},
+					},
+				},
+				{
+					Path: "/namespace2",
+					Issuer: []server_structs.TokenIssuer{
+						{IssuerUrl: *issuer2},
+					},
+				},
+			},
+			expectedLotMap: map[string]Lot{
+				"/namespace2": {
+					LotName: "/namespace2",
+					Owner:   issuer2Str,
+					Parents: []string{"root"},
+					Paths: []LotPath{
+						{
+							Path:      "/namespace2",
+							Recursive: true,
+						},
+					},
+				},
+			},
+			expectedError: "",
+		},
+	}
+
+	// Run the test cases
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Call the function
+			lotMap, err := configLotsFromFedPrefixes(tc.nsAds)
+
+			// Check the result
+			if tc.expectedError == "" {
+				require.NoError(t, err)
+			} else {
+				require.Error(t, err)
+				assert.Contains(t, err.Error(), tc.expectedError)
+			}
+			assert.Equal(t, tc.expectedLotMap, lotMap)
+		})
+	}
+}
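A note for readers: the watermark test cases above fix the semantics of the unexported convertWatermarkToBytes helper, which this patch exercises but never shows. Below is a minimal sketch of the behavior those cases imply (the name watermarkToBytes is hypothetical, and this is not the actual Pelican implementation):

package main

import (
	"errors"
	"fmt"
	"strconv"
)

// watermarkToBytes sketches the semantics the TestConvertWatermarkToBytes
// cases assume: 'k'/'m'/'g'/'t' suffixes are decimal (power-of-1000) byte
// multipliers, and a bare number is a percentage of the total disk.
func watermarkToBytes(wm string, totalDiskB uint64) (uint64, error) {
	if wm == "" {
		return 0, errors.New("empty watermark value")
	}
	multipliers := map[byte]float64{'k': 1e3, 'm': 1e6, 'g': 1e9, 't': 1e12}
	if mult, ok := multipliers[wm[len(wm)-1]]; ok {
		val, err := strconv.ParseFloat(wm[:len(wm)-1], 64)
		if err != nil {
			return 0, err
		}
		return uint64(val * mult), nil // "100g" -> 100000000000
	}
	// No recognized suffix: treat the value as a percentage of total disk.
	// Unknown suffixes like "100z" fail to parse here, yielding an error.
	pct, err := strconv.ParseFloat(wm, 64)
	if err != nil {
		return 0, err
	}
	return uint64(pct / 100 * float64(totalDiskB)), nil
}

func main() {
	b, _ := watermarkToBytes("50", 1000000000000)
	fmt.Println(b) // 500000000000, i.e. 50% of a 1TB disk
}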
diff --git a/lotman/resources/lots-config.yaml b/lotman/resources/lots-config.yaml
index 9fdf31edb..5ac783e87 100644
--- a/lotman/resources/lots-config.yaml
+++ b/lotman/resources/lots-config.yaml
@@ -18,83 +18,50 @@
 # Configuration options used to test Lot declarations
 Lotman:
-  Lots:
-    - LotName: "default"
-      Owner: "SHOULD_OVERRIDE"
-      Parents:
-        - "default"
-      ManagementPolicyAttrs:
-        DedicatedGB: 100
-        OpportunisticGB: 200
-        # Wrapping these in a map is an unfortunate side effect of the
-        # way we need to handle the float-->int conversion.
-        MaxNumObjects:
-          Value: 1000
-        CreationTime:
-          Value: 1234
-        ExpirationTime:
-          Value: 12345
-        DeletionTime:
-          Value: 123456
-
-    - LotName: "root"
-      Owner: "SHOULD_OVERRIDE"
-      Parents:
-        - "root"
-      Paths:
-        - Path: "/"
-          Recursive: false
-      ManagementPolicyAttrs:
-        DedicatedGB: 1
-        OpportunisticGB: 2
-        # Wrapping these in a map is an unfortunate side effect of the
-        # way we need to handle the float-->int conversion.
-        MaxNumObjects:
-          Value: 10
-        CreationTime:
-          Value: 1234
-        ExpirationTime:
-          Value: 12345
-        DeletionTime:
-          Value: 123456
-
-    - LotName: "test-1"
-      Owner: "https://different-fake-federation.com"
-      Parents:
-        - "root"
-      Paths:
-        - Path: "/test-1"
-          Recursive: false
-      ManagementPolicyAttrs:
-        DedicatedGB: 1.11
-        OpportunisticGB: 2.22
-        # Wrapping these in a map is an unfortunate side effect of the
-        # way we need to handle the float-->int conversion.
-        MaxNumObjects:
-          Value: 42
-        CreationTime:
-          Value: 1234
-        ExpirationTime:
-          Value: 12345
-        DeletionTime:
-          Value: 123456
-    - LotName: "test-2"
-      Owner: "https://another-fake-federation.com"
-      Parents:
-        - "test-1"
-      Paths:
-        - Path: "/test-1/test-2"
-          Recursive: true
-      ManagementPolicyAttrs:
-        DedicatedGB: 1.11
-        OpportunisticGB: 2.22
-        # Wrapping these in a map is an unfortunate side effect of the
-        # way we need to handle the float-->int conversion.
-        MaxNumObjects:
-          Value: 42
-        CreationTime:
-          Value: 1234
-        ExpirationTime:
-          Value: 12345
-        DeletionTime:
-          Value: 123456
+  EnabledPolicy: "different-policy"
+  PolicyDefinitions:
+    - PolicyName: "different-policy"
+      DivideUnallocated: false
+      PurgeOrder: ["ded", "opp", "exp", "del"]
+      DiscoverPrefixes: false
+      Lots:
+        - LotName: "test-1"
+          Owner: "https://different-fake-federation.com"
+          Parents:
+            - "root"
+          Paths:
+            - Path: "/test-1"
+              Recursive: false
+          ManagementPolicyAttrs:
+            DedicatedGB: 1.11
+            OpportunisticGB: 2.22
+            # Wrapping these in a map is an unfortunate side effect of the
+            # way we need to handle the float-->int conversion.
+            MaxNumObjects:
+              Value: 42
+            CreationTime:
+              Value: 1234
+            ExpirationTime:
+              Value: 12345
+            DeletionTime:
+              Value: 123456
+        - LotName: "test-2"
+          Owner: "https://another-fake-federation.com"
+          Parents:
+            - "test-1"
+          Paths:
+            - Path: "/test-1/test-2"
+              Recursive: true
+          ManagementPolicyAttrs:
+            DedicatedGB: 1.11
+            OpportunisticGB: 2.22
+            # Wrapping these in a map is an unfortunate side effect of the
+            # way we need to handle the float-->int conversion.
+            MaxNumObjects:
+              Value: 42
+            CreationTime:
+              Value: 1234
+            ExpirationTime:
+              Value: 12345
+            DeletionTime:
+              Value: 123456
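This fixture deliberately sets DivideUnallocated: false so the hardcoded values survive untouched; when the option is true, TestDivideRemainingSpace earlier in this diff pins down the fair-share arithmetic. A worked version of that test's numbers follows (a sketch of the expected math, not the divideRemainingSpace implementation):

package main

import "fmt"

// Worked example of the numbers asserted in TestDivideRemainingSpace:
// lots with a hardcoded DedicatedGB keep it, the remainder of the high
// watermark is split evenly among lots without one, and any lot lacking
// a hardcoded OpportunisticGB is topped up to HWM - DedicatedGB.
func main() {
	hwmGB := 25.0                      // Cache.HighWaterMark of "25g"
	hardcoded := []float64{10.0, 10.0} // lot1 and lot3
	numUnset := 2.0                    // lot2 and lot4 have no DedicatedGB

	allocated := 0.0
	for _, gb := range hardcoded {
		allocated += gb
	}
	dedPerUnset := (hwmGB - allocated) / numUnset
	fmt.Println(dedPerUnset)         // 2.5  -> lot2 and lot4's DedicatedGB
	fmt.Println(hwmGB - dedPerUnset) // 22.5 -> lot2's derived OpportunisticGB
	fmt.Println(hwmGB - 10.0)        // 15.0 -> lot3's derived OpportunisticGB
	// lot1 and lot4 keep their hardcoded OpportunisticGB of 1.5.
}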
diff --git a/param/parameters.go b/param/parameters.go
index 581270186..e9cbfbec0 100644
--- a/param/parameters.go
+++ b/param/parameters.go
@@ -196,6 +196,7 @@ var (
 	Logging_Origin_Xrd                  = StringParam{"Logging.Origin.Xrd"}
 	Logging_Origin_Xrootd               = StringParam{"Logging.Origin.Xrootd"}
 	Lotman_DbLocation                   = StringParam{"Lotman.DbLocation"}
+	Lotman_EnabledPolicy                = StringParam{"Lotman.EnabledPolicy"}
 	Lotman_LibLocation                  = StringParam{"Lotman.LibLocation"}
 	Monitoring_DataLocation             = StringParam{"Monitoring.DataLocation"}
 	OIDC_AuthorizationEndpoint          = StringParam{"OIDC.AuthorizationEndpoint"}
@@ -391,6 +392,8 @@ var (
 	Director_OriginCacheHealthTestInterval = DurationParam{"Director.OriginCacheHealthTestInterval"}
 	Director_StatTimeout                   = DurationParam{"Director.StatTimeout"}
 	Federation_TopologyReloadInterval      = DurationParam{"Federation.TopologyReloadInterval"}
+	Lotman_DefaultLotDeletionLifetime      = DurationParam{"Lotman.DefaultLotDeletionLifetime"}
+	Lotman_DefaultLotExpirationLifetime    = DurationParam{"Lotman.DefaultLotExpirationLifetime"}
 	Monitoring_DataRetention               = DurationParam{"Monitoring.DataRetention"}
 	Monitoring_TokenExpiresIn              = DurationParam{"Monitoring.TokenExpiresIn"}
 	Monitoring_TokenRefreshInterval        = DurationParam{"Monitoring.TokenRefreshInterval"}
@@ -412,7 +415,7 @@ var (
 	GeoIPOverrides                        = ObjectParam{"GeoIPOverrides"}
 	Issuer_AuthorizationTemplates         = ObjectParam{"Issuer.AuthorizationTemplates"}
 	Issuer_OIDCAuthenticationRequirements = ObjectParam{"Issuer.OIDCAuthenticationRequirements"}
-	Lotman_Lots                           = ObjectParam{"Lotman.Lots"}
+	Lotman_PolicyDefinitions              = ObjectParam{"Lotman.PolicyDefinitions"}
 	Origin_Exports                        = ObjectParam{"Origin.Exports"}
 	Registry_CustomRegistrationFields     = ObjectParam{"Registry.CustomRegistrationFields"}
 	Registry_Institutions                 = ObjectParam{"Registry.Institutions"}
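The two new DurationParam entries back the timestamp defaulting exercised by TestConfigLotTimestamps: a missing or zero timestamp becomes "now" (creation) or now plus the configured lifetime (expiration/deletion), all in Unix milliseconds. A hedged sketch of that rule, with a hypothetical helper name rather than the actual configLotTimestamps implementation:

package main

import (
	"fmt"
	"time"

	"github.com/spf13/viper"
)

// defaultLotTimestamps sketches the defaulting rule the test's expected
// values encode; zero or unset means "apply the default". All values are
// Unix milliseconds.
func defaultLotTimestamps(creation, expiration, deletion int64) (int64, int64, int64) {
	now := time.Now().UnixMilli()
	if creation == 0 {
		creation = now
	}
	if expiration == 0 {
		expiration = now + viper.GetDuration("Lotman.DefaultLotExpirationLifetime").Milliseconds()
	}
	if deletion == 0 {
		deletion = now + viper.GetDuration("Lotman.DefaultLotDeletionLifetime").Milliseconds()
	}
	return creation, expiration, deletion
}

func main() {
	viper.Set("Lotman.DefaultLotExpirationLifetime", "24h")
	viper.Set("Lotman.DefaultLotDeletionLifetime", "48h")
	c, e, d := defaultLotTimestamps(0, 0, 0)
	fmt.Println(c, e, d) // now, now + 24h, now + 48h
}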
diff --git a/param/parameters_struct.go b/param/parameters_struct.go
index 68970b101..0b760a722 100644
--- a/param/parameters_struct.go
+++ b/param/parameters_struct.go
@@ -153,9 +153,12 @@ type Config struct {
 	} `mapstructure:"logging" yaml:"Logging"`
 	Lotman struct {
 		DbLocation                   string        `mapstructure:"dblocation" yaml:"DbLocation"`
+		DefaultLotDeletionLifetime   time.Duration `mapstructure:"defaultlotdeletionlifetime" yaml:"DefaultLotDeletionLifetime"`
+		DefaultLotExpirationLifetime time.Duration `mapstructure:"defaultlotexpirationlifetime" yaml:"DefaultLotExpirationLifetime"`
 		EnableAPI                    bool          `mapstructure:"enableapi" yaml:"EnableAPI"`
+		EnabledPolicy                string        `mapstructure:"enabledpolicy" yaml:"EnabledPolicy"`
 		LibLocation                  string        `mapstructure:"liblocation" yaml:"LibLocation"`
-		Lots                         interface{}   `mapstructure:"lots" yaml:"Lots"`
+		PolicyDefinitions            interface{}   `mapstructure:"policydefinitions" yaml:"PolicyDefinitions"`
 	} `mapstructure:"lotman" yaml:"Lotman"`
 	MinimumDownloadSpeed int `mapstructure:"minimumdownloadspeed" yaml:"MinimumDownloadSpeed"`
 	Monitoring struct {
@@ -461,9 +464,12 @@ type configWithType struct {
 	}
 	Lotman struct {
 		DbLocation                   struct { Type string; Value string }
+		DefaultLotDeletionLifetime   struct { Type string; Value time.Duration }
+		DefaultLotExpirationLifetime struct { Type string; Value time.Duration }
 		EnableAPI                    struct { Type string; Value bool }
+		EnabledPolicy                struct { Type string; Value string }
 		LibLocation                  struct { Type string; Value string }
-		Lots                         struct { Type string; Value interface{} }
+		PolicyDefinitions            struct { Type string; Value interface{} }
 	}
 	MinimumDownloadSpeed struct { Type string; Value int }
 	Monitoring struct {
diff --git a/server_utils/server_utils.go b/server_utils/server_utils.go
index 08fe2ceec..528470a8d 100644
--- a/server_utils/server_utils.go
+++ b/server_utils/server_utils.go
@@ -32,6 +32,7 @@ import (
 	"net/http"
 	"reflect"
+	"strings"
 	"time"
 
 	"github.com/fsnotify/fsnotify"
 	"github.com/pkg/errors"
@@ -279,3 +280,44 @@ func ResetTestState() {
 	config.ResetConfig()
 	ResetOriginExports()
 }
+
+// Given a slice of NamespaceAdV2 objects, return a slice of unique top-level prefixes.
+//
+// For example, given:
+//   - /foo
+//   - /foo/bar
+//   - /foo/bar/baz
+//   - /goo
+//   - /some/path
+//
+// the function returns /foo/, /goo/, and /some/path/ (paths are normalized with a trailing slash).
+func FilterTopLevelPrefixes(nsAds []server_structs.NamespaceAdV2) []server_structs.NamespaceAdV2 {
+	prefixMap := make(map[string]server_structs.NamespaceAdV2)
+	for _, nsAd := range nsAds {
+		if !strings.HasSuffix(nsAd.Path, "/") {
+			nsAd.Path = nsAd.Path + "/"
+		}
+
+		add := true
+		for prefix := range prefixMap {
+			if strings.HasPrefix(nsAd.Path, prefix) {
+				add = false
+				break
+			}
+			// Consider the case where we may have already added a longer path
+			// and we need to remove it in favor of the shorter path
+			if strings.HasPrefix(prefix, nsAd.Path) {
+				delete(prefixMap, prefix)
+			}
+		}
+		if add {
+			prefixMap[nsAd.Path] = nsAd
+		}
+	}
+
+	var uniquePrefixes []server_structs.NamespaceAdV2
+	for _, nsAd := range prefixMap {
+		uniquePrefixes = append(uniquePrefixes, nsAd)
+	}
+	return uniquePrefixes
+}
diff --git a/server_utils/server_utils_test.go b/server_utils/server_utils_test.go
index 741f5854e..bc223e5e0 100644
--- a/server_utils/server_utils_test.go
+++ b/server_utils/server_utils_test.go
@@ -33,6 +33,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
+	"github.com/pelicanplatform/pelican/server_structs"
 	"github.com/pelicanplatform/pelican/test_utils"
 )
@@ -158,3 +159,33 @@
 		assert.Equal(t, expectedErrorMsg, err.Error())
 	})
 }
+
+func TestFilterTopLevelPrefixes(t *testing.T) {
+	namespaceAds := []server_structs.NamespaceAdV2{
+		{Path: "/foo"},
+		{Path: "/foo/bar"},
+		{Path: "/foo/bar/baz"},
+		{Path: "/foogoo"},
+		// Putting /goo/bar ahead of /goo/ to test that the function removes this
+		// in favor of /goo/
+		{Path: "/goo/bar"},
+		{Path: "/goo/"},
+		{Path: "/some/other/path"},
+	}
+
+	filtered := FilterTopLevelPrefixes(namespaceAds)
+
+	var filteredPaths []string
+	for _, nsAd := range filtered {
+		filteredPaths = append(filteredPaths, nsAd.Path)
+	}
+
+	expectedPaths := []string{
+		"/foo/",
+		"/foogoo/",
+		"/goo/",
+		"/some/other/path/",
+	}
+
+	assert.ElementsMatch(t, expectedPaths, filteredPaths)
+}
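One design note on FilterTopLevelPrefixes: the trailing-slash normalization is what keeps the prefix test component-aware. Without it, a plain strings.HasPrefix would treat /foogoo as a child of /foo, which is exactly the case TestFilterTopLevelPrefixes guards against with its /foogoo entry. A minimal illustration:

package main

import (
	"fmt"
	"strings"
)

// Why FilterTopLevelPrefixes appends "/" before comparing paths:
func main() {
	fmt.Println(strings.HasPrefix("/foogoo", "/foo"))    // true  -- wrong for path semantics
	fmt.Println(strings.HasPrefix("/foogoo/", "/foo/"))  // false -- component-aware
	fmt.Println(strings.HasPrefix("/foo/bar/", "/foo/")) // true  -- a real child path
}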