diff --git a/cache_ui/advertise.go b/cache_ui/advertise.go index ddbb57242..19dcbf8aa 100644 --- a/cache_ui/advertise.go +++ b/cache_ui/advertise.go @@ -35,6 +35,7 @@ import ( type ( CacheServer struct { server_utils.NamespaceHolder + namespaceFilter map[string]struct{} } ) @@ -49,6 +50,64 @@ func (server *CacheServer) CreateAdvertisement(name string, originUrl string, or return ad, nil } +func (server *CacheServer) SetFilters() { + /* + * Converts the list of permitted namespaces to a set and stores it for the serve + * This is based on the assumption that the cache server could potentially be filtering once + * every minute, so to save speed, we use a map to an empty struct to allow for O(1) lookup time + */ + server.namespaceFilter = make(map[string]struct{}) + nsList := param.Cache_PermittedNamespaces.GetStringSlice() + // Ensure that each permitted namespace starts with a "/" + for _, ns := range nsList { + if !strings.HasPrefix(ns, "/") { + ns = "/" + ns + } + server.namespaceFilter[ns] = struct{}{} + } +} + +func (server *CacheServer) filterAdsBasedOnNamespace(nsAds []common.NamespaceAdV2) []common.NamespaceAdV2 { + /* + * Filters out ads based on the namespaces listed in server.NamespaceFilter + * Note that this does a few checks for trailing and non-trailing "/" as it's assumed that the namespaces + * from the director and the ones provided might differ. + */ + filteredAds := []common.NamespaceAdV2{} + if len(server.namespaceFilter) > 0 { + for _, ad := range nsAds { + ns := ad.Path + sentinel := true + //If the final character isn't a '/', add it to the string + if !strings.HasSuffix(ns, "/") { + ns = ns + "/" + } + for sentinel { + _, exists := server.namespaceFilter[ns] + if exists { + filteredAds = append(filteredAds, ad) + break + } + + splitIndex := strings.LastIndex(ns, "/") + + //If ns isn't the root the start of the path, either remove the trailing / + //or check one director higher + if splitIndex != -1 && splitIndex != 0 { + if splitIndex != len(ns)-1 { + ns = ns[:splitIndex+1] + } else { + ns = ns[:splitIndex] + } + } else { + sentinel = false + } + } + } + } + return filteredAds +} + func (server *CacheServer) GetNamespaceAdsFromDirector() error { // Get the endpoint of the director var respNS []common.NamespaceAdV2 @@ -103,6 +162,10 @@ func (server *CacheServer) GetNamespaceAdsFromDirector() error { } } + if len(server.namespaceFilter) > 0 { + respNS = server.filterAdsBasedOnNamespace(respNS) + } + server.SetNamespaceAds(respNS) return nil diff --git a/cache_ui/advertise_test.go b/cache_ui/advertise_test.go new file mode 100644 index 000000000..4e2a3a845 --- /dev/null +++ b/cache_ui/advertise_test.go @@ -0,0 +1,141 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +package cache_ui + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/pelicanplatform/pelican/common" + "github.com/spf13/viper" + "github.com/stretchr/testify/require" +) + +func TestFilterNsAdsForCache(t *testing.T) { + tests := []struct { + desc string + permittedNS []string + expectedNumNS int + }{ + { + desc: "no-matching-namespaces", + permittedNS: []string{"/noexist", "/bad"}, + expectedNumNS: 0, + }, + { + desc: "matching-namespaces", + permittedNS: []string{"/ns1", "/ns2"}, + expectedNumNS: 2, + }, + { + desc: "mix-matching-namespaces", + permittedNS: []string{"/ns3/foo", "/noexist", "/ns1"}, + expectedNumNS: 2, + }, + { + desc: "matching-prefix", + permittedNS: []string{"/ns3", "/ns4/foo"}, + expectedNumNS: 3, + }, + { + desc: "no-filters-set", + expectedNumNS: 7, + }, + { + desc: "empty-filter-list", + permittedNS: []string{}, + expectedNumNS: 7, + }, + { + desc: "trailing-/", + permittedNS: []string{"/ns1/", "/ns4/"}, + expectedNumNS: 3, + }, + { + desc: "no-trailing-/", + permittedNS: []string{"/ns5", "/ns3"}, + expectedNumNS: 3, + }, + { + desc: "no-starting/", + permittedNS: []string{"ns4/foo/bar", "ns5"}, + expectedNumNS: 2, + }, + } + viper.Reset() + defer viper.Reset() + + nsAds := []common.NamespaceAdV2{ + { + Path: "/ns1", + }, + { + Path: "/ns2", + }, + { + Path: "/ns3/foo", + }, + { + Path: "/ns3/", + }, + { + Path: "/ns4/foo/bar/", + }, + { + Path: "/ns4", + }, + { + Path: "/ns5/", + }, + } + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + jsonbytes, err := json.Marshal(nsAds) + require.NoError(t, err) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err = w.Write(jsonbytes) + require.NoError(t, err) + })) + defer ts.Close() + + cacheServer := &CacheServer{} + + for _, testInput := range tests { + t.Run(testInput.desc, func(t *testing.T) { + + viper.Set("Federation.DirectorURL", ts.URL) + if testInput.permittedNS != nil { + viper.Set("Cache.PermittedNamespaces", testInput.permittedNS) + } + defer viper.Reset() + + cacheServer.SetFilters() + err := cacheServer.GetNamespaceAdsFromDirector() + require.NoError(t, err) + filteredNS := cacheServer.GetNamespaceAds() + + require.Equal(t, testInput.expectedNumNS, len(filteredNS)) + + }) + } +} diff --git a/docs/parameters.yaml b/docs/parameters.yaml index dc56bc7a6..2de05c237 100644 --- a/docs/parameters.yaml +++ b/docs/parameters.yaml @@ -694,6 +694,15 @@ type: int default: none components: ["cache"] --- +name: Cache.PermittedNamespaces +description: >- + A list of namespaces the cache is allowed to pull from. If the list is empty or this option is unset, it's assumed that + the cache is allowed to access any namespace that's advertised to the director. Otherwise, it will + only be allowed to access the listed namespaces. +type: stringSlice +default: none +components: ["cache"] +--- name: Cache.SelfTest description: >- A bool indicating whether the cache should perform self health checks. diff --git a/launchers/cache_serve.go b/launchers/cache_serve.go index ba54e0ef9..079473825 100644 --- a/launchers/cache_serve.go +++ b/launchers/cache_serve.go @@ -46,6 +46,7 @@ func CacheServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group) ( cacheServer := &cache_ui.CacheServer{} err = cacheServer.GetNamespaceAdsFromDirector() + cacheServer.SetFilters() if err != nil { return nil, err } diff --git a/param/parameters.go b/param/parameters.go index 6b405aebf..488a2f051 100644 --- a/param/parameters.go +++ b/param/parameters.go @@ -180,6 +180,7 @@ var ( ) var ( + Cache_PermittedNamespaces = StringSliceParam{"Cache.PermittedNamespaces"} Director_CacheResponseHostnames = StringSliceParam{"Director.CacheResponseHostnames"} Director_OriginResponseHostnames = StringSliceParam{"Director.OriginResponseHostnames"} Issuer_GroupRequirements = StringSliceParam{"Issuer.GroupRequirements"} diff --git a/param/parameters_struct.go b/param/parameters_struct.go index b9a22c027..b30f6fb78 100644 --- a/param/parameters_struct.go +++ b/param/parameters_struct.go @@ -29,6 +29,7 @@ type Config struct { DataLocation string EnableVoms bool ExportLocation string + PermittedNamespaces []string Port int RunLocation string SelfTest bool @@ -255,6 +256,7 @@ type configWithType struct { DataLocation struct { Type string; Value string } EnableVoms struct { Type string; Value bool } ExportLocation struct { Type string; Value string } + PermittedNamespaces struct { Type string; Value []string } Port struct { Type string; Value int } RunLocation struct { Type string; Value string } SelfTest struct { Type string; Value bool }