Skip to content

Commit

Permalink
Merge pull request #893 from turetske/filter-cache-origins
Browse files Browse the repository at this point in the history
Cache can now filter to only serve a list of provided namespaces
  • Loading branch information
jhiemstrawisc authored Mar 11, 2024
2 parents cce8db7 + 4d69652 commit 57f17ca
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 0 deletions.
63 changes: 63 additions & 0 deletions cache_ui/advertise.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
type (
CacheServer struct {
server_utils.NamespaceHolder
namespaceFilter map[string]struct{}
}
)

Expand All @@ -49,6 +50,64 @@ func (server *CacheServer) CreateAdvertisement(name string, originUrl string, or
return ad, nil
}

func (server *CacheServer) SetFilters() {
/*
* Converts the list of permitted namespaces to a set and stores it for the serve
* This is based on the assumption that the cache server could potentially be filtering once
* every minute, so to save speed, we use a map to an empty struct to allow for O(1) lookup time
*/
server.namespaceFilter = make(map[string]struct{})
nsList := param.Cache_PermittedNamespaces.GetStringSlice()
// Ensure that each permitted namespace starts with a "/"
for _, ns := range nsList {
if !strings.HasPrefix(ns, "/") {
ns = "/" + ns
}
server.namespaceFilter[ns] = struct{}{}
}
}

func (server *CacheServer) filterAdsBasedOnNamespace(nsAds []common.NamespaceAdV2) []common.NamespaceAdV2 {
/*
* Filters out ads based on the namespaces listed in server.NamespaceFilter
* Note that this does a few checks for trailing and non-trailing "/" as it's assumed that the namespaces
* from the director and the ones provided might differ.
*/
filteredAds := []common.NamespaceAdV2{}
if len(server.namespaceFilter) > 0 {
for _, ad := range nsAds {
ns := ad.Path
sentinel := true
//If the final character isn't a '/', add it to the string
if !strings.HasSuffix(ns, "/") {
ns = ns + "/"
}
for sentinel {
_, exists := server.namespaceFilter[ns]
if exists {
filteredAds = append(filteredAds, ad)
break
}

splitIndex := strings.LastIndex(ns, "/")

//If ns isn't the root the start of the path, either remove the trailing /
//or check one director higher
if splitIndex != -1 && splitIndex != 0 {
if splitIndex != len(ns)-1 {
ns = ns[:splitIndex+1]
} else {
ns = ns[:splitIndex]
}
} else {
sentinel = false
}
}
}
}
return filteredAds
}

func (server *CacheServer) GetNamespaceAdsFromDirector() error {
// Get the endpoint of the director
var respNS []common.NamespaceAdV2
Expand Down Expand Up @@ -103,6 +162,10 @@ func (server *CacheServer) GetNamespaceAdsFromDirector() error {
}
}

if len(server.namespaceFilter) > 0 {
respNS = server.filterAdsBasedOnNamespace(respNS)
}

server.SetNamespaceAds(respNS)

return nil
Expand Down
141 changes: 141 additions & 0 deletions cache_ui/advertise_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/***************************************************************
*
* Copyright (C) 2024, Pelican Project, Morgridge Institute for Research
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
***************************************************************/

package cache_ui

import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"

"github.com/pelicanplatform/pelican/common"
"github.com/spf13/viper"
"github.com/stretchr/testify/require"
)

func TestFilterNsAdsForCache(t *testing.T) {
tests := []struct {
desc string
permittedNS []string
expectedNumNS int
}{
{
desc: "no-matching-namespaces",
permittedNS: []string{"/noexist", "/bad"},
expectedNumNS: 0,
},
{
desc: "matching-namespaces",
permittedNS: []string{"/ns1", "/ns2"},
expectedNumNS: 2,
},
{
desc: "mix-matching-namespaces",
permittedNS: []string{"/ns3/foo", "/noexist", "/ns1"},
expectedNumNS: 2,
},
{
desc: "matching-prefix",
permittedNS: []string{"/ns3", "/ns4/foo"},
expectedNumNS: 3,
},
{
desc: "no-filters-set",
expectedNumNS: 7,
},
{
desc: "empty-filter-list",
permittedNS: []string{},
expectedNumNS: 7,
},
{
desc: "trailing-/",
permittedNS: []string{"/ns1/", "/ns4/"},
expectedNumNS: 3,
},
{
desc: "no-trailing-/",
permittedNS: []string{"/ns5", "/ns3"},
expectedNumNS: 3,
},
{
desc: "no-starting/",
permittedNS: []string{"ns4/foo/bar", "ns5"},
expectedNumNS: 2,
},
}
viper.Reset()
defer viper.Reset()

nsAds := []common.NamespaceAdV2{
{
Path: "/ns1",
},
{
Path: "/ns2",
},
{
Path: "/ns3/foo",
},
{
Path: "/ns3/",
},
{
Path: "/ns4/foo/bar/",
},
{
Path: "/ns4",
},
{
Path: "/ns5/",
},
}

ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
jsonbytes, err := json.Marshal(nsAds)
require.NoError(t, err)

w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, err = w.Write(jsonbytes)
require.NoError(t, err)
}))
defer ts.Close()

cacheServer := &CacheServer{}

for _, testInput := range tests {
t.Run(testInput.desc, func(t *testing.T) {

viper.Set("Federation.DirectorURL", ts.URL)
if testInput.permittedNS != nil {
viper.Set("Cache.PermittedNamespaces", testInput.permittedNS)
}
defer viper.Reset()

cacheServer.SetFilters()
err := cacheServer.GetNamespaceAdsFromDirector()
require.NoError(t, err)
filteredNS := cacheServer.GetNamespaceAds()

require.Equal(t, testInput.expectedNumNS, len(filteredNS))

})
}
}
9 changes: 9 additions & 0 deletions docs/parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,15 @@ type: int
default: none
components: ["cache"]
---
name: Cache.PermittedNamespaces
description: >-
A list of namespaces the cache is allowed to pull from. If the list is empty or this option is unset, it's assumed that
the cache is allowed to access any namespace that's advertised to the director. Otherwise, it will
only be allowed to access the listed namespaces.
type: stringSlice
default: none
components: ["cache"]
---
name: Cache.SelfTest
description: >-
A bool indicating whether the cache should perform self health checks.
Expand Down
1 change: 1 addition & 0 deletions launchers/cache_serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func CacheServe(ctx context.Context, engine *gin.Engine, egrp *errgroup.Group) (

cacheServer := &cache_ui.CacheServer{}
err = cacheServer.GetNamespaceAdsFromDirector()
cacheServer.SetFilters()
if err != nil {
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions param/parameters.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions param/parameters_struct.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 57f17ca

Please sign in to comment.