Skip to content

Commit

Permalink
Merge pull request #105 from flanksource/moshloop
Browse files Browse the repository at this point in the history
Add k8s file scraper
  • Loading branch information
moshloop authored Nov 6, 2022
2 parents ba9b030 + 7613caf commit 32d9f5b
Show file tree
Hide file tree
Showing 9 changed files with 214 additions and 61 deletions.
23 changes: 19 additions & 4 deletions api/v1/kubernetes.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package v1

import "github.com/flanksource/kommons"
import (
"strings"

"github.com/flanksource/kommons"
)

type Kubernetes struct {
BaseScraper `json:",inline"`
Expand Down Expand Up @@ -28,6 +32,10 @@ type PodFile struct {
Format string `json:"format,omitempty"`
}

// String renders the pod file as its Path entries joined by commas.
func (p PodFile) String() string {
	const separator = ","
	return strings.Join(p.Path, separator)
}

type ResourceSelector struct {
Namespace string `json:"namespace,omitempty"`
Kind string `json:"kind,omitempty"`
Expand All @@ -41,11 +49,18 @@ func (r ResourceSelector) IsEmpty() bool {
}

// String returns a human-readable description of the selector.
//
// The result starts with Kind, followed by "/<Namespace>" when a namespace is
// set. If Name is set, "/<Name>" is appended and returned immediately, so the
// label and field selectors are not included. Otherwise " labels=<selector>"
// and " fields=<selector>" are appended for whichever selectors are non-empty.
func (r ResourceSelector) String() string {
	s := r.Kind
	if r.Namespace != "" {
		s += "/" + r.Namespace
	}
	// A named resource is fully identified; selectors add nothing.
	if r.Name != "" {
		return s + "/" + r.Name
	}
	if r.LabelSelector != "" {
		s += " labels=" + r.LabelSelector
	}
	if r.FieldSelector != "" {
		s += " fields=" + r.FieldSelector
	}
	return s
}
8 changes: 2 additions & 6 deletions db/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
v1 "github.com/flanksource/config-db/api/v1"
"github.com/flanksource/config-db/db/models"
"github.com/lib/pq"
"github.com/ohler55/ojg/oj"
"github.com/patrickmn/go-cache"
"github.com/pkg/errors"
"gorm.io/gorm/clause"
)

Expand Down Expand Up @@ -123,11 +123,7 @@ func NewConfigItemFromResult(result v1.ScrapeResult) (*models.ConfigItem, error)
case []byte:
dataStr = string(data)
default:
bytes, err := json.Marshal(data)
if err != nil {
return nil, errors.Wrapf(err, "Unable to marshal: %v", result.Config)
}
dataStr = string(bytes)
dataStr = oj.JSON(data, &oj.Options{Sort: true, OmitNil: true, Indent: 2, TimeFormat: "2006-01-02T15:04:05Z07:00"})
}

ci := &models.ConfigItem{
Expand Down
1 change: 1 addition & 0 deletions db/migrations/003_seed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ CREATE TABLE IF NOT EXISTS config_items (
id UUID DEFAULT generate_ulid() PRIMARY KEY,
parent_id UUID NULL,
path text NULL,
icon text NULL,
scraper_id UUID NULL,
config_type text NOT NULL, -- The standardized type e.g. Subnet, Network, Host, etc. that applies across platforms
external_id text[],
Expand Down
2 changes: 1 addition & 1 deletion db/migrations/099_post_seed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

INSERT INTO config_db_version(version_id, tstamp, is_applied) (
SELECT version_id, now() as tstamp, true as is_applied
FROM generate_series(100, 107) version_id
FROM generate_series(100, 108) version_id
);

3 changes: 3 additions & 0 deletions db/migrations/108_icon.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- +goose Up

-- Adds the optional icon column to config_items.
-- IF NOT EXISTS keeps this migration safe on databases created from
-- 003_seed.sql, whose CREATE TABLE already defines the icon column.
ALTER TABLE config_items ADD COLUMN IF NOT EXISTS icon TEXT NULL;
69 changes: 46 additions & 23 deletions scrapers/file/file.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package file

import (
"math/rand"
"crypto/md5"
"encoding/hex"
"net/url"
"os"
"path"
"path/filepath"
"regexp"
"strings"
"time"

"github.com/flanksource/commons/logger"
v1 "github.com/flanksource/config-db/api/v1"
Expand All @@ -18,12 +21,6 @@ import (
type FileScrapper struct {
}

const charset = "abcdefghijklmnopqrstuvwxyz" +
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

var seededRand *rand.Rand = rand.New(
rand.NewSource(time.Now().UnixNano()))

func isIgnored(config v1.File, path string) (bool, error) {
if !isYaml(path) && !isJson(path) {
logger.Tracef("skipping file %s, not a yaml or json file", path)
Expand All @@ -43,15 +40,49 @@ func isIgnored(config v1.File, path string) (bool, error) {
return false, nil
}

// stripSecrets passes uri through stripPrefix, parses the result as a URL and
// returns it with any password redacted (see url.URL.Redacted).
// If the value cannot be parsed, the original uri is returned unchanged.
func stripSecrets(uri string) string {
	parsed, err := url.Parse(stripPrefix(uri))
	if err != nil || parsed == nil {
		return uri
	}
	return parsed.Redacted()
}

// getterPrefixRe matches a leading "<word>::" marker such as "git::".
// It is compiled once at package scope instead of on every call.
var getterPrefixRe = regexp.MustCompile(`^\w+::`)

// stripPrefix removes a leading "<word>::" marker (e.g. "git::") and then a
// leading "file://" scheme from filename. Only prefixes are removed: a
// "file://" that appears later in the string (for example inside a query
// parameter) is left untouched.
func stripPrefix(filename string) string {
	filename = getterPrefixRe.ReplaceAllString(filename, "")
	return strings.TrimPrefix(filename, "file://")
}

// convertToLocalPath derives a short name for uri of the form
// "<host>-<base>-<hash>", where base is the last element of the URL path and
// hash is the first 8 hex characters of the MD5 of the original uri.
// The "<host>-" part is omitted when the parsed URL has no host.
// If uri (after stripPrefix) cannot be parsed, uri is returned unchanged.
func convertToLocalPath(uri string) string {
	parsed, err := url.Parse(stripPrefix(uri))
	if err != nil {
		return uri
	}

	// The digest is taken over the untouched uri, prefix included.
	sum := md5.Sum([]byte(uri))
	suffix := hex.EncodeToString(sum[:])[0:8]

	name := path.Base(parsed.Path) + "-" + suffix
	if parsed.Host == "" {
		return name
	}
	return parsed.Host + "-" + name
}

// Scrape ...
func (file FileScrapper) Scrape(ctx *v1.ScrapeContext, configs v1.ConfigScraper) v1.ScrapeResults {
pwd, _ := os.Getwd()
cacheDir := path.Join(pwd, ".config-db", "cache", "files")
results := v1.ScrapeResults{}
var tempDir string
for _, config := range configs.File {
url := stripSecrets(config.URL)
tempDir := path.Join(cacheDir, convertToLocalPath(url))
if err := os.MkdirAll(cacheDir, 0755); err != nil {
return results.Errorf(err, "failed to create cache dir: %v", tempDir)
}
logger.Debugf("Scraping file %s ==> %s", stripSecrets(config.URL), tempDir)
var globMatches []string
if config.URL != "" {
globMatches, tempDir = findFilesFromURL(ctx, config.URL, config.Paths)
defer os.RemoveAll(tempDir)
globMatches = getFiles(ctx, tempDir, config.URL, config.Paths)
} else {
globMatches = findFiles(ctx, "", config.Paths)
}
Expand Down Expand Up @@ -89,12 +120,12 @@ func (file FileScrapper) Scrape(ctx *v1.ScrapeContext, configs v1.ConfigScraper)
return results
}

func findFilesFromURL(ctx *v1.ScrapeContext, url string, paths []string) (matches []string, dirname string) {
tempDir := GetTempDirName(10, charset)
if err := getter.GetAny(tempDir, url); err != nil {
func getFiles(ctx *v1.ScrapeContext, dst, url string, paths []string) (matches []string) {
logger.Debugf("Downloading files from %s to %s", stripSecrets(url), dst)
if err := getter.GetAny(dst, url); err != nil {
logger.Errorf("Error downloading file: %s", err)
}
return findFiles(ctx, tempDir, paths), tempDir
return findFiles(ctx, dst, paths)
}

func findFiles(ctx *v1.ScrapeContext, dir string, paths []string) []string {
Expand Down Expand Up @@ -123,11 +154,3 @@ func isYaml(filename string) bool {
// isJson reports whether filename's extension is exactly ".json"
// (the comparison is case-sensitive).
func isJson(filename string) bool {
	ext := filepath.Ext(filename)
	return ext == ".json"
}

func GetTempDirName(length int, charset string) string {
b := make([]byte, length)
for i := range b {
b[i] = charset[seededRand.Intn(len(charset))]
}
return string(b)
}
41 changes: 41 additions & 0 deletions scrapers/file/file_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package file

import "testing"

// TestStripPrefix is a table-driven check of stripPrefix: a leading
// "file://" or "<word>::" marker is removed and other inputs pass through.
func TestStripPrefix(t *testing.T) {
	type testCase struct {
		input    string
		expected string
	}
	tests := []testCase{
		{input: "file://foo", expected: "foo"},
		{input: "git::foo", expected: "foo"},
		{input: "git::https://foo", expected: "https://foo"},
		{input: "foo", expected: "foo"},
		{input: "", expected: ""},
	}
	for i := range tests {
		tc := tests[i]
		if got := stripPrefix(tc.input); got != tc.expected {
			t.Errorf("stripPrefix(%s) == %s, expected %s", tc.input, got, tc.expected)
		}
	}
}

// TestConvertLocalPath is a table-driven check that convertToLocalPath maps
// each input URI to the expected "<host>-<base>-<hash>" style name.
func TestConvertLocalPath(t *testing.T) {
	type testCase struct {
		input    string
		expected string
	}
	tests := []testCase{
		{input: "file://foo", expected: "foo-ecf5c8ee"},
		{input: "git::foo", expected: "foo-b943d8a5"},
		{input: "git::https://foo/path?query=abc", expected: "foo-path-8f49fbdc"},
		{input: "foo", expected: "foo-acbd18db"},
	}
	for i := range tests {
		tc := tests[i]
		if got := convertToLocalPath(tc.input); got != tc.expected {
			t.Errorf("convertToLocalPath(%s) == %s, expected %s", tc.input, got, tc.expected)
		}
	}
}
Loading

0 comments on commit 32d9f5b

Please sign in to comment.