From 6e904c9cdd036565785961c3e129b5c193333162 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Thu, 25 Apr 2024 11:18:19 -0400 Subject: [PATCH] Initial commit. --- .github/workflows/github-release.yml | 31 +++ .github/workflows/goreleaser.yml | 33 +++ .github/workflows/lint-test-build-push.yml | 21 ++ .gitignore | 2 + .golangci.yml | 4 + .goreleaser.yml | 37 +++ README.md | 3 + go.mod | 5 + go.sum | 4 + main.go | 261 +++++++++++++++++++++ main_test.go | 243 +++++++++++++++++++ scyllaridae.complex-example.yml | 20 ++ scyllaridae.example.yml | 17 ++ 13 files changed, 681 insertions(+) create mode 100644 .github/workflows/github-release.yml create mode 100644 .github/workflows/goreleaser.yml create mode 100644 .github/workflows/lint-test-build-push.yml create mode 100644 .gitignore create mode 100644 .golangci.yml create mode 100644 .goreleaser.yml create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 main_test.go create mode 100644 scyllaridae.complex-example.yml create mode 100644 scyllaridae.example.yml diff --git a/.github/workflows/github-release.yml b/.github/workflows/github-release.yml new file mode 100644 index 0000000..d84fe51 --- /dev/null +++ b/.github/workflows/github-release.yml @@ -0,0 +1,31 @@ +name: Create release +on: + pull_request_target: + branches: + - main + types: + - closed +permissions: + contents: write + actions: write +jobs: + release: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: install autotag binary + run: curl -sL https://git.io/autotag-install | sudo sh -s -- -b /usr/bin + - name: create release + run: |- + TAG=$(autotag) + git tag $TAG + git push origin $TAG + gh release create $TAG + gh workflow run goreleaser.yml --ref $TAG + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + diff --git a/.github/workflows/goreleaser.yml b/.github/workflows/goreleaser.yml new file mode 100644 index 0000000..1351163 --- /dev/null +++ b/.github/workflows/goreleaser.yml @@ -0,0 +1,33 @@ +name: goreleaser + +on: + workflow_dispatch: + push: + tags: + - "*" + +permissions: + contents: write + +jobs: + goreleaser: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - + name: Set up Go + uses: actions/setup-go@v3 + - + name: Run GoReleaser + uses: goreleaser/goreleaser-action@v4 + with: + distribution: goreleaser + version: latest + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + diff --git a/.github/workflows/lint-test-build-push.yml b/.github/workflows/lint-test-build-push.yml new file mode 100644 index 0000000..f60f2ab --- /dev/null +++ b/.github/workflows/lint-test-build-push.yml @@ -0,0 +1,21 @@ +name: lint-test +on: [push] +permissions: + contents: read + +jobs: + lint-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v4 + - name: golangci-lint + uses: golangci/golangci-lint-action@v3 + with: + version: v1.54 + - name: Install dependencies + run: go get . + - name: Build + run: go build -v ./... + - name: Test with the Go CLI + run: go test -v ./... diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ac92f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +scyllaridae + diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..55550f7 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,4 @@ +linters: + enable: + - gofmt + diff --git a/.goreleaser.yml b/.goreleaser.yml new file mode 100644 index 0000000..a145094 --- /dev/null +++ b/.goreleaser.yml @@ -0,0 +1,37 @@ +before: + hooks: + - go mod tidy +builds: + - binary: scyllaridae + env: + - CGO_ENABLED=0 + goos: + - linux + - windows + - darwin + +archives: + - format: tar.gz + # this name template makes the OS and Arch compatible with the results of uname. + name_template: >- + {{ .ProjectName }}_ + {{- title .Os }}_ + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + # use zip for windows archives + format_overrides: + - goos: windows + format: zip +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ incpatch .Version }}-next" +changelog: + sort: asc + filters: + exclude: + - '^docs:' + - '^test:' + diff --git a/README.md b/README.md new file mode 100644 index 0000000..b79763b --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# scyllaridae + +Any microservice that takes stdin as input and returns a stream to stdout can use scyllaridae to facilitate its execution. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..7f5c53a --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/lehigh-university-libraries/scyllaridae + +go 1.21.3 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..a62c313 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go new file mode 100644 index 0000000..017b5ed --- /dev/null +++ b/main.go @@ -0,0 +1,261 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "log/slog" + "net/http" + "os" + "os/exec" + "strings" + + "gopkg.in/yaml.v3" +) + +type Data struct { + Actor Actor `json:"actor"` + Object Object `json:"object"` + Attachment Attachment `json:"attachment"` + Type string `json:"type"` + Summary string `json:"summary"` +} + +type Actor struct { + Id string `json:"id"` +} + +type Object struct { + Id string `json:"id"` + URL []URL `json:"url"` + NewVersion bool `json:"isNewVersion"` +} + +type URL struct { + Name string `json:"name"` + Type string `json:"type"` + Href string `json:"href"` + MediaType string `json:"mediaType"` + Rel string `json:"rel"` +} + +type Attachment struct { + Type string `json:"type"` + Content Content `json:"content"` + MediaType string `json:"mediaType"` +} + +type Content struct { + MimeType string `json:"mimetype"` + Args string `json:"args"` + SourceUri string `json:"source_uri"` + DestinationUri string `json:"destination_uri"` + FileUploadUri string `json:"file_upload_uri"` + WebServiceUri string +} + +type Cmd struct { + Command string `yaml:"cmd,omitempty"` + PreArgs []string `yaml:"pre-args,omitempty"` + PostArgs []string `yaml:"post-args,omitempty"` +} + +type Config struct { + Label string `yaml:"label"` + Method string `yaml:"destination-http-method"` + FileHeader string `yaml:"file-header"` + ArgHeader string `yaml:"arg-header"` + ForwardAuth bool `yaml:"forward-auth"` + AllowedFormats []string `yaml:"allowed-formats"` + Mimetypes map[string]Cmd `yaml:"mimetypes"` +} + +var ( + config *Config +) + +func init() { + var err error + + config, err = ReadConfig("scyllaridae.yml") + if err != nil { + slog.Error("Could not read YML", "err", err) + os.Exit(1) + } +} + +func main() { + http.HandleFunc("/", MessageHandler) + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + slog.Info("Server listening", "port", port) + if err := http.ListenAndServe(":"+port, nil); err != nil { + panic(err) + } +} + +func MessageHandler(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + defer r.Body.Close() + + // Read the Alpaca message payload + message, err := DecodeAlpacaMessage(r) + if err != nil { + slog.Error("Error decoding Pub/Sub message", "err", err) + http.Error(w, "Error decoding Pub/Sub message", http.StatusInternalServerError) + return + } + + // Fetch the file contents from the URL + sourceResp, err := http.Get(message.Attachment.Content.SourceUri) + if err != nil { + slog.Error("Error fetching source file contents", "err", err) + http.Error(w, "Error fetching file contents from URL", http.StatusInternalServerError) + return + } + defer sourceResp.Body.Close() + + arg := r.Header.Get(config.ArgHeader) + cmd, err := buildExecCommand(message.Attachment.Content.MimeType, arg, config) + if err != nil { + slog.Error("Error building command", "err", err) + http.Error(w, "Internal error", http.StatusInternalServerError) + return + } + cmd.Stdin = sourceResp.Body + + // Create a buffer to store the output + var outBuf, stdErr bytes.Buffer + cmd.Stdout = &outBuf + cmd.Stderr = &stdErr + + slog.Info("Running command", "cmd", cmd.String()) + + if err := cmd.Run(); err != nil { + slog.Error("Error running command", "cmd", cmd.String(), "err", stdErr.String()) + http.Error(w, "Error running convert command", http.StatusInternalServerError) + return + } + + // Create the PUT request + req, err := http.NewRequest(config.Method, message.Attachment.Content.DestinationUri, &outBuf) + if err != nil { + slog.Error("Error creating HTTP request", "err", err) + http.Error(w, "Error creating HTTP request", http.StatusInternalServerError) + return + } + if config.ForwardAuth { + auth := r.Header.Get("Authorization") + req.Header.Set("Authorization", auth) + } + req.Header.Set("Content-Type", message.Attachment.Content.MimeType) + req.Header.Set("Content-Location", message.Attachment.Content.FileUploadUri) + + // Execute the PUT request + client := http.DefaultClient + resp, err := client.Do(req) + if err != nil { + slog.Error("Error sending request", "method", config.Method, "err", err) + http.Error(w, "Internal error.", http.StatusInternalServerError) + return + } + defer resp.Body.Close() + + if resp.StatusCode > 299 { + slog.Error("Request failed on destination server", "code", resp.StatusCode) + http.Error(w, fmt.Sprintf("%s request failed with status code %d", config.Method, resp.StatusCode), resp.StatusCode) + return + } + + w.WriteHeader(http.StatusNoContent) + _, err = w.Write([]byte("")) + if err != nil { + slog.Error("Error writing response", "err", err) + } +} + +func DecodeAlpacaMessage(r *http.Request) (Data, error) { + var d Data + + if err := json.NewDecoder(r.Body).Decode(&d); err != nil { + return Data{}, err + } + + return d, nil +} + +func ReadConfig(yp string) (*Config, error) { + var ( + y []byte + err error + ) + yml := os.Getenv("SCYLLARIDAE_YML") + if yml != "" { + y = []byte(yml) + } else { + y, err = os.ReadFile(yp) + if err != nil { + return nil, err + } + } + + var c Config + err = yaml.Unmarshal(y, &c) + if err != nil { + return nil, err + } + + return &c, nil +} + +func buildExecCommand(mimetype, addtlArgs string, c *Config) (*exec.Cmd, error) { + var cmdConfig Cmd + var exists bool + slog.Info("Allowed formats", "formats", c.AllowedFormats) + if isAllowedMIMEType(mimetype, c.AllowedFormats) { + cmdConfig, exists = c.Mimetypes[mimetype] + if !exists || (len(cmdConfig.Command) == 0) { + // Fallback to default if specific MIME type not configured or if command is empty + cmdConfig = c.Mimetypes["default"] + } + } else { + return nil, fmt.Errorf("undefined mimetype: %s", mimetype) + } + + args := []string{} + if len(cmdConfig.PreArgs) > 0 { + args = append(args, cmdConfig.PreArgs...) + } + if addtlArgs != "" { + args = append(args, addtlArgs) + } + if len(cmdConfig.PostArgs) > 0 { + args = append(args, cmdConfig.PostArgs...) + } + cmd := exec.Command(cmdConfig.Command, args...) + + return cmd, nil +} + +func isAllowedMIMEType(mimetype string, allowedFormats []string) bool { + for _, format := range allowedFormats { + if format == mimetype { + return true + } + if strings.HasSuffix(format, "/*") { + // Check wildcard MIME type + prefix := strings.TrimSuffix(format, "*") + if strings.HasPrefix(mimetype, prefix) { + return true + } + } + } + return false +} diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000..ecba57a --- /dev/null +++ b/main_test.go @@ -0,0 +1,243 @@ +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "os" + "testing" +) + +func TestMessageHandler_MethodNotAllowed(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(MessageHandler) + + handler.ServeHTTP(rr, req) + + if status := rr.Code; status != http.StatusMethodNotAllowed { + t.Errorf("handler returned wrong status code: got %v want %v", + status, http.StatusMethodNotAllowed) + } +} + +func TestIntegration_PutDestination(t *testing.T) { + var err error + + method := "PUT" + content := "This is a test file content" + + destinationServer := createMockDestinationServer(t, method, content) + defer destinationServer.Close() + + sourceServer := createMockSourceServer(t, content) + defer sourceServer.Close() + + // Mock the environment variable for the configuration file path + os.Setenv("SCYLLARIDAE_YML", ` +destination-http-method: "PUT" +file-header: Apix-Ldp-Resource +arg-header: X-Islandora-Args +forward-auth: false +allowed-formats: [ + "text/plain" +] +mimetypes: + default: + cmd: "cat" +`) + defer os.Unsetenv("SCYLLARIDAE_YML") + + config, err = ReadConfig("scyllaridae.yml") + if err != nil { + slog.Error("Could not read YML", "err", err) + os.Exit(1) + } + // Configure and start the main server + setupServer := httptest.NewServer(http.HandlerFunc(MessageHandler)) + defer setupServer.Close() + + // Prepare a mock message to be sent to the main server + testData := Data{ + Actor: Actor{ + Id: "actor1", + }, + Object: Object{ + Id: "object1", + URL: []URL{ + { + Name: "Source", + Type: "source", + Href: sourceServer.URL, + MediaType: "text/plain", + Rel: "source", + }, + }, + NewVersion: true, + }, + Attachment: Attachment{ + Type: "file", + Content: Content{ + MimeType: "text/plain", + Args: "", + SourceUri: sourceServer.URL, + DestinationUri: destinationServer.URL, + FileUploadUri: "", + }, + MediaType: "text/plain", + }, + Type: "TestType", + Summary: "This is a test", + } + + jsonBytes, err := json.Marshal(testData) + if err != nil { + t.Fatalf("Failed to marshal test data: %v", err) + } + + // Send the mock message to the main server + req, err := http.NewRequest("POST", setupServer.URL, bytes.NewReader(jsonBytes)) + if err != nil { + t.Fatal(err) + } + req.Header.Set("Content-Type", "application/json") + + // Capture the response + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusNoContent { + t.Errorf("Expected status %d, got %d", http.StatusNoContent, resp.StatusCode) + } +} + +func TestIntegration_GetDestination(t *testing.T) { + var err error + method := "GET" + content := "" + + sourceServer := createMockSourceServer(t, content) + defer sourceServer.Close() + + destinationServer := createMockDestinationServer(t, method, content) + defer destinationServer.Close() + + // Mock the environment variable for the configuration file path + os.Setenv("SCYLLARIDAE_YML", fmt.Sprintf(` +destination-http-method: "%s" +file-header: Apix-Ldp-Resource +arg-header: X-Islandora-Args +forward-auth: false +allowed-formats: [ + "text/plain" +] +mimetypes: + default: + cmd: "cat" +`, method)) + defer os.Unsetenv("SCYLLARIDAE_YML") + + config, err = ReadConfig("scyllaridae.yml") + if err != nil { + slog.Error("Could not read YML", "err", err) + os.Exit(1) + } + // Configure and start the main server + setupServer := httptest.NewServer(http.HandlerFunc(MessageHandler)) + defer setupServer.Close() + + // Prepare a mock message to be sent to the main server + testData := Data{ + Actor: Actor{ + Id: "actor1", + }, + Object: Object{ + Id: "object1", + URL: []URL{ + { + Name: "Source", + Type: "source", + Href: sourceServer.URL, + MediaType: "text/plain", + Rel: "source", + }, + }, + NewVersion: true, + }, + Attachment: Attachment{ + Type: "file", + Content: Content{ + MimeType: "text/plain", + Args: "", + SourceUri: sourceServer.URL, + DestinationUri: destinationServer.URL, + FileUploadUri: "", + }, + MediaType: "text/plain", + }, + Type: "TestType", + Summary: "This is a test", + } + + jsonBytes, err := json.Marshal(testData) + if err != nil { + t.Fatalf("Failed to marshal test data: %v", err) + } + + // Send the mock message to the main server + req, err := http.NewRequest("POST", setupServer.URL, bytes.NewReader(jsonBytes)) + if err != nil { + t.Fatal(err) + } + req.Header.Set("Content-Type", "application/json") + + // Capture the response + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusNoContent { + t.Errorf("Expected status %d, got %d", http.StatusNoContent, resp.StatusCode) + } +} + +func createMockDestinationServer(t *testing.T, method, content string) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != method { + t.Errorf("Expected %s method, got %s", method, r.Method) + } + if r.Method != "GET" { + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatal("Failed to read request body") + } + defer r.Body.Close() + + if string(body) != content { + t.Errorf("Unexpected body content: %s", string(body)) + } + } + w.WriteHeader(http.StatusOK) + })) +} + +func createMockSourceServer(t *testing.T, content string) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if _, err := w.Write([]byte(content)); err != nil { + t.Fatal("Failed to write response in mock server") + } + })) +} diff --git a/scyllaridae.complex-example.yml b/scyllaridae.complex-example.yml new file mode 100644 index 0000000..feb1884 --- /dev/null +++ b/scyllaridae.complex-example.yml @@ -0,0 +1,20 @@ +label: "OCR PDFs and Images" +destination-http-method: "PUT" +file-header: Apix-Ldp-Resource +arg-header: X-Islandora-Args +forward-auth: false +allowed-formats: [ + "application/pdf", + "image/*" +] +mimetypes: + application/pdf: + cmd: "pdftotext" + post-args: + - "-" + - "-" + default: + cmd: "tesseract" + pre-args: + - "stdin" + - "stdout" diff --git a/scyllaridae.example.yml b/scyllaridae.example.yml new file mode 100644 index 0000000..9103b56 --- /dev/null +++ b/scyllaridae.example.yml @@ -0,0 +1,17 @@ +# take the input from a source server +# and print it to the output server +# +# Though since we're using destination-http-method: "GET" +# the destination server isn't receiving anything +# this example can be used to warm the cache with a properly formatted event +label: "Warm cache" +destination-http-method: "GET" +file-header: Apix-Ldp-Resource +arg-header: X-Islandora-Args +forward-auth: false +allowed-formats: [ + "text/plain" +] +mimetypes: + default: + cmd: "cat"