diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..677bc27 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "daily" + open-pull-requests-limit: 10 + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100644 index 0000000..d88d4db --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,24 @@ +name: Linter + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + types: [ opened, synchronize, reopened ] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout default branch + uses: actions/checkout@v2 + + - name: Lint Code Base + uses: github/super-linter@v4 + env: + DEFAULT_BRANCH: 'main' + VALIDATE_ALL_CODEBASE: false + VALIDATE_JSON: false + VALIDATE_ANSIBLE: false + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..ee5aa54 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,82 @@ +name: Release + +on: + push: + tags: + - 'v*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 + +env: + PRODUCT: warcraft + +jobs: + build: + name: Checkout, build, archive, upload + runs-on: ubuntu-latest + steps: + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + + - name: Set up Go 1.x + uses: actions/setup-go@v2 + with: + go-version: ^1.16 + + - name: Build fat binary + run: make all-arch + + - name: Archive binary + run: make releases + + - name: Upload archived binary + uses: actions/upload-artifact@v2 + with: + name: ${{ env.PRODUCT }} + path: build/package/${{ env.PRODUCT }}* + + checksum: + name: Get archived packages checksum + runs-on: ubuntu-latest + needs: build + outputs: + digest: ${{ steps.digest.outputs.result }} + steps: + - name: Download math result from build job + uses: actions/download-artifact@v2 + with: + name: ${{ env.PRODUCT }} + path: . + + - name: Create all binary digest + id: digest + run: | + digest=$(find ${{ env.PRODUCT }}* -type f -exec sha256sum {} +) + digest="${digest//$'%'/%25}" + digest="${digest//$'\n'/%0A}" + echo "::set-output name=result::$digest" + + release: + name: Create and upload release + runs-on: ubuntu-latest + needs: [build, checksum] + steps: + - name: Download math result from build and checksum jobs + uses: actions/download-artifact@v2 + with: + name: ${{ env.PRODUCT }} + path: ${{ env.PRODUCT }} + + - name: Create Release + uses: softprops/action-gh-release@v1 + if: startsWith(github.ref, 'refs/tags/') + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token + with: + body: | + **Digests in this release:** + + ``` + ${{ needs.checksum.outputs.digest }} + ``` + files: ${{ env.PRODUCT }}/*${{ env.PRODUCT }}* + draft: false + prerelease: true diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 0000000..5553d1e --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,19 @@ +name: Stale + +on: + schedule: + - cron: "0 3 * * 6" + +jobs: + 
stale: + name: Stale + runs-on: ubuntu-latest + steps: + - name: Mark stale issues and pull requests + uses: actions/stale@v3 + with: + repo-token: ${{ github.token }} + stale-issue-message: "This issue is stale because it has been open 120 days with no activity. Remove stale label or comment or this will be closed in 5 days" + stale-pr-message: 'It has been open 120 days with no activity. Remove stale label or comment or this will be closed in 5 days' + days-before-stale: 120 + days-before-close: 5 diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..86237b4 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,70 @@ +name: Testing + +on: + push: + branches: + - "*" + paths: + - "**/*.go" + - "go.mod" + - "go.sum" + - ".github/workflows/testing.yml" + pull_request: + branches: [ main ] + types: [ opened, synchronize, reopened ] + paths: + - "**/*.go" + - "go.mod" + - "go.sum" + - ".github/workflows/testing.yml" + +jobs: + test: + name: Testing + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, macos-latest, windows-latest ] + go: [ "1.12", "1.13", "1.14", "1.15", "1.16" ] + + steps: + - name: Set up Go 1.x + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go }} + + - name: Set up Chocolatey + if: matrix.os == 'windows-latest' + uses: crazy-max/ghaction-chocolatey@v1 + with: + args: -h + + - name: Install Wget + if: matrix.os == 'windows-latest' + run: | + choco install wget + wget --help + + - name: Check out code base + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Get dependencies + run: | + go get -v -t -d ./... 
+ + - name: Run test + run: | + make test + make test-cover + + - name: Upload coverage + uses: actions/upload-artifact@v2 + with: + name: coverage-${{ matrix.os }}-go${{ matrix.go }} + path: coverage.* + + - name: Run integration test + run: make test-integration diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b1db11d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/build +**.warc.gz diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1dd5e51 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Wayback Archiver + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
# Build, test and benchmark rules for warcraft.

export GO111MODULE = on
export GOPROXY = https://proxy.golang.org

NAME = warcraft
BINDIR ?= ./build/binary
PACKDIR ?= ./build/package
GOBUILD := CGO_ENABLED=0 go build --ldflags="-s -w" -v
GOFILES := $(wildcard ./cmd/warcraft/*.go)
# Tag of the most recently tagged commit, with the leading "v" stripped.
VERSION := $(shell git describe --tags `git rev-list --tags --max-count=1`)
VERSION := $(VERSION:v%=%)
PROJECT := github.com/wabarc/warcraft
PACKAGES := $(shell go list ./...)

PLATFORM_LIST = \
	darwin-amd64 \
	darwin-arm64 \
	linux-amd64

WINDOWS_ARCH_LIST = \
	windows-amd64

.PHONY: all
all: linux-amd64 darwin-amd64 windows-amd64

# Each platform target cross-compiles the command into $(BINDIR)/$(NAME)-<target>.
darwin-amd64:
	GOOS=darwin GOARCH=amd64 $(GOBUILD) -o $(BINDIR)/$(NAME)-$@ $(GOFILES)

darwin-arm64:
	GOOS=darwin GOARCH=arm64 $(GOBUILD) -o $(BINDIR)/$(NAME)-$@ $(GOFILES)

linux-amd64:
	GOOS=linux GOARCH=amd64 $(GOBUILD) -o $(BINDIR)/$(NAME)-$@ $(GOFILES)

windows-amd64:
	GOOS=windows GOARCH=amd64 $(GOBUILD) -o $(BINDIR)/$(NAME)-$@.exe $(GOFILES)

# None of the targets below produce a file named after themselves.
.PHONY: fmt test test-integration test-cover bench profile

fmt:
	@echo "-> Running go fmt"
	@go fmt $(PACKAGES)

test:
	@echo "-> Running go test"
	@CGO_ENABLED=1 go test -v -race -cover -coverprofile=coverage.out -covermode=atomic ./...

# Runs the integration-tagged tests one package at a time and concatenates the
# per-package profiles into coverage.out.
# "$$(" is required so that the command substitution reaches the shell; a
# single "$(" is expanded by make itself as an (undefined) variable.
test-integration:
	@echo 'mode: atomic' > coverage.out
	@go list ./... | xargs -n1 -I{} sh -c 'CGO_ENABLED=1 go test -race -tags=integration -covermode=atomic -coverprofile=coverage.tmp -coverpkg $$(go list ./... | tr "\n" ",") {} && tail -n +2 coverage.tmp >> coverage.out || exit 255'
	@rm -f coverage.tmp

# Expects coverage.out to exist already (written by the test targets above).
test-cover:
	@echo "-> Running go tool cover"
	@go tool cover -func=coverage.out
	@go tool cover -html=coverage.out -o coverage.html

bench:
	@echo "-> Running benchmark"
	@go test -v -bench .

profile:
	@echo "-> Running profile"
	@go test -cpuprofile cpu.prof -memprofile mem.prof -v -bench .
+ +tar_releases := $(addsuffix .gz, $(PLATFORM_LIST)) +zip_releases := $(addsuffix .zip, $(WINDOWS_ARCH_LIST)) + +$(tar_releases): %.gz : % + @mkdir -p $(PACKDIR) + chmod +x $(BINDIR)/$(NAME)-$(basename $@) + tar -czf $(PACKDIR)/$(NAME)-$(basename $@)-$(VERSION).tar.gz --transform "s/$(notdir $(BINDIR))//g" $(BINDIR)/$(NAME)-$(basename $@) + +$(zip_releases): %.zip : % + @mkdir -p $(PACKDIR) + zip -m -j $(PACKDIR)/$(NAME)-$(basename $@)-$(VERSION).zip $(BINDIR)/$(NAME)-$(basename $@).exe + +all-arch: $(PLATFORM_LIST) $(WINDOWS_ARCH_LIST) + +releases: $(tar_releases) $(zip_releases) + +clean: + rm -f $(PACKDIR)/* + rm -f *.warc.gz + +tag: + git tag v$(VERSION) diff --git a/README.md b/README.md new file mode 100644 index 0000000..6ab93ee --- /dev/null +++ b/README.md @@ -0,0 +1,61 @@ +# warcraft + +`warcraft` is a toolkit that helps download web pages as `warc` files using wget. + +## Installation + +The simplest, cross-platform way is to download from [GitHub Releases](https://github.com/wabarc/warcraft/releases) and place the executable file in your PATH. + +Via the `go get` command: + +```sh +go get -u github.com/wabarc/warcraft/cmd/warcraft +``` + +From [gobinaries.com](https://gobinaries.com): + +```sh +$ curl -sf https://gobinaries.com/wabarc/warcraft | sh +``` + +## Usage + +Command-line: + +```sh +$ warcraft +A CLI tool help download webpage as warc file using wget. + +Usage: + + warcraft [options] [url1] ... [urlN] +``` + +Go package: +```go +import ( + "fmt" + "net/url" + "os" + + "github.com/wabarc/warcraft" +) + +func main() { + u, err := url.Parse("https://example.com") + if err != nil { + fmt.Fprintf(os.Stderr, "warcraft: %v\n", err) + os.Exit(1) + } + + if path, err := warcraft.New().Download(u); err != nil { + fmt.Fprintf(os.Stderr, "warcraft: %v\n", err) + } else { + fmt.Fprintf(os.Stdout, "%s %s\n", path, u) + } +} +``` + +## License + +This software is released under the terms of the MIT License. See the [LICENSE](https://github.com/wabarc/warcraft/blob/main/LICENSE) file for details. 
diff --git a/cmd/warcraft/main.go b/cmd/warcraft/main.go new file mode 100644 index 0000000..b369991 --- /dev/null +++ b/cmd/warcraft/main.go @@ -0,0 +1,63 @@ +package main + +import ( + "flag" + "fmt" + "net/url" + "os" + "strings" + "sync" + + "github.com/wabarc/warcraft" +) + +func init() { + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage:\n\n") + fmt.Fprintf(os.Stderr, " warcraft [options] [url1] ... [urlN]\n") + + flag.PrintDefaults() + } + var basePrint = func() { + fmt.Print("A CLI tool help download webpage as warc file using wget.\n\n") + flag.Usage() + fmt.Fprint(os.Stderr, "\n") + } + + flag.Parse() + + args := flag.Args() + + if len(args) < 1 { + basePrint() + os.Exit(0) + } + +} + +func main() { + uris := flag.Args() + warc := warcraft.New() + + pwd, _ := os.Getwd() + + var wg sync.WaitGroup + for _, uri := range uris { + wg.Add(1) + go func(uri string) { + in, err := url.Parse(uri) + if err != nil { + fmt.Fprintf(os.Stderr, "parse %s failed: %v\n", uri, err) + return + } + + if path, err := warc.Download(in); err != nil { + fmt.Fprintf(os.Stderr, "warcraft: %v\n", err) + } else { + fmt.Fprintf(os.Stdout, "%s %s\n", strings.TrimLeft(path, pwd), uri) + } + wg.Done() + }(uri) + } + wg.Wait() +} diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..de06705 --- /dev/null +++ b/doc.go @@ -0,0 +1,10 @@ +// Copyright 2021 Wayback Archiver. All rights reserved. +// Use of this source code is governed by the MIT +// license that can be found in the LICENSE file. + +/* +warcraft is a Golang package and command-line tool to +help download webpage as `warc` file using wget. 
+*/ + +package warcraft // import "github.com/wabarc/warcraft" diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..d3c4e9e --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module github.com/wabarc/warcraft + +go 1.16 + +require ( + github.com/pkg/errors v0.9.1 + github.com/wabarc/helper v0.0.0-20210701193643-e0fe0a807cb9 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b4bb70b --- /dev/null +++ b/go.sum @@ -0,0 +1,13 @@ +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/wabarc/helper v0.0.0-20210701193643-e0fe0a807cb9 h1:cO9x2t4ruB90tdbvUrvX0dNsiaTUjhmOAYeiHBZZwAI= +github.com/wabarc/helper v0.0.0-20210701193643-e0fe0a807cb9/go.mod h1:TuTZtoiOu984UWOf7FfX58JllKMjq7FCz701kB5W88E= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +mvdan.cc/xurls/v2 v2.2.0 h1:NSZPykBXJFCetGZykLAxaL6SIpvbVy/UFEniIfHAa8A= +mvdan.cc/xurls/v2 v2.2.0/go.mod h1:EV1RMtya9D6G5DMYPGD8zTQzaHet6Jh8gFlRgGRJeO8= diff --git a/warcraft.go b/warcraft.go new file mode 100644 index 0000000..c6899cb --- /dev/null +++ b/warcraft.go @@ -0,0 +1,105 @@ +// Copyright 2021 Wayback Archiver. All rights reserved. +// Use of this source code is governed by the MIT +// license that can be found in the LICENSE file. 
+ +package warcraft // import "github.com/wabarc/warcraft" + +import ( + "net/url" + "os" + "os/exec" + "path/filepath" + "runtime" + + "github.com/pkg/errors" + "github.com/wabarc/helper" +) + +// Warcraft represents warcraft config. +type Warcraft struct { + BasePath string // base path of warc file, defaults to current directory +} + +// New a Warcraft struct +func New() *Warcraft { + pwd, _ := os.Getwd() + + return &Warcraft{ + BasePath: pwd, + } +} + +// Download +// wget --delete-after --no-directories --warc-file=google --recursive --level=1 URI +func (warc *Warcraft) Download(u *url.URL) (string, error) { + if warc.BasePath == "" { + pwd, err := os.Getwd() + if err != nil { + return "", err + } + warc.BasePath = pwd + } + if !helper.IsDir(warc.BasePath) { + return "", errors.New(warc.BasePath + " is invalid") + } + if err := helper.Writable(warc.BasePath); err != nil { + return "", errors.Wrap(err, "no writable") + } + + binPath, err := findWgetExecPath() + if err != nil { + return "", err + } + + name := filepath.Join(warc.BasePath, helper.RandString(10, "")) + args := []string{ + "--delete-after", "--no-directories", + "--recursive", "--level=1", + "--warc-file=" + name, + u.String(), + } + cmd := exec.Command(binPath, args...) 
+ if err := cmd.Start(); err != nil { + return "", err + } + if err := cmd.Wait(); err != nil { + return "", err + } + + dst := name + ".warc.gz" + + return dst, nil +} + +func findWgetExecPath() (string, error) { + var locations []string + switch runtime.GOOS { + case "darwin": + locations = []string{ + // Mac + "wget", + "/usr/local/bin/wget", + } + case "windows": + locations = []string{ + // Windows + "wget", + "wget.exe", // in case PATHEXT is misconfigured + } + default: + locations = []string{ + // Unix-like + "wget", + "/usr/bin/wget", + } + } + + for _, path := range locations { + found, err := exec.LookPath(path) + if err == nil { + return found, nil + } + } + + return "", errors.New("wget not found") +} diff --git a/warcraft_test.go b/warcraft_test.go new file mode 100644 index 0000000..2398874 --- /dev/null +++ b/warcraft_test.go @@ -0,0 +1,52 @@ +// Copyright 2021 Wayback Archiver. All rights reserved. +// Use of this source code is governed by the MIT +// license that can be found in the LICENSE file. + +package warcraft // import "github.com/wabarc/warcraft" + +import ( + "fmt" + "io/ioutil" + "net/http" + "net/url" + "os" + "testing" + + "github.com/wabarc/helper" +) + +func TestDownload(t *testing.T) { + if _, err := findWgetExecPath(); err != nil { + t.Skip(err.Error(), ", skipped") + } + + _, mux, server := helper.MockServer() + defer server.Close() + + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Hello, Golang.") + }) + + uri := server.URL + in, err := url.Parse(uri) + if err != nil { + t.Fatal(err) + } + + dir, err := ioutil.TempDir("", "warcraft") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dir) + + warc := New() + path, err := warc.Download(in) + if err != nil { + t.Fatal(err) + } + defer os.Remove(path) + + if !helper.Exists(path) { + t.Errorf(`download warc file failed`) + } +}