Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kadai3-2 imura81gt #50

Draft
wants to merge 16 commits into
base: kadai3-imura81gt
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions kadai3/imura81gt/rget/.go-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.13.4
48 changes: 48 additions & 0 deletions kadai3/imura81gt/rget/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
rget
=========================================================

Command
-----------------------------------------

```
go run cmd/rget/main.go https://upload.wikimedia.org/wikipedia/commons/1/16/Notocactus_minimus.jpg
```

Theme
-----------------------------------------

分割ダウンロードを行う

元ネタ: https://qiita.com/codehex/items/d0a500ac387d39a34401

- [x] Rangeアクセスを用いる
- [ ] いくつかのゴルーチンでダウンロードしてマージする
- [x] エラー処理を工夫する
- [x] golang.org/x/sync/errgroupパッケージなどを使ってみる
- [x] キャンセルが発生した場合の実装を行う

ref: https://qiita.com/codehex/items/d0a500ac387d39a34401



Note.
------------------------------------------

### Range Request

https://developer.mozilla.org/ja/docs/Web/HTTP/Range_requests

> Accept-Ranges が HTTP レスポンスに存在した場合 (そして値が "none" ではない場合)、サーバーは範囲リクエストに対応しています。これは例えば、 HEAD リクエストを cURL で発行することで確認することができます。


https://developer.mozilla.org/ja/docs/Web/HTTP/Headers/Accept-Ranges

> Accept-Ranges: bytes
> Accept-Ranges: none

https://developer.mozilla.org/ja/docs/Web/HTTP/Headers/Range

> Range: <unit>=<range-start>-
> Range: <unit>=<range-start>-<range-end>
> Range: <unit>=<range-start>-<range-end>, <range-start>-<range-end>
> Range: <unit>=<range-start>-<range-end>, <range-start>-<range-end>, <range-start>-<range-end>
35 changes: 35 additions & 0 deletions kadai3/imura81gt/rget/cmd/rget/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package main

import (
"flag"
"fmt"
"os"

"github.com/gopherdojo/dojo7/kadai3/imura81gt/rget"
)

// main parses the command-line flags, requires exactly one URL argument and
// passes the resulting options to rget.Run.
//
// Flags:
//   -c  number of concurrent range requests (default 2)
//   -o  output directory (default "./")
//
// The process exits with status 1 on a usage error or when the download fails.
func main() {
	concurrency := flag.Uint("c", 2, "concurrency")
	outputDir := flag.String("o", "./", "output directory")
	flag.Parse()

	// Validate the positional arguments before building the option value.
	urls := flag.Args()
	if len(urls) != 1 {
		fmt.Fprintf(os.Stderr, "%s <url>\n", os.Args[0])
		fmt.Fprintln(os.Stderr, "option:")
		flag.PrintDefaults()
		os.Exit(1)
	}

	option := rget.Option{
		Concurrency: *concurrency,
		OutputDir:   *outputDir,
		URL:         urls[0],
	}
	fmt.Println(option)

	if err := rget.Run(option); err != nil {
		// End the message with a newline so it does not run into the shell
		// prompt that follows.
		fmt.Fprintf(os.Stderr, "err: %s\n", err)
		os.Exit(1)
	}
}
5 changes: 5 additions & 0 deletions kadai3/imura81gt/rget/cmd/rget/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package main

import "testing"

// TestMain is an empty placeholder test; it makes no assertions.
// NOTE(review): the name TestMain is conventionally used for the
// func TestMain(m *testing.M) hook of the testing package — consider renaming
// this function once it gets a real body.
func TestMain(t *testing.T) {}
8 changes: 8 additions & 0 deletions kadai3/imura81gt/rget/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
module github.com/gopherdojo/dojo7/kadai3/imura81gt/rget

go 1.13

require (
github.com/google/go-cmp v0.3.1
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
)
4 changes: 4 additions & 0 deletions kadai3/imura81gt/rget/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
235 changes: 235 additions & 0 deletions kadai3/imura81gt/rget/rget.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
package rget

import (
"context"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path"
"path/filepath"

"golang.org/x/sync/errgroup"
)

// Option holds the settings and the working state of a single download.
type Option struct {
// Concurrency is the number of units the content is divided into.
Concurrency uint
// URL is the address of the resource to download.
URL string
// OutputDir is the directory in which the combined output file is created.
OutputDir string
// ContentLength is the size of the resource in bytes.
ContentLength int64
// Units lists the byte ranges the download is split into.
Units Units
}

// Unit describes one byte range of the download and tracks its progress.
type Unit struct {
	// RangeStart is the offset of the first byte of the range.
	RangeStart int64
	// RangeEnd is the offset of the last byte of the range (inclusive).
	RangeEnd int64
	// TempFileName is the name of the file that stores this range.
	TempFileName string
	// DownloadedSize is the number of bytes passed to Write so far.
	DownloadedSize int64
}

// Write implements io.Writer as a pure progress counter: it adds len(data) to
// DownloadedSize, stores nothing, and always reports the full length as
// written with a nil error.
func (u *Unit) Write(data []byte) (int, error) {
	received := len(data)
	u.DownloadedSize = u.DownloadedSize + int64(received)
	return received, nil
}

// Units is a list of Unit values.
type Units []Unit

// Run downloads option.URL in several ranged pieces and writes the
// reassembled file into option.OutputDir.
//
// The steps are: check that the server supports range requests, divide the
// content into units, download the units concurrently into a temporary
// directory, and concatenate them into the output file. The temporary
// directory is removed when Run returns.
//
// The error of the first failing step is returned unchanged, so its message
// is preserved and callers can still inspect it with errors.Is / errors.As.
func Run(option Option) error {
	fmt.Printf("%+v\n", option)

	if err := option.checkingHeaders(); err != nil {
		return err
	}

	option.divide()

	tmpDir, err := ioutil.TempDir("", "rget")
	if err != nil {
		return err
	}
	// Best-effort cleanup; a failure to remove the directory is not reported.
	defer os.RemoveAll(tmpDir)
	fmt.Println(tmpDir)

	if err := option.parallelDownload(tmpDir); err != nil {
		return err
	}

	return option.combine(tmpDir)
}

// checkingHeaders sends a HEAD request for o.URL and verifies that the
// resource can be fetched with range requests.
//
// On success it stores the reported size in o.ContentLength and replaces
// o.URL with the URL of the final request (after any redirects). It returns
// an error when the request fails, when the Accept-Ranges header is missing
// or "none", or when the size is zero or unknown.
func (o *Option) checkingHeaders() error {
	resp, err := http.Head(o.URL)
	if err != nil {
		return err
	}
	// A HEAD response has no payload, but the body still has to be closed so
	// the transport can release the connection.
	defer resp.Body.Close()

	finalURL := resp.Request.URL.String()

	acceptRanges := resp.Header.Get("Accept-Ranges")
	if acceptRanges == "" || acceptRanges == "none" {
		return fmt.Errorf("%s : %s cannot support Ranges Requests", o.URL, finalURL)
	}

	// ContentLength is -1 when the server does not report a size. Such a
	// resource cannot be divided into byte ranges any more than an empty one.
	if resp.ContentLength <= 0 {
		return fmt.Errorf("%s size is nil", o.URL)
	}

	o.ContentLength = resp.ContentLength

	// keep the redirect URL that accept Ranges Requests because some mirror sites may deny.
	// TODO: redirectURL should set by Unit separately.
	o.URL = finalURL

	return nil
}

// divide splits the o.ContentLength bytes into o.Concurrency contiguous units
// and stores them in o.Units.
//
// A Concurrency of 0 is treated as 1, and Concurrency is lowered to
// ContentLength when there are fewer bytes than workers. Every unit covers
// ContentLength/Concurrency bytes; the last unit additionally takes the
// remainder so the units cover the range 0..ContentLength-1 exactly. Temporary
// file names have the form "<index>_<base name of URL>".
//
// When ContentLength is not positive there is nothing to divide and o.Units
// is set to nil.
func (o *Option) divide() {
	if o.Concurrency == 0 {
		o.Concurrency = 1
	}

	// Bail out before the division below can hit a zero divisor, or a negative
	// length wraps around in the uint conversion.
	if o.ContentLength <= 0 {
		o.Units = nil
		return
	}

	if o.ContentLength < int64(o.Concurrency) {
		o.Concurrency = uint(o.ContentLength)
	}

	count := int(o.Concurrency)
	chunk := o.ContentLength / int64(count)
	baseName := path.Base(o.URL)

	units := make([]Unit, 0, count)
	for i := 0; i < count; i++ {
		units = append(units, Unit{
			RangeStart:   int64(i) * chunk,
			RangeEnd:     int64(i+1)*chunk - 1,
			TempFileName: fmt.Sprintf("%d_%s", i, baseName),
		})
	}

	// TODO: should distribute the remainder to each unit
	units[count-1].RangeEnd = o.ContentLength - 1

	o.Units = units
}

// parallelDownload starts one goroutine per entry of o.Units, each calling
// downloadWithContext to store its piece under tmpDir, and waits for all of
// them. The goroutines share a context derived from the errgroup; the value
// returned is whatever the group's Wait reports (nil when every download
// succeeded).
func (o *Option) parallelDownload(tmpDir string) error {
	fmt.Println("parallelDownload", o.Units)

	group, ctx := errgroup.WithContext(context.Background())
	for idx := range o.Units {
		// Give the closure its own copy of the loop variable.
		// https://godoc.org/golang.org/x/sync/errgroup#example-Group--Parallel
		// https://golang.org/doc/faq#closures_and_goroutines
		idx := idx
		group.Go(func() error {
			return o.downloadWithContext(ctx, idx, tmpDir)
		})
	}

	return group.Wait()
}

// downloadWithContext fetches the byte range described by o.Units[i] from
// o.URL and stores it in dir under the unit's TempFileName.
//
// The request is bound to ctx, so cancelling ctx aborts the transfer. While
// the body is copied, the unit's Write method is fed the same bytes to track
// progress.
//
// An error is returned when the request cannot be built or sent, when the
// server does not answer 206 Partial Content (any other reply would not be
// the requested slice and would corrupt the combined file), when ctx is
// already cancelled, or when creating, writing or closing the temporary file
// fails. Underlying errors are wrapped with %w.
func (o *Option) downloadWithContext(
	ctx context.Context,
	i int,
	dir string,
) (err error) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	unit := &o.Units[i]
	fmt.Printf("Downloading: %v %+v\n", i, *unit)

	//v1.13
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, o.URL, nil)
	if err != nil {
		return fmt.Errorf("Error: %w", err)
	}

	// add range header
	byteRange := fmt.Sprintf("bytes=%d-%d", unit.RangeStart, unit.RangeEnd)
	fmt.Println(byteRange)
	req.Header.Set("Range", byteRange)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("Error: %w", err)
	}
	defer resp.Body.Close()

	// client.Do reports no error for 4xx/5xx replies, and a server that
	// ignores the Range header answers 200 with the whole body.
	if resp.StatusCode != http.StatusPartialContent {
		return fmt.Errorf("Error: %s: unexpected status %q for range %s", o.URL, resp.Status, byteRange)
	}

	select {
	case <-ctx.Done():
		fmt.Printf("Done: %v %+v\n", i, *unit)
		// Report the cancellation cause; err is nil at this point.
		return fmt.Errorf("Error: %w", ctx.Err())
	default:
		fmt.Println("default:", i, *unit)
	}

	w, err := os.Create(filepath.Join(dir, unit.TempFileName))
	if err != nil {
		return fmt.Errorf("Error: %w", err)
	}
	defer func() {
		// A failed Close can mean lost data, so surface it unless an earlier
		// error is already being returned.
		if cerr := w.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("Error: %w", cerr)
		}
	}()

	if _, err = io.Copy(w, io.TeeReader(resp.Body, unit)); err != nil {
		return fmt.Errorf("Error: %w", err)
	}

	return nil
}

// combine concatenates the temporary files of all units, in the order of
// o.Units, into a single file named after the base name of o.URL inside
// o.OutputDir. dir is the directory holding the temporary files.
//
// It returns an error when the output file cannot be created or closed, or
// when a unit file cannot be opened or copied. Underlying errors are wrapped
// with %w.
func (o *Option) combine(dir string) (err error) {
	w, err := os.Create(filepath.Join(o.OutputDir, path.Base(o.URL)))
	if err != nil {
		return fmt.Errorf("Error: %w", err)
	}
	defer func() {
		// A failed Close can mean the output is incomplete, so surface it
		// unless an earlier error is already being returned.
		if cerr := w.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("Error: %w", cerr)
		}
	}()

	for _, unit := range o.Units {
		if err = appendUnitFile(w, filepath.Join(dir, unit.TempFileName)); err != nil {
			return err
		}
	}

	return nil
}

// appendUnitFile copies the whole file at name to w and closes the file
// before returning, so no descriptor stays open between loop iterations.
func appendUnitFile(w io.Writer, name string) error {
	r, err := os.Open(name)
	if err != nil {
		return fmt.Errorf("Error: %w", err)
	}
	defer r.Close()

	if _, err := io.Copy(w, r); err != nil {
		return fmt.Errorf("Error: %w", err)
	}
	return nil
}
Loading