Commit 9631baf1 authored by Cristian Maglie

Updated grab library

parent b089fba6
...@@ -115,7 +115,7 @@ ...@@ -115,7 +115,7 @@
branch = "master" branch = "master"
name = "github.com/cavaliercoder/grab" name = "github.com/cavaliercoder/grab"
packages = ["."] packages = ["."]
revision = "94177710b7005a1bc795e6805f2f7481f4120189" revision = "b7c6f788e7cbd3afa871543749c38b7a3abeb7e4"
[[projects]] [[projects]]
name = "github.com/cenkalti/backoff" name = "github.com/cenkalti/backoff"
......
...@@ -20,7 +20,7 @@ lint: ...@@ -20,7 +20,7 @@ lint:
gocyclo -over 15 . || : gocyclo -over 15 . || :
misspell ./* || : misspell ./* || :
deps: deps:
$(GOGET) github.com/golang/lint/golint $(GOGET) github.com/golang/lint/golint
$(GOGET) github.com/fzipp/gocyclo $(GOGET) github.com/fzipp/gocyclo
$(GOGET) github.com/client9/misspell/cmd/misspell $(GOGET) github.com/client9/misspell/cmd/misspell
......
# grab # grab
[![GoDoc](https://godoc.org/github.com/cavaliercoder/grab?status.svg)](https://godoc.org/github.com/cavaliercoder/grab) [![Build Status](https://travis-ci.org/cavaliercoder/grab.svg)](https://travis-ci.org/cavaliercoder/grab) [![Go Report Card](https://goreportcard.com/badge/github.com/cavaliercoder/grab)](https://goreportcard.com/report/github.com/cavaliercoder/grab) [![GoDoc](https://godoc.org/github.com/cavaliercoder/grab?status.svg)](https://godoc.org/github.com/cavaliercoder/grab) [![Build Status](https://travis-ci.org/cavaliercoder/grab.svg?branch=master)](https://travis-ci.org/cavaliercoder/grab) [![Go Report Card](https://goreportcard.com/badge/github.com/cavaliercoder/grab)](https://goreportcard.com/report/github.com/cavaliercoder/grab)
*Downloading the internet, one goroutine at a time!* *Downloading the internet, one goroutine at a time!*
...@@ -15,23 +15,26 @@ rad features: ...@@ -15,23 +15,26 @@ rad features:
* Safely cancel downloads using context.Context * Safely cancel downloads using context.Context
* Validate downloads using checksums * Validate downloads using checksums
* Download batches of files concurrently * Download batches of files concurrently
* Apply rate limiters
Requires Go v1.7+ Requires Go v1.7+
## Older versions ## Example
If you are using an older version of Go, or require previous versions of the
Grab API, you can import an older version of this package, thanks to gopkg.in.
Please see all GitHub tags for available versions.
$ go get gopkg.in/cavaliercoder/grab.v1 The following example downloads a PDF copy of the free eBook, "An Introduction
to Programming in Go" into the current working directory.
```go
resp, err := grab.Get(".", "http://www.golang-book.com/public/pdf/gobook.pdf")
if err != nil {
log.Fatal(err)
}
## Example fmt.Println("Download saved to", resp.Filename)
```
The following example downloads a PDF copy of the free eBook, "An Introduction The following, more complete example allows for more granular control and
to Programming in Go" and periodically prints the download progress until it is periodically prints the download progress until it is complete.
complete.
The second time you run the example, it will auto-resume the previous download The second time you run the example, it will auto-resume the previous download
and exit sooner. and exit sooner.
...@@ -93,3 +96,32 @@ Loop: ...@@ -93,3 +96,32 @@ Loop:
// Download saved to ./gobook.pdf // Download saved to ./gobook.pdf
} }
``` ```
## Design trade-offs
The primary use case for Grab is to concurrently download thousands of large
files from remote file repositories where the remote files are immutable.
Examples include operating system package repositories or ISO libraries.
Grab aims to provide robust, sane defaults. These are usually determined using
the HTTP specifications, or by mimicking the behavior of common web clients like
cURL, wget and common web browsers.
Grab aims to be stateless. The only state that exists is the remote files you
wish to download and the local copy which may be completed, partially completed
or not yet created. The advantage to this is that the local file system is not
cluttered unnecessarily with additional state files (like a `.crdownload` file).
The disadvantage of this approach is that grab must make assumptions about the
local and remote state; specifically, that they have not been modified by
another program.
If the local or remote file is modified outside of grab, and you download the
file again with resuming enabled, the local file will likely become corrupted.
In this case, you might consider making remote files immutable, or disabling
resume.
Grab aims to enable best-in-class functionality for more complex features
through extensible interfaces, rather than reimplementation. For example,
you can provide your own Hash algorithm to compute file checksums, or your
own rate limiter implementation (with all the associated trade-offs) to rate
limit downloads.
...@@ -72,7 +72,6 @@ func (c *Client) Do(req *Request) *Response { ...@@ -72,7 +72,6 @@ func (c *Client) Do(req *Request) *Response {
ctx: ctx, ctx: ctx,
cancel: cancel, cancel: cancel,
bufferSize: req.BufferSize, bufferSize: req.BufferSize,
writeFlags: os.O_CREATE | os.O_WRONLY,
} }
if resp.bufferSize == 0 { if resp.bufferSize == 0 {
// default to Client.BufferSize // default to Client.BufferSize
...@@ -242,7 +241,6 @@ func (c *Client) validateLocal(resp *Response) stateFunc { ...@@ -242,7 +241,6 @@ func (c *Client) validateLocal(resp *Response) stateFunc {
} }
if resp.Request.NoResume { if resp.Request.NoResume {
resp.writeFlags = os.O_TRUNC | os.O_WRONLY
return c.getRequest return c.getRequest
} }
...@@ -257,7 +255,6 @@ func (c *Client) validateLocal(resp *Response) stateFunc { ...@@ -257,7 +255,6 @@ func (c *Client) validateLocal(resp *Response) stateFunc {
fmt.Sprintf("bytes=%d-", resp.fi.Size())) fmt.Sprintf("bytes=%d-", resp.fi.Size()))
resp.DidResume = true resp.DidResume = true
resp.bytesResumed = resp.fi.Size() resp.bytesResumed = resp.fi.Size()
resp.writeFlags = os.O_APPEND | os.O_WRONLY
return c.getRequest return c.getRequest
} }
return c.headRequest return c.headRequest
...@@ -281,7 +278,7 @@ func (c *Client) checksumFile(resp *Response) stateFunc { ...@@ -281,7 +278,7 @@ func (c *Client) checksumFile(resp *Response) stateFunc {
defer f.Close() defer f.Close()
// hash file // hash file
t := newTransfer(resp.Request.Context(), resp.Request.hash, f, nil) t := newTransfer(resp.Request.Context(), nil, resp.Request.hash, f, nil)
if nc, err := t.copy(); err != nil { if nc, err := t.copy(); err != nil {
resp.err = err resp.err = err
return c.closeResponse return c.closeResponse
...@@ -390,7 +387,7 @@ func (c *Client) readResponse(resp *Response) stateFunc { ...@@ -390,7 +387,7 @@ func (c *Client) readResponse(resp *Response) stateFunc {
// openWriter opens the destination file for writing and seeks to the location // openWriter opens the destination file for writing and seeks to the location
// from whence the file transfer will resume. // from whence the file transfer will resume.
// //
// Requires that Response.Filename and Response.writeFlags already be set. // Requires that Response.Filename and resp.DidResume are already set.
func (c *Client) openWriter(resp *Response) stateFunc { func (c *Client) openWriter(resp *Response) stateFunc {
if !resp.Request.NoCreateDirectories { if !resp.Request.NoCreateDirectories {
resp.err = mkdirp(resp.Filename) resp.err = mkdirp(resp.Filename)
...@@ -399,7 +396,18 @@ func (c *Client) openWriter(resp *Response) stateFunc { ...@@ -399,7 +396,18 @@ func (c *Client) openWriter(resp *Response) stateFunc {
} }
} }
f, err := os.OpenFile(resp.Filename, resp.writeFlags, 0644) // compute write flags
flag := os.O_CREATE | os.O_WRONLY
if resp.fi != nil {
if resp.DidResume {
flag = os.O_APPEND | os.O_WRONLY
} else {
flag = os.O_TRUNC | os.O_WRONLY
}
}
// open file
f, err := os.OpenFile(resp.Filename, flag, 0644)
if err != nil { if err != nil {
resp.err = err resp.err = err
return c.closeResponse return c.closeResponse
...@@ -423,6 +431,7 @@ func (c *Client) openWriter(resp *Response) stateFunc { ...@@ -423,6 +431,7 @@ func (c *Client) openWriter(resp *Response) stateFunc {
b := make([]byte, resp.bufferSize) b := make([]byte, resp.bufferSize)
resp.transfer = newTransfer( resp.transfer = newTransfer(
resp.Request.Context(), resp.Request.Context(),
resp.Request.RateLimiter,
resp.writer, resp.writer,
resp.HTTPResponse.Body, resp.HTTPResponse.Body,
b) b)
......
...@@ -20,7 +20,7 @@ var ( ...@@ -20,7 +20,7 @@ var (
ErrNoFilename = errors.New("no filename could be determined") ErrNoFilename = errors.New("no filename could be determined")
// ErrNoTimestamp indicates that a timestamp could not be automatically // ErrNoTimestamp indicates that a timestamp could not be automatically
// determined using the reponse headers from the remote server. // determined using the response headers from the remote server.
ErrNoTimestamp = errors.New("no timestamp could be determined for the remote file") ErrNoTimestamp = errors.New("no timestamp could be determined for the remote file")
// ErrFileExists indicates that the destination path already exists. // ErrFileExists indicates that the destination path already exists.
......
package grab
import "context"
// RateLimiter describes the contract a third-party rate limiter has to fulfil
// before it can be used to throttle download transfer speeds.
//
// The token bucket limiter documented at
// https://godoc.org/golang.org/x/time/rate#Limiter is a recommended
// implementation.
type RateLimiter interface {
	// WaitN is called by a transfer with its context and the length of its
	// copy buffer before it reads from the source. A non-nil error stops the
	// copy and is returned to the caller.
	WaitN(ctx context.Context, n int) error
}
...@@ -67,9 +67,15 @@ type Request struct { ...@@ -67,9 +67,15 @@ type Request struct {
// BufferSize specifies the size in bytes of the buffer that is used for // BufferSize specifies the size in bytes of the buffer that is used for
// transferring the requested file. Larger buffers may result in faster // transferring the requested file. Larger buffers may result in faster
// throughput but will use more memory and result in less frequent updates // throughput but will use more memory and result in less frequent updates
// to the transfer progress statistics. Default: 32KB. // to the transfer progress statistics. If a RateLimiter is configured,
// BufferSize should be much lower than the rate limit. Default: 32KB.
BufferSize int BufferSize int
// RateLimiter allows the transfer rate of a download to be limited. The given
// Request.BufferSize determines how frequently the RateLimiter will be
// polled.
RateLimiter RateLimiter
// BeforeCopy is a user provided function that is called immediately before // BeforeCopy is a user provided function that is called immediately before
// a request starts downloading. If BeforeCopy returns an error, the request // a request starts downloading. If BeforeCopy returns an error, the request
// is cancelled and the same error is returned on the Response object. // is cancelled and the same error is returned on the Response object.
......
...@@ -70,8 +70,7 @@ type Response struct { ...@@ -70,8 +70,7 @@ type Response struct {
// writer is the file handle used to write the downloaded file to local // writer is the file handle used to write the downloaded file to local
// storage // storage
writer io.WriteCloser writer io.WriteCloser
writeFlags int
// bytesCompleted specifies the number of bytes which were already // bytesCompleted specifies the number of bytes which were already
// transferred before this transfer began. // transferred before this transfer began.
......
...@@ -9,14 +9,16 @@ import ( ...@@ -9,14 +9,16 @@ import (
type transfer struct { type transfer struct {
n int64 // must be 64bit aligned on 386 n int64 // must be 64bit aligned on 386
ctx context.Context ctx context.Context
lim RateLimiter
w io.Writer w io.Writer
r io.Reader r io.Reader
b []byte b []byte
} }
func newTransfer(ctx context.Context, dst io.Writer, src io.Reader, buf []byte) *transfer { func newTransfer(ctx context.Context, lim RateLimiter, dst io.Writer, src io.Reader, buf []byte) *transfer {
return &transfer{ return &transfer{
ctx: ctx, ctx: ctx,
lim: lim,
w: dst, w: dst,
r: src, r: src,
b: buf, b: buf,
...@@ -37,6 +39,12 @@ func (c *transfer) copy() (written int64, err error) { ...@@ -37,6 +39,12 @@ func (c *transfer) copy() (written int64, err error) {
default: default:
// keep working // keep working
} }
if c.lim != nil {
err = c.lim.WaitN(c.ctx, len(c.b))
if err != nil {
return
}
}
nr, er := c.r.Read(c.b) nr, er := c.r.Read(c.b)
if nr > 0 { if nr > 0 {
nw, ew := c.w.Write(c.b[0:nr]) nw, ew := c.w.Write(c.b[0:nr])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment