diff --git a/README.md b/README.md
index 9eb1063cf..fb045aa4b 100644
--- a/README.md
+++ b/README.md
@@ -48,11 +48,10 @@ See the full list of available options at
### Remote URL ###
The URL of the original image to load is specified as the remainder of the
-path, without any encoding. For example,
+path and may be URL encoded. For example,
`http://localhost/200/https://willnorris.com/logo.jpg`.
-In order to [optimize caching][], it is recommended that URLs not contain query
-strings.
+If remote image URLs contain query strings, it is recommended to URL-encode them when passing to imageproxy in order to [optimize caching][].
[optimize caching]: http://www.stevesouders.com/blog/2008/08/23/revving-filenames-dont-use-querystring/
@@ -75,6 +74,7 @@ x0.15 | 15% original height, proportional width |
200x,png | 200px wide, converted to PNG format |
cx175,cw400,ch300,100x | crop to 400x300px starting at (175,0), scale to 100px wide |
+x | no options, don't transform, just proxy |
The [smart crop feature](https://godoc.org/willnorris.com/go/imageproxy#hdr-Smart_Crop)
can best be seen by comparing crops of [this source image][judah-sheets], with
diff --git a/data.go b/data.go
index 5df4a6954..954f57587 100644
--- a/data.go
+++ b/data.go
@@ -308,61 +308,106 @@ func (r Request) String() string {
// NewRequest parses an http.Request into an imageproxy Request. Options and
// the remote image URL are specified in the request path, formatted as:
-// /{options}/{remote_url}. Options may be omitted, so a request path may
-// simply contain /{remote_url}. The remote URL must be an absolute "http" or
-// "https" URL, should not be URL encoded, and may contain a query string.
+// /{options}/{remote_url}. Options may not be omitted, but `x` or `0x0` can
+// be used as noop option (/x/{remote_url}).
+// The remote URL must be an absolute "http(s)" URL unless BaseURL is set.
+// The remote URL may be URL encoded.
//
// Assuming an imageproxy server running on localhost, the following are all
// valid imageproxy requests:
//
// http://localhost/100x200/http://example.com/image.jpg
// http://localhost/100x200,r90/http://example.com/image.jpg?foo=bar
-// http://localhost//http://example.com/image.jpg
-// http://localhost/http://example.com/image.jpg
+// http://localhost/x/http://example.com/image.jpg
+// http://localhost/x/http%3A%2F%2Fexample.com%2Fimage.jpg
func NewRequest(r *http.Request, baseURL *url.URL) (*Request, error) {
var err error
req := &Request{Original: r}
-
path := r.URL.EscapedPath()[1:] // strip leading slash
- req.URL, err = parseURL(path)
- if err != nil || !req.URL.IsAbs() {
- // first segment should be options
- parts := strings.SplitN(path, "/", 2)
- if len(parts) != 2 {
- return nil, URLError{"too few path segments", r.URL}
- }
-
- var err error
- req.URL, err = parseURL(parts[1])
- if err != nil {
- return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL}
- }
+ // first segment should be options
+ parts := strings.SplitN(path, "/", 2)
+ if len(parts) != 2 {
+ return nil, URLError{"too few path segments", r.URL}
+ }
- req.Options = ParseOptions(parts[0])
+ req.URL, err = ParseURL(parts[1], (baseURL == nil)) // don't url decode if baseURL is set
+ if err != nil {
+ return nil, URLError{fmt.Sprintf("unable to parse remote URL: %v", err), r.URL}
}
+ req.Options = ParseOptions(parts[0])
+
+
+
if baseURL != nil {
req.URL = baseURL.ResolveReference(req.URL)
}
if !req.URL.IsAbs() {
- return nil, URLError{"must provide absolute remote URL", r.URL}
+ return nil, URLError{"must provide absolute remote URL", req.URL}
}
if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
return nil, URLError{"remote URL must have http or https scheme", r.URL}
}
- // query string is always part of the remote URL
- req.URL.RawQuery = r.URL.RawQuery
+ // only append original (unencoded) query string
+ // if we don't have one already. Example:
+ // http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld?more=query
+ // should be https://example.com/foo/bar?hello=world
+ // not https://example.com/foo/bar?more=query
+ if len(r.URL.RawQuery) > 0 && len(req.URL.RawQuery) == 0 {
+ // query string is always part of the remote URL
+ req.URL.RawQuery = r.URL.RawQuery
+ }
+
return req, nil
}
var reCleanedURL = regexp.MustCompile(`^(https?):/+([^/])`)
-// parseURL parses s as a URL, handling URLs that have been munged by
+// ParseURL parses s as a URL, handling URLs that have been munged by
// path.Clean or a webserver that collapses multiple slashes.
-func parseURL(s string) (*url.URL, error) {
+func ParseURL(s string, urlDecode bool) (*url.URL, error) {
+ var err error
+
+ // convert http:/example.com to http://example.com
s = reCleanedURL.ReplaceAllString(s, "$1://$2")
+
+ // only decode remote url param if baseURL is not set
+ if urlDecode {
+ s, err = decodeURL(s)
+ if err != nil {
+ return nil, err
+ }
+ }
+
return url.Parse(s)
}
+
+var reAbsURL = regexp.MustCompile(`^https?`)
+var reDecodedURL = regexp.MustCompile(`^https?://`)
+
+func decodeURL(s string) (string, error) {
+ var err error
+ // don't try to decode unless looks like abs url
+ if !reAbsURL.MatchString(s) {
+ return s, nil
+ }
+
+ // preserve original value in case we are not able to decode
+ decodedURL := s
+ maxDecodeAttempts := 3
+ for i := 0; !reDecodedURL.MatchString(decodedURL) && i < maxDecodeAttempts; i++ {
+ decodedURL, err = url.QueryUnescape(decodedURL)
+ if err != nil {
+ return "", URLError{fmt.Sprintf("remote URL could not be decoded: %v", err), nil}
+ }
+ }
+ if reDecodedURL.MatchString(decodedURL) {
+ return decodedURL, nil
+ } else {
+ // return original value. might be relative url (e.g. https/foo.jpg)
+ return s, nil
+ }
+}
diff --git a/data_test.go b/data_test.go
index 80d3c3dd2..0df60d1e4 100644
--- a/data_test.go
+++ b/data_test.go
@@ -113,6 +113,12 @@ func TestNewRequest(t *testing.T) {
{"http://localhost//example.com/foo", "", emptyOptions, true},
{"http://localhost//ftp://example.com/foo", "", emptyOptions, true},
+ // invalid URL because options now required
+ {"http://localhost/http://example.com/foo", "", emptyOptions, true},
+
+ // invalid URL because remote url has invalid url encoding (the %u)
+ {"http://localhost/x/https%253A%252F%252Fexample.com%252F%253FbadParam%253D%25u0414", "", emptyOptions, true},
+
// invalid options. These won't return errors, but will not fully parse the options
{
"http://localhost/s/http://example.com/",
@@ -123,17 +129,23 @@ func TestNewRequest(t *testing.T) {
"http://example.com/", Options{Width: 1}, false,
},
+
// valid URLs
+
{
- "http://localhost/http://example.com/foo",
+ "http://localhost//http://example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
- "http://localhost//http://example.com/foo",
+ "http://localhost/x/http://example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
- "http://localhost//https://example.com/foo",
+ "http://localhost/x/http://example.com/foo",
+ "http://example.com/foo", emptyOptions, false,
+ },
+ {
+ "http://localhost/0x0/https://example.com/foo",
"https://example.com/foo", emptyOptions, false,
},
{
@@ -141,21 +153,56 @@ func TestNewRequest(t *testing.T) {
"http://example.com/foo", Options{Width: 1, Height: 2}, false,
},
{
- "http://localhost//http://example.com/foo?bar",
+ "http://localhost/0x0/http://example.com/foo?bar",
"http://example.com/foo?bar", emptyOptions, false,
},
{
- "http://localhost/http:/example.com/foo",
+ "http://localhost/x/http:/example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{
- "http://localhost/http:///example.com/foo",
+ "http://localhost/x/http:///example.com/foo",
"http://example.com/foo", emptyOptions, false,
},
{ // escaped path
- "http://localhost/http://example.com/%2C",
+ "http://localhost/x/http://example.com/%2C",
"http://example.com/%2C", emptyOptions, false,
},
+ // unescaped querystring
+ {
+ "http://localhost/x/http://example.com/foo/bar?hello=world",
+ "http://example.com/foo/bar?hello=world", emptyOptions, false,
+ },
+ // escaped remote including querystring
+ {
+ "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld",
+ "http://example.com/foo/bar?hello=world", emptyOptions, false,
+ },
+ // escaped remote including encoded querystring and unencoded querystring (not retained)
+ {
+ "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld?a=b",
+ "http://example.com/foo/bar?hello=world", emptyOptions, false,
+ },
+ // escaped remote WITHOUT encoded querystring and unencoded querystring (should be retained)
+ {
+ "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar?a=b",
+ "http://example.com/foo/bar?a=b", emptyOptions, false,
+ },
+ {
+ "http://localhost/x/https%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld",
+ "https://example.com/foo/bar?hello=world", emptyOptions, false,
+ },
+ // multi-escaped remote
+ {
+ "http://localhost/x/https%25253A%25252F%25252Fexample.com%25252Ffoo%25252Fbar%25253Fhello%25253Dworld",
+ "https://example.com/foo/bar?hello=world", emptyOptions, false,
+ },
+ // escaped remote containing double escaped url as param
+ // test that we don't over-decode remote url breaking parameters
+ {
+ "http://localhost/x/http%3A%2F%2Fexample.com%2Ffoo%2Fbar%3Fhello%3Dworld%26url%3Dhttps%253A%252F%252Fwww.example.com%252F%253Ffoo%253Dbar%2526hello%253Dworld",
+ "http://example.com/foo/bar?hello=world&url=https%3A%2F%2Fwww.example.com%2F%3Ffoo%3Dbar%26hello%3Dworld", emptyOptions, false,
+ },
}
for _, tt := range tests {
@@ -186,16 +233,124 @@ func TestNewRequest(t *testing.T) {
}
func TestNewRequest_BaseURL(t *testing.T) {
- req, _ := http.NewRequest("GET", "/x/path", nil)
- base, _ := url.Parse("https://example.com/")
+ tests := []struct {
+ BaseURL string // base url to use
+ URL string // input URL to parse as an imageproxy request
+ RemoteURL string // expected URL of remote image parsed from input
+ Options Options // expected options parsed from input
+ ExpectError bool // whether an error is expected from NewRequest
+ }{
+ {
+ "http://example.com/",
+ "http://localhost/x/foo",
+ "http://example.com/foo", emptyOptions, false,
+ },
+ {
+ "http://example.com/hello",
+ "http://localhost/x//foo/bar",
+ "http://example.com/foo/bar", emptyOptions, false,
+ },
+ // if BaseURL doesn't have trailing slash
+ // URL.ResolveReference will strip last directory
+ {
+ "http://example.com/hello/",
+ "http://localhost/x/foo/bar",
+ "http://example.com/hello/foo/bar", emptyOptions, false,
+ },
+ {
+ "http://example.com/hello/",
+ "http://localhost/x/../foo/bar",
+ "http://example.com/foo/bar", emptyOptions, false,
+ },
+ // relative remote urls (baseURL set) should not have URL Decoding even if
+ // they start with http
+ {
+ "http://example.com/hello/",
+ "http://localhost/x/https%3A%2F%2Fimg.example.com%2Fpic.jpg",
+ "http://example.com/hello/https%3A%2F%2Fimg.example.com%2Fpic.jpg", emptyOptions, false,
+ },
+ }
- r, err := NewRequest(req, base)
- if err != nil {
- t.Errorf("NewRequest(%v, %v) returned unexpected error: %v", req, base, err)
+ for _, tt := range tests {
+ req, err := http.NewRequest("GET", tt.URL, nil)
+ if err != nil {
+ t.Errorf("http.NewRequest(%q) returned error: %v", tt.URL, err)
+ continue
+ }
+ base, err := url.Parse(tt.BaseURL)
+ if err != nil {
+ t.Errorf("url.Parse(%q) returned error: %v", tt.BaseURL, err)
+ continue
+ }
+
+ r, err := NewRequest(req, base)
+ if tt.ExpectError {
+ if err == nil {
+ t.Errorf("NewRequest(%v, %v) did not return expected error", req, base)
+ }
+ continue
+ } else if err != nil {
+ t.Errorf("NewRequest(%v, %v) returned unexpected error: %v", req, base, err)
+ continue
+ }
+
+ if got, want := r.URL.String(), tt.RemoteURL; got != want {
+ t.Errorf("NewRequest(%q, %q) request URL = %v, want %v", tt.URL, tt.BaseURL, got, want)
+ }
+ if got, want := r.Options, tt.Options; got != want {
+ t.Errorf("NewRequest(%q, %q) request options = %v, want %v", tt.URL, tt.BaseURL, got, want)
+ }
}
+}
+
+
- want := "https://example.com/path#0x0"
- if got := r.String(); got != want {
- t.Errorf("NewRequest(%v, %v) returned %q, want %q", req, base, got, want)
+func TestParseURL(t *testing.T) {
+ tests := []struct {
+ URL string // input URL to parse as an imageproxy request
+ URLDecode bool // expected options parsed from input
+ URLExpected string // expected URL of remote image parsed from input
+ ExpectError bool // whether an error is expected from NewRequest
+ }{
+ // should fix missing slashes
+ {"http:/example.com/", true,
+ "http://example.com/", false},
+ {"https:/example.com/", true,
+ "https://example.com/", false},
+
+ // should decode when told
+ {"https%253A%252F%252Fexample.com%252Fimg.jpg%253Ffoo%253Dbar%2526bar%253Ddo%25252Fnot%25252Fdecode%25252Fme", true,
+ "https://example.com/img.jpg?foo=bar&bar=do%2Fnot%2Fdecode%2Fme", false},
+
+ // should NOT decode unless told (e.g. baseURL set)
+ {"https%3A%2F%2Fexample.com%2Fimg.jpg%3Ffoo%3Dbar%26bar%3Ddo%252Fnot%252Fdecode%252Fme", false,
+ "https%3A%2F%2Fexample.com%2Fimg.jpg%3Ffoo%3Dbar%26bar%3Ddo%252Fnot%252Fdecode%252Fme", false},
+
+ // should return original url if asked to decode but can't find anything that
+ // looks like absolute url (starts with https?://) before giving up
+ {"https%3Aexample.com%2Fimg.jpg", true,
+ "https%3Aexample.com%2Fimg.jpg", false},
+
+ // should error when asked to decode a url with invalid encoding
+ {"https%253A%252F%252Fexample.com%252F%253FbadParam%253D%25u0414", true,
+ "", true},
+ }
+
+ for _, tt := range tests {
+
+ URLOut, err := ParseURL(tt.URL, tt.URLDecode)
+ if tt.ExpectError {
+ if err == nil {
+ t.Errorf("ParseURL(%q, decode=%v) did not return expected error", tt.URL, tt.URLDecode)
+ }
+ continue
+ } else if err != nil {
+ t.Errorf("ParseURL(%v, decode=%v) return unexpected error: %v", tt.URL, tt.URLDecode, err)
+ continue
+ }
+
+ if got, want := URLOut.String(), tt.URLExpected; got != want {
+ t.Errorf("ParseURL(%q, decode=%v) returned = %v, wanted %v", tt.URL, tt.URLDecode, got, want)
+ }
}
}
diff --git a/imageproxy_test.go b/imageproxy_test.go
index 86bd6a6d3..280f716c4 100644
--- a/imageproxy_test.go
+++ b/imageproxy_test.go
@@ -382,10 +382,10 @@ func TestProxy_ServeHTTP(t *testing.T) {
code int // expected response status code
}{
{"/favicon.ico", http.StatusOK},
- {"//foo", http.StatusBadRequest}, // invalid request URL
- {"/http://bad.test/", http.StatusForbidden}, // Disallowed host
- {"/http://good.test/error", http.StatusInternalServerError}, // HTTP protocol error
- {"/http://good.test/nocontent", http.StatusNoContent}, // non-OK response
+ {"/x/foo", http.StatusBadRequest}, // invalid request URL
+ {"/x/http://bad.test/", http.StatusForbidden}, // Disallowed host
+ {"/x/http://good.test/error", http.StatusInternalServerError}, // HTTP protocol error
+ {"/x/http://good.test/nocontent", http.StatusNoContent}, // non-OK response
{"/100/http://good.test/png", http.StatusOK},
{"/100/http://good.test/plain", http.StatusForbidden}, // non-image response
@@ -413,7 +413,7 @@ func TestProxy_ServeHTTP_is304(t *testing.T) {
},
}
- req, _ := http.NewRequest("GET", "http://localhost/http://good.test/etag", nil)
+ req, _ := http.NewRequest("GET", "http://localhost/x/http://good.test/etag", nil)
req.Header.Add("If-None-Match", `"tag"`)
resp := httptest.NewRecorder()
p.ServeHTTP(resp, req)
@@ -444,7 +444,7 @@ func TestProxy_ServeHTTP_maxRedirects(t *testing.T) {
}
for _, tt := range tests {
- req, _ := http.NewRequest("GET", "http://localhost"+tt.url, nil)
+ req, _ := http.NewRequest("GET", "http://localhost/x"+tt.url, nil)
resp := httptest.NewRecorder()
p.ServeHTTP(resp, req)