
Commit 5d50637

Merge pull request #156 from Pix4D/PCI-2125_non_naive_s3_resource_check
Non naive s3 resource check
2 parents: 80c54bd + 6faa4ee

8 files changed: +444 -149


README.md (+13 -4)

@@ -56,10 +56,11 @@ version numbers.
 
 One of the following two options must be specified:
 
-* `regexp`: *Optional.* The pattern to match filenames against within S3. The first
-  grouped match is used to extract the version, or if a group is explicitly
-  named `version`, that group is used. At least one capture group must be
-  specified, with parentheses.
+* `regexp`: *Optional.* The forward-slash (`/`) delimited sequence of patterns to
+  match against the sub-directories and filenames of the objects stored within
+  the S3 bucket. The first grouped match is used to extract the version, or if
+  a group is explicitly named `version`, that group is used. At least one
+  capture group must be specified, with parentheses.
 
   The version extracted from this pattern is used to version the resource.
   Semantic versions, or just numbers, are supported. Accordingly, full regular
@@ -255,6 +256,14 @@ docker build . -t s3-resource --target tests -f dockerfiles/ubuntu/Dockerfile \
     --build-arg S3_ENDPOINT="https://s3.amazonaws.com"
 ```
 
+##### Speeding up integration tests by skipping large file upload
+
+One of the integration tests uploads a large file (>40GB) and so can be slow.
+It can be skipped by adding the following option when running the tests:
+```
+    --build-arg S3_TESTING_NO_LARGE_UPLOAD=true
+```
+
 ##### Integration tests using role assumption
 
 If `S3_TESTING_AWS_ROLE_ARN` is set to a role ARN, this role will be assumed for accessing
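
For reference, the version-extraction rule described in the updated README can be sketched in a few lines of Go. The pattern and object keys below are hypothetical and not taken from this commit: a group explicitly named `version` is used as the version, otherwise the first capture group is.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Hypothetical pattern and object keys, for illustration only.
	// A group explicitly named "version" yields the version ...
	named := regexp.MustCompile(`files/abc-(?P<version>[\d.]+)\.tgz`)
	m := named.FindStringSubmatch("files/abc-2.33.333.tgz")
	fmt.Println(m[named.SubexpIndex("version")]) // 2.33.333

	// ... otherwise the first capture group is used.
	first := regexp.MustCompile(`files/abc-(.*)\.tgz`)
	fmt.Println(first.FindStringSubmatch("files/abc-3.53.tgz")[1]) // 3.53
}
```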

check/command_test.go (+29 -6)

@@ -7,7 +7,7 @@ import (
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 
-	"github.com/concourse/s3-resource"
+	s3resource "github.com/concourse/s3-resource"
 	"github.com/concourse/s3-resource/fakes"
 
 	. "github.com/concourse/s3-resource/check"
@@ -37,11 +37,24 @@ var _ = Describe("Check Command", func() {
 		s3client = &fakes.FakeS3Client{}
 		command = NewCommand(s3client)
 
-		s3client.BucketFilesReturns([]string{
-			"files/abc-0.0.1.tgz",
-			"files/abc-2.33.333.tgz",
-			"files/abc-2.4.3.tgz",
-			"files/abc-3.53.tgz",
+		s3client.ChunkedBucketListReturnsOnCall(0, s3resource.BucketListChunk{
+			Truncated:         false,
+			ContinuationToken: nil,
+			CommonPrefixes:    []string{"files/abc-3/"},
+			Paths: []string{
+				"files/abc-0.0.1.tgz",
+				"files/abc-2.33.333.tgz",
+				"files/abc-2.4.3.tgz",
+				"files/abc-3.53.tgz",
+			},
+		}, nil)
+		s3client.ChunkedBucketListReturnsOnCall(1, s3resource.BucketListChunk{
+			Truncated:         false,
+			ContinuationToken: nil,
+			Paths: []string{
+				"files/abc-3/53.tgz",
+				"files/abc-3/no-magic",
+			},
 		}, nil)
 	})
 
@@ -123,6 +136,16 @@ var _ = Describe("Check Command", func() {
 				Expect(response).To(ConsistOf(s3resource.Version{Path: "files/abc-2.33.333.tgz"}))
 			})
 		})
+
+		Context("when the regexp does not contain any magic regexp char", func() {
+			It("does not explode", func() {
+				request.Source.Regexp = "files/abc-3/no-magic"
+				response, err := command.Run(request)
+				Ω(err).ShouldNot(HaveOccurred())
+
+				Ω(response).Should(HaveLen(0))
+			})
+		})
 	})
 
 	Context("when there is a previous version", func() {

dockerfiles/alpine/Dockerfile (+1)

@@ -27,6 +27,7 @@ ARG S3_TESTING_AWS_ROLE_ARN
 ARG S3_VERSIONED_TESTING_BUCKET
 ARG S3_TESTING_BUCKET
 ARG S3_TESTING_REGION
+ARG S3_TESTING_NO_LARGE_UPLOAD
 ARG S3_ENDPOINT
 ARG TEST_SESSION_TOKEN
 COPY --from=builder /tests /go-tests

fakes/fake_s3client.go (+83)

@@ -36,6 +36,21 @@ type FakeS3Client struct {
 		result1 []string
 		result2 error
 	}
+	ChunkedBucketListStub        func(string, string, *string) (s3resource.BucketListChunk, error)
+	chunkedBucketListMutex       sync.RWMutex
+	chunkedBucketListArgsForCall []struct {
+		arg1 string
+		arg2 string
+		arg3 *string
+	}
+	chunkedBucketListReturns struct {
+		result1 s3resource.BucketListChunk
+		result2 error
+	}
+	chunkedBucketListReturnsOnCall map[int]struct {
+		result1 s3resource.BucketListChunk
+		result2 error
+	}
 	DeleteFileStub        func(string, string) error
 	deleteFileMutex       sync.RWMutex
 	deleteFileArgsForCall []struct {
@@ -265,6 +280,72 @@ func (fake *FakeS3Client) BucketFilesReturnsOnCall(i int, result1 []string, resu
 	}{result1, result2}
 }
 
+func (fake *FakeS3Client) ChunkedBucketList(arg1 string, arg2 string, arg3 *string) (s3resource.BucketListChunk, error) {
+	fake.chunkedBucketListMutex.Lock()
+	ret, specificReturn := fake.chunkedBucketListReturnsOnCall[len(fake.chunkedBucketListArgsForCall)]
+	fake.chunkedBucketListArgsForCall = append(fake.chunkedBucketListArgsForCall, struct {
+		arg1 string
+		arg2 string
+		arg3 *string
+	}{arg1, arg2, arg3})
+	stub := fake.ChunkedBucketListStub
+	fakeReturns := fake.chunkedBucketListReturns
+	fake.recordInvocation("ChunkedBucketList", []interface{}{arg1, arg2, arg3})
+	fake.chunkedBucketListMutex.Unlock()
+	if stub != nil {
+		return stub(arg1, arg2, arg3)
+	}
+	if specificReturn {
+		return ret.result1, ret.result2
+	}
+	return fakeReturns.result1, fakeReturns.result2
+}
+
+func (fake *FakeS3Client) ChunkedBucketListCallCount() int {
+	fake.chunkedBucketListMutex.RLock()
+	defer fake.chunkedBucketListMutex.RUnlock()
+	return len(fake.chunkedBucketListArgsForCall)
+}
+
+func (fake *FakeS3Client) ChunkedBucketListCalls(stub func(string, string, *string) (s3resource.BucketListChunk, error)) {
+	fake.chunkedBucketListMutex.Lock()
+	defer fake.chunkedBucketListMutex.Unlock()
+	fake.ChunkedBucketListStub = stub
+}
+
+func (fake *FakeS3Client) ChunkedBucketListArgsForCall(i int) (string, string, *string) {
+	fake.chunkedBucketListMutex.RLock()
+	defer fake.chunkedBucketListMutex.RUnlock()
+	argsForCall := fake.chunkedBucketListArgsForCall[i]
+	return argsForCall.arg1, argsForCall.arg2, argsForCall.arg3
+}
+
+func (fake *FakeS3Client) ChunkedBucketListReturns(result1 s3resource.BucketListChunk, result2 error) {
+	fake.chunkedBucketListMutex.Lock()
+	defer fake.chunkedBucketListMutex.Unlock()
+	fake.ChunkedBucketListStub = nil
+	fake.chunkedBucketListReturns = struct {
+		result1 s3resource.BucketListChunk
+		result2 error
+	}{result1, result2}
+}
+
+func (fake *FakeS3Client) ChunkedBucketListReturnsOnCall(i int, result1 s3resource.BucketListChunk, result2 error) {
+	fake.chunkedBucketListMutex.Lock()
+	defer fake.chunkedBucketListMutex.Unlock()
+	fake.ChunkedBucketListStub = nil
+	if fake.chunkedBucketListReturnsOnCall == nil {
+		fake.chunkedBucketListReturnsOnCall = make(map[int]struct {
+			result1 s3resource.BucketListChunk
+			result2 error
+		})
+	}
+	fake.chunkedBucketListReturnsOnCall[i] = struct {
+		result1 s3resource.BucketListChunk
+		result2 error
+	}{result1, result2}
+}
+
 func (fake *FakeS3Client) DeleteFile(arg1 string, arg2 string) error {
 	fake.deleteFileMutex.Lock()
 	ret, specificReturn := fake.deleteFileReturnsOnCall[len(fake.deleteFileArgsForCall)]
@@ -713,6 +794,8 @@ func (fake *FakeS3Client) Invocations() map[string][][]interface{} {
 	defer fake.bucketFileVersionsMutex.RUnlock()
 	fake.bucketFilesMutex.RLock()
 	defer fake.bucketFilesMutex.RUnlock()
+	fake.chunkedBucketListMutex.RLock()
+	defer fake.chunkedBucketListMutex.RUnlock()
 	fake.deleteFileMutex.RLock()
 	defer fake.deleteFileMutex.RUnlock()
 	fake.deleteVersionedFileMutex.RLock()

integration/out_test.go (+6 -2)

@@ -11,14 +11,14 @@ import (
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/s3"
 	"github.com/aws/aws-sdk-go/service/s3/s3manager"
-	"github.com/concourse/s3-resource"
+	s3resource "github.com/concourse/s3-resource"
 	"github.com/concourse/s3-resource/out"
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 	"github.com/onsi/gomega/gbytes"
 	"github.com/onsi/gomega/gexec"
 
-	"github.com/nu7hatch/gouuid"
+	uuid "github.com/nu7hatch/gouuid"
 )
 
 var _ = Describe("out", func() {
@@ -300,6 +300,10 @@ var _ = Describe("out", func() {
 
 	Context("with a large file that is multiple of MaxUploadParts", func() {
 		BeforeEach(func() {
+			if os.Getenv("S3_TESTING_NO_LARGE_UPLOAD") != "" {
+				Skip("'S3_TESTING_NO_LARGE_UPLOAD' is set, skipping.")
+			}
+
 			path := filepath.Join(sourceDir, "large-file-to-upload")
 
 			// touch the file

s3client.go (+63 -57)

@@ -5,13 +5,14 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
 	"io"
 	"io/ioutil"
 	"os"
 	"strings"
 	"time"
 
+	"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
+
 	"net/http"
 
 	"github.com/aws/aws-sdk-go/aws"
@@ -28,6 +29,8 @@ type S3Client interface {
 	BucketFiles(bucketName string, prefixHint string) ([]string, error)
 	BucketFileVersions(bucketName string, remotePath string) ([]string, error)
 
+	ChunkedBucketList(bucketName string, prefix string, continuationToken *string) (BucketListChunk, error)
+
 	UploadFile(bucketName string, remotePath string, localPath string, options UploadFileOptions) (string, error)
 	DownloadFile(bucketName string, remotePath string, versionID string, localPath string) error
 
@@ -149,17 +152,24 @@ func NewAwsConfig(
 	return awsConfig
 }
 
-func (client *s3client) BucketFiles(bucketName string, prefixHint string) ([]string, error) {
-	entries, err := client.getBucketContents(bucketName, prefixHint)
-
-	if err != nil {
-		return []string{}, err
-	}
-
-	paths := make([]string, 0, len(entries))
-
-	for _, entry := range entries {
-		paths = append(paths, *entry.Key)
+// BucketFiles returns all the files in bucketName immediately under directoryPrefix
+func (client *s3client) BucketFiles(bucketName string, directoryPrefix string) ([]string, error) {
+	if !strings.HasSuffix(directoryPrefix, "/") {
+		directoryPrefix = directoryPrefix + "/"
+	}
+	var (
+		continuationToken *string
+		truncated         bool
+		paths             []string
+	)
+	for continuationToken, truncated = nil, true; truncated; {
+		s3ListChunk, err := client.ChunkedBucketList(bucketName, directoryPrefix, continuationToken)
+		if err != nil {
+			return []string{}, err
+		}
+		truncated = s3ListChunk.Truncated
+		continuationToken = s3ListChunk.ContinuationToken
+		paths = append(paths, s3ListChunk.Paths...)
 	}
 	return paths, nil
 }
@@ -189,6 +199,47 @@ func (client *s3client) BucketFileVersions(bucketName string, remotePath string)
 	return versions, nil
 }
 
+type BucketListChunk struct {
+	Truncated         bool
+	ContinuationToken *string
+	CommonPrefixes    []string
+	Paths             []string
+}
+
+// ChunkedBucketList lists the S3 bucket `bucketName` content's under `prefix` one chunk at a time
+//
+// The returned `BucketListChunk` contains part of the files and subdirectories
+// present in `bucketName` under `prefix`. The files are listed in `Paths` and
+// the subdirectories in `CommonPrefixes`. If the returned chunk does not
+// include all the files and subdirectories, the `Truncated` flag will be set
+// to `true` and the `ContinuationToken` can be used to retrieve the next chunk.
+func (client *s3client) ChunkedBucketList(bucketName string, prefix string, continuationToken *string) (BucketListChunk, error) {
+	params := &s3.ListObjectsV2Input{
+		Bucket:            aws.String(bucketName),
+		ContinuationToken: continuationToken,
+		Delimiter:         aws.String("/"),
+		Prefix:            aws.String(prefix),
+	}
+	response, err := client.client.ListObjectsV2(params)
+	if err != nil {
+		return BucketListChunk{}, err
+	}
+	commonPrefixes := make([]string, 0, len(response.CommonPrefixes))
+	paths := make([]string, 0, len(response.Contents))
+	for _, commonPrefix := range response.CommonPrefixes {
+		commonPrefixes = append(commonPrefixes, *commonPrefix.Prefix)
+	}
+	for _, path := range response.Contents {
+		paths = append(paths, *path.Key)
+	}
+	return BucketListChunk{
+		Truncated:         *response.IsTruncated,
+		ContinuationToken: response.NextContinuationToken,
+		CommonPrefixes:    commonPrefixes,
+		Paths:             paths,
+	}, nil
+}
+
 func (client *s3client) UploadFile(bucketName string, remotePath string, localPath string, options UploadFileOptions) (string, error) {
 	uploader := s3manager.NewUploaderWithClient(client.client)
 
@@ -398,51 +449,6 @@ func (client *s3client) DeleteFile(bucketName string, remotePath string) error {
 	return err
 }
 
-func (client *s3client) getBucketContents(bucketName string, prefix string) (map[string]*s3.Object, error) {
-	bucketContents := map[string]*s3.Object{}
-	marker := ""
-
-	for {
-		listObjectsResponse, err := client.client.ListObjects(&s3.ListObjectsInput{
-			Bucket: aws.String(bucketName),
-			Prefix: aws.String(prefix),
-			Marker: aws.String(marker),
-		})
-
-		if err != nil {
-			return bucketContents, err
-		}
-
-		lastKey := ""
-
-		for _, key := range listObjectsResponse.Contents {
-			bucketContents[*key.Key] = key
-
-			lastKey = *key.Key
-		}
-
-		if *listObjectsResponse.IsTruncated {
-			prevMarker := marker
-			if listObjectsResponse.NextMarker == nil {
-				// From the s3 docs: If response does not include the
-				// NextMarker and it is truncated, you can use the value of the
-				// last Key in the response as the marker in the subsequent
-				// request to get the next set of object keys.
-				marker = lastKey
-			} else {
-				marker = *listObjectsResponse.NextMarker
-			}
-			if marker == prevMarker {
-				return nil, errors.New("Unable to list all bucket objects; perhaps this is a CloudFront S3 bucket that needs its `Query String Forwarding and Caching` set to `Forward all, cache based on all`?")
-			}
-		} else {
-			break
-		}
-	}
-
-	return bucketContents, nil
-}
-
 func (client *s3client) getBucketVersioning(bucketName string) (bool, error) {
 	params := &s3.GetBucketVersioningInput{
 		Bucket: aws.String(bucketName),
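
The doc comment on `ChunkedBucketList` defines a simple pagination contract: keep requesting chunks while `Truncated` is set, feeding each `ContinuationToken` back in, and treat `CommonPrefixes` entries as sub-directories to descend into. The new `BucketFiles` loop above applies it to a single directory level; a rough sketch of a recursive walk over the whole hierarchy (illustrative only, not code from this commit) could look like:

```go
package example

import s3resource "github.com/concourse/s3-resource"

// listRecursively is an illustrative helper, not part of this commit: it
// drains every chunk at the given prefix and then descends into each
// common prefix (sub-directory) reported by the listing.
func listRecursively(client s3resource.S3Client, bucket, prefix string) ([]string, error) {
	var (
		token *string
		paths []string
	)
	for truncated := true; truncated; {
		chunk, err := client.ChunkedBucketList(bucket, prefix, token)
		if err != nil {
			return nil, err
		}
		truncated, token = chunk.Truncated, chunk.ContinuationToken
		paths = append(paths, chunk.Paths...)

		// CommonPrefixes entries already carry the full key prefix
		// (e.g. "files/abc-3/"), so they can be passed back directly.
		for _, subdir := range chunk.CommonPrefixes {
			subPaths, err := listRecursively(client, bucket, subdir)
			if err != nil {
				return nil, err
			}
			paths = append(paths, subPaths...)
		}
	}
	return paths, nil
}
```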

0 commit comments
