@@ -99,6 +99,32 @@ func uidFromLocalPath(localPath string) (string, error) {
9999 return fmt .Sprintf ("\" %s\" " , uid ), nil
100100}
101101
102+ type Puller struct {
103+ RemoteUri string
104+ LocalDir string
105+
106+ workingDir string
107+ exclude []string
108+ workerCnt int
109+ uidCache map [string ]string
110+ uidLock * sync.Mutex
111+ taskQueue chan DownloadTask
112+ errMsgQueue chan string
113+ // Here is how filesToDelete is being used:
114+ //
115+ // 1. before each pull action, we populate filesToDelete with all files
116+ // (without dirs) from local target directory. During this process, we also
117+ // delete local empty directories.
118+ //
119+ // 2. we list S3 bucket, for any file in the bucket, we remove related
120+ // entry from the delete list
121+ //
122+ // 3. at the end of the pull, we delete files from the list
123+ filesToDelete map [string ]bool
124+ fileListedCnt int
125+ filePulledCnt int
126+ }
127+
102128func (self * Puller ) downloadHandler (task DownloadTask , downloader GenericDownloader ) {
103129 l := zap .S ()
104130
@@ -125,9 +151,9 @@ func (self *Puller) downloadHandler(task DownloadTask, downloader GenericDownloa
125151 }
126152
127153 // create file
128- tmpfile , err := ioutil .TempFile (os . TempDir (), "objinsync-download-" )
154+ tmpfile , err := ioutil .TempFile (self . workingDir , filepath . Base ( task . LocalPath ) )
129155 if err != nil {
130- self .errMsgQueue <- fmt .Sprintf ("Failed to create file %s for download: %v" , tmpfile . Name () , err )
156+ self .errMsgQueue <- fmt .Sprintf ("Failed to create file for download: %v" , err )
131157 return
132158 }
133159 defer tmpfile .Close ()
@@ -228,50 +254,41 @@ func (self *Puller) handlePageList(
228254 return true
229255}
230256
231- type Puller struct {
232- exclude []string
233- workerCnt int
234- uidCache map [string ]string
235- uidLock * sync.Mutex
236- taskQueue chan DownloadTask
237- errMsgQueue chan string
238- // Here is how filesToDelete is being used:
239- //
240- // 1. before each pull action, we populate filesToDelete with all files
241- // (without dirs) from local target directory. During this process, we also
242- // delete local empty directories.
243- //
244- // 2. we list S3 bucket, for any file in the bucket, we remove related
245- // entry from the delete list
246- //
247- // 3. at the end of the pull, we delete files from the list
248- filesToDelete map [string ]bool
249- fileListedCnt int
250- filePulledCnt int
251- }
252-
253257func (self * Puller ) AddExcludePatterns (patterns []string ) {
254258 for _ , pattern := range patterns {
255259 self .exclude = append (self .exclude , pattern )
256260 }
257261}
258262
259- func (self * Puller ) Pull (remoteUri string , localDir string ) string {
263+ func (self * Puller ) SetupWorkingDir () error {
264+ // create temporary working directory to hold downloads for atomic rename
265+ // TmpDir won't work because it could be in a different partition, which
266+ // will lead to invalid cross-device link error
267+ if _ , err := os .Stat (self .workingDir ); os .IsNotExist (err ) {
268+ err = os .MkdirAll (self .workingDir , os .ModePerm )
269+ if err != nil {
270+ return err
271+ }
272+ }
273+ return nil
274+ }
275+
276+ func (self * Puller ) Pull () string {
260277 l := zap .S ()
261278
262- filesToDelete , err := listAndPruneDir (localDir , self .exclude )
279+ filesToDelete , err := listAndPruneDir (self . LocalDir , self .exclude )
263280 if err != nil {
264- return fmt .Sprintf ("Failed to list and prune local dir %s: %v" , localDir , err )
281+ return fmt .Sprintf ("Failed to list and prune local dir %s: %v" , self . LocalDir , err )
265282 }
266283 // handlePageList method will remove files existed in remote source from this list
267284 self .filesToDelete = filesToDelete
268285 defer func () {
269286 self .filesToDelete = nil
270287 }()
271288
272- bucket , remoteDirPath , err := parseObjectUri (remoteUri )
289+ bucket , remoteDirPath , err := parseObjectUri (self . RemoteUri )
273290 if err != nil {
274- return fmt .Sprintf ("Invalid remote uri %s: %v" , remoteUri , err )
291+ return fmt .Sprintf ("Invalid remote uri %s: %v" , self . RemoteUri , err )
275292 }
276293
277294 self .taskQueue = make (chan DownloadTask , 30 )
@@ -292,6 +309,11 @@ func (self *Puller) Pull(remoteUri string, localDir string) string {
292309 svc := s3 .New (sess , aws .NewConfig ().WithRegion (region ))
293310 downloader := s3manager .NewDownloaderWithClient (svc )
294311
312+ if err := self .SetupWorkingDir (); err != nil {
313+ return fmt .Sprintf ("Failed to create working directory %s: %v" , self .workingDir , err )
314+ }
315+ defer os .RemoveAll (self .workingDir ) // purge working dir when downlaods are done
316+
295317 // spawn worker goroutines
296318 var wg sync.WaitGroup
297319 for i := 0 ; i < self .workerCnt ; i ++ {
@@ -329,7 +351,7 @@ func (self *Puller) Pull(remoteUri string, localDir string) string {
329351
330352 err = svc .ListObjectsV2Pages (listParams ,
331353 func (page * s3.ListObjectsV2Output , lastPage bool ) bool {
332- return self .handlePageList (page , lastPage , bucket , remoteDirPath , localDir )
354+ return self .handlePageList (page , lastPage , bucket , remoteDirPath , self . LocalDir )
333355 })
334356 close (self .taskQueue )
335357 wg .Wait ()
@@ -339,7 +361,7 @@ func (self *Puller) Pull(remoteUri string, localDir string) string {
339361 metricsFilePulled .Set (float64 (self .filePulledCnt ))
340362
341363 if err != nil {
342- return fmt .Sprintf ("Failed to list remote uri %s: %v" , remoteUri , err )
364+ return fmt .Sprintf ("Failed to list remote uri %s: %v" , self . RemoteUri , err )
343365 } else {
344366 errMsgWg .Wait ()
345367
@@ -354,7 +376,7 @@ func (self *Puller) Pull(remoteUri string, localDir string) string {
354376 }
355377}
356378
357- func (self * Puller ) PopulateChecksum (localDir string ) {
379+ func (self * Puller ) PopulateChecksum () {
358380 l := zap .S ()
359381
360382 setFileChecksum := func (path string ) {
@@ -369,9 +391,9 @@ func (self *Puller) PopulateChecksum(localDir string) {
369391 l .Errorf ("Failed to calculate checksum for file: %s, err: %s" , path , err )
370392 }
371393
372- uidKey , err := uidKeyFromLocalPath (localDir , path )
394+ uidKey , err := uidKeyFromLocalPath (self . LocalDir , path )
373395 if err != nil {
374- l .Errorf ("Failed to calculate uidKey for file: %s under dir: %s, err: %s" , path , localDir , err )
396+ l .Errorf ("Failed to calculate uidKey for file: %s under dir: %s, err: %s" , path , self . LocalDir , err )
375397 return
376398 }
377399
@@ -386,14 +408,14 @@ func (self *Puller) PopulateChecksum(localDir string) {
386408 self .uidLock .Unlock ()
387409 }
388410
389- err := filepath .Walk (localDir , func (path string , info os.FileInfo , err error ) error {
411+ err := filepath .Walk (self . LocalDir , func (path string , info os.FileInfo , err error ) error {
390412 if err != nil {
391413 return err
392414 }
393415
394416 // ignore file that matches exclude rules
395417 shouldSkip := false
396- relPath , err := filepath .Rel (localDir , path )
418+ relPath , err := filepath .Rel (self . LocalDir , path )
397419 if err != nil {
398420 l .Errorf ("Got invalid path from filepath.Walk: %s, err: %s" , path , err )
399421 shouldSkip = true
@@ -424,10 +446,17 @@ func (self *Puller) PopulateChecksum(localDir string) {
424446 }
425447}
426448
427- func NewPuller () * Puller {
428- return & Puller {
429- workerCnt : 5 ,
430- uidCache : map [string ]string {},
431- uidLock : & sync.Mutex {},
449+ func NewPuller (remoteUri string , localDir string ) (* Puller , error ) {
450+ if _ , err := os .Stat (localDir ); os .IsNotExist (err ) {
451+ return nil , fmt .Errorf ("local directory `%s` does not exist: %v" , localDir , err )
432452 }
453+
454+ return & Puller {
455+ RemoteUri : remoteUri ,
456+ LocalDir : localDir ,
457+ workingDir : filepath .Join (localDir , ".objinsync" ),
458+ workerCnt : 5 ,
459+ uidCache : map [string ]string {},
460+ uidLock : & sync.Mutex {},
461+ }, nil
433462}
0 commit comments