3535
3636_DEBUG_VERBOSITY = 0
3737
# Human-readable names for tar member type codes, used when reporting
# conflicts between archive members. The keys are the tarfile module's own
# constants so they always compare equal to the value held in TarInfo.type
# (a hand-written literal such as b"\0 " would silently never match).
TARFILE_MEMBER_TYPE_TO_STR = {
    tarfile.REGTYPE: "REGTYPE",
    tarfile.AREGTYPE: "AREGTYPE",
    tarfile.LNKTYPE: "LNKTYPE",
    tarfile.SYMTYPE: "SYMTYPE",
    tarfile.CHRTYPE: "CHRTYPE",
    tarfile.BLKTYPE: "BLKTYPE",
    tarfile.DIRTYPE: "DIRTYPE",
    tarfile.FIFOTYPE: "FIFOTYPE",
    tarfile.CONTTYPE: "CONTTYPE",
}
49+
3850
3951class TarFileWriter (object ):
4052 """A wrapper to write tar files."""
@@ -103,13 +115,7 @@ def __init__(self,
103115
104116 self .tar = tarfile .open (name = name , mode = mode , fileobj = self .fileobj ,
105117 format = tarfile .GNU_FORMAT )
106- self .members = set ()
107- self .directories = set ()
108- # Preseed the added directory list with things we should not add. If we
109- # some day need to allow '.' or '/' as an explicit member of the archive,
110- # we can adjust that here based on the setting of root_directory.
111- self .directories .add ('/' )
112- self .directories .add ('./' )
118+ self .existing_members = {}
113119 self .create_parents = create_parents
114120 self .allow_dups_from_deps = allow_dups_from_deps
115121
@@ -119,28 +125,43 @@ def __enter__(self):
  def __exit__(self, t, v, traceback):
    """Close the writer when leaving a `with` block.

    The exception details (t, v, traceback) are not inspected, and the method
    returns None.
    """
    self.close()
121127
  def _have_added(self, path):
    """Report whether a path has already been recorded as added.

    Args:
      path: archive path to look up; it is compared exactly as given.

    Returns:
      True if path is in self.members or in self.directories.
    """
    return (path in self.members) or (path in self.directories)
128+ def _existing_member_type (self , path ):
129+ """Retrieve an existing tar file member's type if we have added it previously,
130+ return None otherwise."""
131+ # Things we should not add.
132+ # If we some day need to allow '.' or '/' as an explicit member of the archive,
133+ # we can adjust that here based on the setting of root_directory.
134+ if path == '/' or path == './' :
135+ return tarfile .DIRTYPE
136+
137+ normalized_path = path .rstrip ("/" )
138+ return self .existing_members .get (normalized_path , None )
125139
126140 def _addfile (self , info , fileobj = None ):
127141 """Add a file in the tar file if there is no conflict."""
128142 if info .type == tarfile .DIRTYPE :
129- # Enforce the ending / for directories so we correctly deduplicate .
143+ # Enforce the ending / for directories.
130144 if not info .name .endswith ('/' ):
131145 info .name += '/'
132- if not self .allow_dups_from_deps and self ._have_added (info .name ):
146+ existing_member_type = self ._existing_member_type (info .name )
147+ if not self .allow_dups_from_deps and existing_member_type is not None :
133148 # Directories with different contents should get merged without warnings.
134149 # If they have overlapping content, the warning will be on their duplicate *files* instead
135150 if info .type != tarfile .DIRTYPE :
136151 print ('Duplicate file in archive: %s, '
137152 'picking first occurrence' % info .name )
153+ # Directories that shadow
154+ elif existing_member_type != tarfile .DIRTYPE and existing_member_type != tarfile .SYMTYPE :
155+ print ('Directory shadows a member of type %s in archive: %s, '
156+ 'picking first occurrence' % (TARFILE_MEMBER_TYPE_TO_STR .get (
157+ existing_member_type , "UNKNOWN" ), info .name ))
158+
138159 return
139160
140161 self .tar .addfile (info , fileobj )
141- self . members . add ( info . name )
142- if info . type == tarfile . DIRTYPE :
143- self .directories . add ( info .name )
162+ # Strip the trailing slash from the path so that we can detect when, for example, we are
163+ # trying to overwrite a symbolic link with a directory.
164+ self .existing_members [ info .name . rstrip ( "/" )] = info . type
144165
145166 def add_directory_path (self ,
146167 path ,
@@ -182,7 +203,8 @@ def conditionally_add_parents(self, path, uid=0, gid=0, uname='', gname='', mtim
182203 for next_level in dirs [0 :- 1 ]:
183204 parent_path = parent_path + next_level + '/'
184205
185- if self .create_parents and not self ._have_added (parent_path ):
206+ if self .create_parents and self ._existing_member_type (
207+ parent_path ) is None :
186208 self .add_directory_path (
187209 parent_path ,
188210 uid = uid ,
@@ -224,7 +246,8 @@ def add_file(self,
224246 return
225247 if name == '.' :
226248 return
227- if not self .allow_dups_from_deps and name in self .members :
249+ if not self .allow_dups_from_deps and self ._existing_member_type (
250+ name ) is not None :
228251 return
229252
230253 if mtime is None :
0 commit comments