diff --git a/artifact/image/layerscanning/image/image.go b/artifact/image/layerscanning/image/image.go
index fd8bf887..1cae3a8b 100644
--- a/artifact/image/layerscanning/image/image.go
+++ b/artifact/image/layerscanning/image/image.go
@@ -33,6 +33,7 @@ import (
 	"github.com/google/go-containerregistry/pkg/v1/tarball"
 	scalibrImage "github.com/google/osv-scalibr/artifact/image"
 	"github.com/google/osv-scalibr/artifact/image/pathtree"
+	"github.com/google/osv-scalibr/artifact/image/require"
 	"github.com/google/osv-scalibr/artifact/image/symlink"
 	"github.com/google/osv-scalibr/artifact/image/whiteout"
 	"github.com/google/osv-scalibr/log"
@@ -50,6 +51,8 @@ var (
 	ErrFileReadLimitExceeded = errors.New("file exceeds read limit")
 	// ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root.
 	ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root")
+	// ErrInvalidConfig is returned when the image config is invalid.
+	ErrInvalidConfig = errors.New("invalid image config")
 )
 
 // ========================================================
@@ -59,20 +62,32 @@ var (
 // Config contains the configuration to load an Image.
 type Config struct {
 	MaxFileBytes int64
+	Requirer     require.FileRequirer
 }
 
 // DefaultConfig returns the default configuration to load an Image.
 func DefaultConfig() *Config {
 	return &Config{
 		MaxFileBytes: DefaultMaxFileBytes,
+		Requirer:     &require.FileRequirerAll{},
 	}
 }
 
+func validateConfig(config *Config) error {
+	if config.MaxFileBytes <= 0 {
+		return fmt.Errorf("%w: max file bytes must be positive: %d", ErrInvalidConfig, config.MaxFileBytes)
+	}
+	if config.Requirer == nil {
+		return fmt.Errorf("%w: requirer must be specified", ErrInvalidConfig)
+	}
+	return nil
+}
+
 // Image is a container image. It is composed of a set of layers that can be scanned for software
 // inventory. It contains the proper metadata to attribute inventory to layers.
 type Image struct {
 	chainLayers    []*chainLayer
-	maxFileBytes   int64
+	config         *Config
 	ExtractDir     string
 	BaseImageIndex int
 }
@@ -113,11 +128,16 @@ func FromTarball(tarPath string, config *Config) (*Image, error) {
 // FromV1Image takes a v1.Image and produces a layer-scannable Image. The steps taken are as
 // follows:
 //
-// (1) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
-// (2) Initializes the output image and the chain layers.
-// (3) Unpacks the layers by looping through the layers in reverse, while filling in the files
+// (1) Validates the user input image config object.
+// (2) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
+// (3) Initializes the output image and the chain layers.
+// (4) Unpacks the layers by looping through the layers in reverse, while filling in the files
 // into the appropriate chain layer.
 func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
+	if err := validateConfig(config); err != nil {
+		return nil, fmt.Errorf("invalid image config: %w", err)
+	}
+
 	configFile, err := v1Image.ConfigFile()
 	if err != nil {
 		return nil, fmt.Errorf("failed to load config file: %w", err)
 	}
@@ -145,9 +165,9 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
 	outputImage := Image{
 		chainLayers:    chainLayers,
+		config:         config,
 		ExtractDir:     tempPath,
 		BaseImageIndex: baseImageIndex,
-		maxFileBytes:   config.MaxFileBytes,
 	}
 
 	// Add the root directory to each chain layer. If this is not done, then the virtual paths won't
@@ -277,9 +297,9 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
 		if err != nil {
 			return fmt.Errorf("could not read tar: %w", err)
 		}
-		// Some tools prepend everything with "./", so if we don't Clean the
-		// name, we may have duplicate entries, which angers tar-split.
-		// Using path instead of filepath to keep `/` and deterministic behavior
+		// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
+		// duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and
+		// deterministic behavior.
 		cleanedFilePath := path.Clean(filepath.ToSlash(header.Name))
 
 		// Prevent "Zip Slip"
@@ -287,8 +307,8 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
 			continue
 		}
 
-		// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR
-		// and to not depend on internal tar package guess which prefers USTAR over PAX.
+		// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR and
+		// to not depend on internal tar package guess which prefers USTAR over PAX.
 		header.Format = tar.FormatPAX
 
 		// There is a difference between the filepath and path modules. The filepath module will handle
@@ -305,10 +325,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
 			continue
 		}
 
-		tombstone := strings.HasPrefix(basename, whiteout.WhiteoutPrefix)
+		isWhiteout := whiteout.IsWhiteout(basename)
 		// TODO: b/379094217 - Handle Opaque Whiteouts
-		if tombstone {
-			basename = basename[len(whiteout.WhiteoutPrefix):]
+		if isWhiteout {
+			basename = whiteout.Reverse(basename)
 		}
 
 		// If we're checking a directory, don't filepath.Join names.
@@ -320,19 +340,22 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
 		}
 
 		// realFilePath is where the file will be written to disk. filepath.Clean first to convert
-		// to OS specific file path.
-		// TODO: b/377553499 - Escape invalid characters on windows that's valid on linux
-		// realFilePath := filepath.Join(dirPath, filepath.Clean(cleanedFilePath))
+		// to OS specific file path.
 		realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))
 
+		// If the file is not required, then skip it.
+		if !img.config.Requirer.FileRequired(virtualPath, header.FileInfo()) {
+			continue
+		}
+
 		var newNode *fileNode
 		switch header.Typeflag {
 		case tar.TypeDir:
-			newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
+			newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
 		case tar.TypeReg:
-			newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
+			newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
 		case tar.TypeSymlink, tar.TypeLink:
-			newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
+			newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
 		default:
 			log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
 			continue
@@ -406,8 +429,8 @@ func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, ta
 	}
 	defer f.Close()
 
-	numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes))
-	if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) {
+	numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.config.MaxFileBytes))
+	if numBytes >= img.config.MaxFileBytes || errors.Is(err, io.EOF) {
 		return nil, ErrFileReadLimitExceeded
 	}
 
diff --git a/artifact/image/layerscanning/image/image_test.go b/artifact/image/layerscanning/image/image_test.go
index 44d47070..9cdbf951 100644
--- a/artifact/image/layerscanning/image/image_test.go
+++ b/artifact/image/layerscanning/image/image_test.go
@@ -27,6 +27,7 @@ import (
 	v1 "github.com/google/go-containerregistry/pkg/v1"
 	"github.com/google/go-containerregistry/pkg/v1/types"
 	"github.com/google/osv-scalibr/artifact/image"
+	"github.com/google/osv-scalibr/artifact/image/require"
 )
 
 const testdataDir = "testdata"
@@ -132,6 +133,23 @@ func TestFromTarball(t *testing.T) {
 		wantErrDuringImageCreation error
 		wantErrWhileReadingFiles   error
 	}{
+		{
+			name:    "invalid config - non positive maxFileBytes",
+			tarPath: filepath.Join(testdataDir, "single-file.tar"),
+			config: &Config{
+				Requirer:     &require.FileRequirerAll{},
+				MaxFileBytes: 0,
+			},
+			wantErrDuringImageCreation: ErrInvalidConfig,
+		},
+		{
+			name:    "invalid config - missing requirer",
+			tarPath: filepath.Join(testdataDir, "single-file.tar"),
+			config: &Config{
+				MaxFileBytes: DefaultMaxFileBytes,
+			},
+			wantErrDuringImageCreation: ErrInvalidConfig,
+		},
 		{
 			name:    "image with one file",
 			tarPath: filepath.Join(testdataDir, "single-file.tar"),
@@ -294,6 +312,7 @@ func TestFromTarball(t *testing.T) {
 			tarPath: filepath.Join(testdataDir, "single-file.tar"),
 			config: &Config{
 				MaxFileBytes: 1,
+				Requirer:     &require.FileRequirerAll{},
 			},
 			wantErrDuringImageCreation: ErrFileReadLimitExceeded,
 		},
@@ -415,12 +434,39 @@ func TestFromTarball(t *testing.T) {
 			config:                     DefaultConfig(),
 			wantErrDuringImageCreation: ErrSymlinkPointsOutsideRoot,
 		},
+		{
+			name:    "require single file from images",
+			tarPath: filepath.Join(testdataDir, "multiple-files.tar"),
+			config: &Config{
+				MaxFileBytes: DefaultMaxFileBytes,
+				// Only require foo.txt.
+				Requirer: require.NewFileRequirerPaths([]string{"/foo.txt"}),
+			},
+			wantChainLayerEntries: []chainLayerEntries{
+				{
+					filepathContentPairs: []filepathContentPair{
+						{
+							filepath: "foo.txt",
+							content:  "foo\n",
+						},
+					},
+				},
+				{
+					// dir1/bar.txt and dir1/baz.txt are ignored in the second layer.
+					filepathContentPairs: []filepathContentPair{
+						{
+							filepath: "foo.txt",
+							content:  "foo\n",
+						},
+					},
+				},
+			},
+		},
 	}
 
 	for _, tc := range tests {
 		t.Run(tc.name, func(t *testing.T) {
 			gotImage, gotErr := FromTarball(tc.tarPath, tc.config)
-			defer gotImage.CleanUp()
 
 			if tc.wantErrDuringImageCreation != nil {
 				if errors.Is(gotErr, tc.wantErrDuringImageCreation) {
@@ -432,6 +478,7 @@ func TestFromTarball(t *testing.T) {
 			if gotErr != nil {
 				t.Fatalf("FromTarball(%v) returned unexpected error: %v", tc.tarPath, gotErr)
 			}
+			defer gotImage.CleanUp()
 
 			chainLayers, err := gotImage.ChainLayers()
 			if err != nil {
diff --git a/artifact/image/require/require.go b/artifact/image/require/require.go
index f962c766..67d3601b 100644
--- a/artifact/image/require/require.go
+++ b/artifact/image/require/require.go
@@ -16,7 +16,9 @@
 // in during a container image extraction.
 package require
 
-import "io/fs"
+import (
+	"io/fs"
+)
 
 // FileRequirer determines if a file is required to unpack.
 type FileRequirer interface {
diff --git a/artifact/image/whiteout/whiteout.go b/artifact/image/whiteout/whiteout.go
index dd8c4619..a380f9a0 100644
--- a/artifact/image/whiteout/whiteout.go
+++ b/artifact/image/whiteout/whiteout.go
@@ -59,3 +59,32 @@ func Files(scalibrfs scalibrfs.FS) (map[string]struct{}, error) {
 	}
 	return whiteouts, nil
 }
+
+// IsWhiteout returns true if a path is a whiteout path.
+func IsWhiteout(p string) bool {
+	_, file := filepath.Split(p)
+	return strings.HasPrefix(file, WhiteoutPrefix)
+}
+
+// Path returns the whiteout version of a path.
+func Path(p string) string {
+	dir, file := filepath.Split(p)
+	return filepath.Join(dir, fmt.Sprintf("%s%s", WhiteoutPrefix, file))
+}
+
+// Reverse returns the non-whiteout version of a path.
+func Reverse(p string) string {
+	dir, file := filepath.Split(p)
+
+	if strings.HasPrefix(file, WhiteoutPrefix) {
+		file = strings.TrimPrefix(file, WhiteoutPrefix)
+	}
+
+	reverse := filepath.Join(dir, file)
+
+	if dir != "" && file == "" {
+		reverse = fmt.Sprintf("%s/", reverse)
+	}
+
+	return reverse
+}
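Below is a minimal usage sketch of the new Requirer configuration introduced in this change. It is illustrative only: the import path for the layer-scanning image package is inferred from the file layout above, the local alias scalibrimage and the "image.tar" path are placeholders, and only APIs that appear in this diff (Config, DefaultMaxFileBytes, require.NewFileRequirerPaths, FromTarball, ChainLayers, CleanUp) are used.

package main

import (
	"fmt"
	"log"

	// Import paths inferred from the file layout in this diff.
	scalibrimage "github.com/google/osv-scalibr/artifact/image/layerscanning/image"
	"github.com/google/osv-scalibr/artifact/image/require"
)

func main() {
	// Only unpack /foo.txt from each layer; every other file is skipped by the
	// FileRequired check that fillChainLayerWithFilesFromTar now performs.
	cfg := &scalibrimage.Config{
		MaxFileBytes: scalibrimage.DefaultMaxFileBytes,
		Requirer:     require.NewFileRequirerPaths([]string{"/foo.txt"}),
	}

	// "image.tar" is a placeholder path for a local image tarball.
	img, err := scalibrimage.FromTarball("image.tar", cfg)
	if err != nil {
		// A nil Requirer or a non-positive MaxFileBytes surfaces here as ErrInvalidConfig.
		log.Fatalf("FromTarball: %v", err)
	}
	defer img.CleanUp()

	chainLayers, err := img.ChainLayers()
	if err != nil {
		log.Fatalf("ChainLayers: %v", err)
	}
	fmt.Printf("scanned %d chain layers\n", len(chainLayers))
}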