[Layer Scanning] Add FileRequirer to the image Config object to allow users to specify which files should be unpacked during Image object creation. Only regular files are handled by the FileRequirer in this CL; symlinks will be handled in a follow-up CL. #389
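A minimal usage sketch (not part of the diff) of how a caller might restrict unpacking with the new Requirer field. The import path is inferred from the file location in this PR, and the tarball path is hypothetical:

```go
package main

import (
	"fmt"
	"log"

	"github.com/google/osv-scalibr/artifact/image/layerscanning/image"
	"github.com/google/osv-scalibr/artifact/image/require"
)

func main() {
	// Only unpack /etc/os-release; other regular files in the image are skipped.
	cfg := &image.Config{
		MaxFileBytes: image.DefaultMaxFileBytes,
		Requirer:     require.NewFileRequirerPaths([]string{"/etc/os-release"}),
	}

	img, err := image.FromTarball("my-image.tar", cfg) // hypothetical tarball path
	if err != nil {
		log.Fatalf("FromTarball: %v", err)
	}
	defer img.CleanUp()

	chainLayers, err := img.ChainLayers()
	if err != nil {
		log.Fatalf("ChainLayers: %v", err)
	}
	fmt.Printf("image has %d chain layers\n", len(chainLayers))
}
```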

Open · wants to merge 1 commit into base: main
61 changes: 43 additions & 18 deletions artifact/image/layerscanning/image/image.go
@@ -33,6 +33,7 @@ import (
"github.com/google/go-containerregistry/pkg/v1/tarball"
scalibrImage "github.com/google/osv-scalibr/artifact/image"
"github.com/google/osv-scalibr/artifact/image/pathtree"
"github.com/google/osv-scalibr/artifact/image/require"
"github.com/google/osv-scalibr/artifact/image/symlink"
"github.com/google/osv-scalibr/artifact/image/whiteout"
"github.com/google/osv-scalibr/log"
@@ -50,6 +51,8 @@ var (
ErrFileReadLimitExceeded = errors.New("file exceeds read limit")
// ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root.
ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root")
// ErrInvalidConfig is returned when the image config is invalid.
ErrInvalidConfig = errors.New("invalid image config")
)

// ========================================================
@@ -59,20 +62,32 @@ var (
// Config contains the configuration to load an Image.
type Config struct {
MaxFileBytes int64
Requirer require.FileRequirer
}

// DefaultConfig returns the default configuration to load an Image.
func DefaultConfig() *Config {
return &Config{
MaxFileBytes: DefaultMaxFileBytes,
Requirer: &require.FileRequirerAll{},
}
}

func validateConfig(config *Config) error {
if config.MaxFileBytes <= 0 {
return fmt.Errorf("%w: max file bytes must be positive: %d", ErrInvalidConfig, config.MaxFileBytes)
}
if config.Requirer == nil {
return fmt.Errorf("%w: requirer must be specified", ErrInvalidConfig)
}
return nil
}

// Image is a container image. It is composed of a set of layers that can be scanned for software
// inventory. It contains the proper metadata to attribute inventory to layers.
type Image struct {
chainLayers []*chainLayer
maxFileBytes int64
config *Config
ExtractDir string
BaseImageIndex int
}
@@ -113,11 +128,16 @@ func FromTarball(tarPath string, config *Config) (*Image, error) {
// FromV1Image takes a v1.Image and produces a layer-scannable Image. The steps taken are as
// follows:
//
// (1) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
// (2) Initializes the output image and the chain layers.
// (3) Unpacks the layers by looping through the layers in reverse, while filling in the files
// (1) Validates the user input image config object.
// (2) Retrieves v1.Layers, configFile. Creates tempPath to store the image files.
// (3) Initializes the output image and the chain layers.
// (4) Unpacks the layers by looping through the layers in reverse, while filling in the files
// into the appropriate chain layer.
func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
if err := validateConfig(config); err != nil {
return nil, fmt.Errorf("invalid image config: %w", err)
}

configFile, err := v1Image.ConfigFile()
if err != nil {
return nil, fmt.Errorf("failed to load config file: %w", err)
Expand Down Expand Up @@ -145,9 +165,9 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {

outputImage := Image{
chainLayers: chainLayers,
config: config,
ExtractDir: tempPath,
BaseImageIndex: baseImageIndex,
maxFileBytes: config.MaxFileBytes,
}

// Add the root directory to each chain layer. If this is not done, then the virtual paths won't
Expand Down Expand Up @@ -291,18 +311,18 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
if err != nil {
return fmt.Errorf("could not read tar: %w", err)
}
// Some tools prepend everything with "./", so if we don't Clean the
// name, we may have duplicate entries, which angers tar-split.
// Using path instead of filepath to keep `/` and deterministic behavior
// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
// duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and
// deterministic behavior.
cleanedFilePath := path.Clean(filepath.ToSlash(header.Name))

// Prevent "Zip Slip"
if strings.HasPrefix(cleanedFilePath, "../") {
continue
}

// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR
// and to not depend on internal tar package guess which prefers USTAR over PAX.
// Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR and
// to not depend on internal tar package guess which prefers USTAR over PAX.
header.Format = tar.FormatPAX

// There is a difference between the filepath and path modules. The filepath module will handle
@@ -325,10 +345,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
continue
}

tombstone := strings.HasPrefix(basename, whiteout.WhiteoutPrefix)
isWhiteout := whiteout.IsWhiteout(basename)
// TODO: b/379094217 - Handle Opaque Whiteouts
if tombstone {
basename = basename[len(whiteout.WhiteoutPrefix):]
if isWhiteout {
basename = whiteout.ToPath(basename)
}

// If we're checking a directory, don't filepath.Join names.
@@ -343,14 +363,19 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// any forward slashes to the appropriate OS specific path separator.
realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))

// If the file is not required, then skip it.
if !img.config.Requirer.FileRequired(virtualPath, header.FileInfo()) {
continue
}

var newNode *fileNode
switch header.Typeflag {
case tar.TypeDir:
newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
case tar.TypeReg:
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
case tar.TypeSymlink, tar.TypeLink:
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone)
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
default:
log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
continue
@@ -437,8 +462,8 @@ func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, ta
}
defer f.Close()

numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes))
if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) {
numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.config.MaxFileBytes))
if numBytes >= img.config.MaxFileBytes || errors.Is(err, io.EOF) {
return nil, ErrFileReadLimitExceeded
}

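The bundled FileRequirerAll and NewFileRequirerPaths requirers appear in the tests below. For custom filtering, the call site `img.config.Requirer.FileRequired(virtualPath, header.FileInfo())` suggests the interface exposes a FileRequired(path, fileinfo) bool method; the following is a hedged sketch of a custom requirer under that assumed signature (the real interface lives in artifact/image/require and may differ):

```go
package customrequire

import (
	"io/fs"
	"path"
)

// jsonRequirer keeps only regular files with a .json extension. The
// FileRequired signature is assumed from the call site in image.go; check
// artifact/image/require for the actual interface definition.
type jsonRequirer struct{}

func (jsonRequirer) FileRequired(p string, fileinfo fs.FileInfo) bool {
	return fileinfo.Mode().IsRegular() && path.Ext(p) == ".json"
}
```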
50 changes: 49 additions & 1 deletion artifact/image/layerscanning/image/image_test.go
@@ -27,6 +27,7 @@ import (
v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/types"
"github.com/google/osv-scalibr/artifact/image"
"github.com/google/osv-scalibr/artifact/image/require"
)

const testdataDir = "testdata"
@@ -132,6 +133,23 @@ func TestFromTarball(t *testing.T) {
wantErrDuringImageCreation error
wantErrWhileReadingFiles error
}{
{
name: "invalid config - non positive maxFileBytes",
tarPath: filepath.Join(testdataDir, "single-file.tar"),
config: &Config{
Requirer: &require.FileRequirerAll{},
MaxFileBytes: 0,
},
wantErrDuringImageCreation: ErrInvalidConfig,
},
{
name: "invalid config - missing requirer",
tarPath: filepath.Join(testdataDir, "single-file.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
},
wantErrDuringImageCreation: ErrInvalidConfig,
},
{
name: "image with one file",
tarPath: filepath.Join(testdataDir, "single-file.tar"),
@@ -294,6 +312,7 @@ func TestFromTarball(t *testing.T) {
tarPath: filepath.Join(testdataDir, "single-file.tar"),
config: &Config{
MaxFileBytes: 1,
Requirer: &require.FileRequirerAll{},
},
wantChainLayerEntries: []chainLayerEntries{
{
@@ -427,12 +446,39 @@ func TestFromTarball(t *testing.T) {
config: DefaultConfig(),
wantErrDuringImageCreation: ErrSymlinkPointsOutsideRoot,
},
{
name: "require single file from images",
tarPath: filepath.Join(testdataDir, "multiple-files.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
// Only require foo.txt.
Requirer: require.NewFileRequirerPaths([]string{"/foo.txt"}),
},
wantChainLayerEntries: []chainLayerEntries{
{
filepathContentPairs: []filepathContentPair{
{
filepath: "foo.txt",
content: "foo\n",
},
},
},
{
// dir1/bar.txt and dir1/baz.txt are ignored in the second layer.
filepathContentPairs: []filepathContentPair{
{
filepath: "foo.txt",
content: "foo\n",
},
},
},
},
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
gotImage, gotErr := FromTarball(tc.tarPath, tc.config)
defer gotImage.CleanUp()

if tc.wantErrDuringImageCreation != nil {
if errors.Is(gotErr, tc.wantErrDuringImageCreation) {
@@ -444,6 +490,8 @@ func TestFromTarball(t *testing.T) {
if gotErr != nil {
t.Fatalf("FromTarball(%v) returned unexpected error: %v", tc.tarPath, gotErr)
}
// Only defer call to CleanUp if the image was created successfully.
defer gotImage.CleanUp()

chainLayers, err := gotImage.ChainLayers()
if err != nil {
32 changes: 31 additions & 1 deletion artifact/image/whiteout/whiteout.go
@@ -19,6 +19,7 @@ package whiteout
import (
"fmt"
"io/fs"
"path"
"path/filepath"
"strings"

@@ -39,7 +40,7 @@ func Files(scalibrfs scalibrfs.FS) (map[string]struct{}, error) {

err := fs.WalkDir(scalibrfs, ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
//nolint:nilerr // continue walking if there is an error
//nolint:nilerr // Continue walking if there is an error.
return nil
}

@@ -60,3 +61,32 @@ }
}
return whiteouts, nil
}

// IsWhiteout returns true if a path is a whiteout path.
func IsWhiteout(p string) bool {
_, file := path.Split(p)
return strings.HasPrefix(file, WhiteoutPrefix)
}

// ToWhiteout returns the whiteout version of a path.
func ToWhiteout(p string) string {
dir, file := path.Split(p)
return path.Join(dir, fmt.Sprintf("%s%s", WhiteoutPrefix, file))
}

// ToPath returns the non-whiteout version of a path.
func ToPath(p string) string {
dir, file := path.Split(p)

if strings.HasPrefix(file, WhiteoutPrefix) {
file = strings.TrimPrefix(file, WhiteoutPrefix)
}

nonWhiteoutPath := path.Join(dir, file)

if dir != "" && file == "" {
nonWhiteoutPath = fmt.Sprintf("%s/", nonWhiteoutPath)
}

return nonWhiteoutPath
}
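A short illustration of the new whiteout helpers, assuming WhiteoutPrefix is the standard OCI `.wh.` marker (the constant is defined elsewhere in this package):

```go
package main

import (
	"fmt"

	"github.com/google/osv-scalibr/artifact/image/whiteout"
)

func main() {
	// A layer entry named dir1/.wh.bar.txt marks dir1/bar.txt as deleted.
	p := "dir1/.wh.bar.txt" // assumes WhiteoutPrefix == ".wh."

	fmt.Println(whiteout.IsWhiteout(p))              // true
	fmt.Println(whiteout.ToPath(p))                  // dir1/bar.txt
	fmt.Println(whiteout.ToWhiteout("dir1/bar.txt")) // dir1/.wh.bar.txt
}
```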