Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xload: E2E tests #1650

Open
wants to merge 20 commits into
base: feature/xload
Choose a base branch
from
172 changes: 172 additions & 0 deletions azure-pipeline-templates/e2e-tests-xload.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# Inputs for the xload (preload) end-to-end validation template.
parameters:
  # Accepted from the caller; no step in this template references it.
  - name: conf_template
    type: string
  # Path gen-test-config writes to and blobfuse2 mounts with (--config-file).
  - name: config_file
    type: string
  # Storage container handed to gen-test-config (--container-name).
  - name: container
    type: string
  # Local cache path handed to gen-test-config (--temp-path) and to mount.yml/cleanup.yml.
  - name: temp_dir
    type: string
  # Directory blobfuse2 is mounted on; test data is generated and checksummed here.
  - name: mount_dir
    type: string
  # Forwarded to mount.yml as `prefix`.
  - name: idstring
    type: string
  # Accepted from the caller; no step in this template references it.
  - name: adls
    type: boolean
  # Exported to gen-test-config as NIGHTLY_STO_ACC_NAME.
  - name: account_name
    type: string
  # Exported to gen-test-config as NIGHTLY_STO_ACC_KEY.
  - name: account_key
    type: string
  # Exported to gen-test-config as ACCOUNT_TYPE.
  - name: account_type
    type: string
  # Exported to gen-test-config as ACCOUNT_ENDPOINT.
  # Typed explicitly so it matches every other parameter declaration here.
  - name: account_endpoint
    type: string
  # Accepted from the caller; no step in this template references it.
  - name: distro_name
    type: string
  # Accepted from the caller; no step in this template references it.
  - name: quick_test
    type: boolean
    default: true
  # Extra flags appended verbatim to both `blobfuse2 mount` command lines.
  - name: mnt_flags
    type: string
    default: ""
  # Exported to gen-test-config as VERBOSE_LOG.
  - name: verbose_log
    type: boolean
    default: false
  # Accepted from the caller; no step in this template references it.
  - name: clone
    type: boolean
    default: false

steps:
- script: |
sudo apt-get update
sudo apt-get install jq python3-setuptools python3-pip -y
sudo pip3 install pandas numpy pyarrow fastparquet
displayName: 'Install dependencies'

- script: |
$(WORK_DIR)/blobfuse2 gen-test-config --config-file=$(WORK_DIR)/testdata/config/azure_key.yaml --container-name=${{ parameters.container }} --temp-path=${{ parameters.temp_dir }} --output-file=${{ parameters.config_file }}
displayName: 'Create Config File for RW mount'
env:
NIGHTLY_STO_ACC_NAME: ${{ parameters.account_name }}
NIGHTLY_STO_ACC_KEY: ${{ parameters.account_key }}
ACCOUNT_TYPE: ${{ parameters.account_type }}
ACCOUNT_ENDPOINT: ${{ parameters.account_endpoint }}
VERBOSE_LOG: ${{ parameters.verbose_log }}
continueOnError: false

- script:
cat ${{ parameters.config_file }}
displayName: 'Print config file'

# Mount read-write with file-cache to generate the test data (always runs; this template has no direct_io switch)
- template: 'mount.yml'
parameters:
working_dir: $(WORK_DIR)
mount_dir: ${{ parameters.mount_dir }}
temp_dir: ${{ parameters.temp_dir }}
prefix: ${{ parameters.idstring }}
mountStep:
script: |
$(WORK_DIR)/blobfuse2 mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config_file }} --default-working-dir=$(WORK_DIR) --file-cache-timeout=3200 ${{ parameters.mnt_flags }}

- script: |
for i in {1,2,3,4,5,6,7,8,9,10,20,30,50,100,200,1024,2048,4096}; do echo $i; done | parallel --will-cite -j 5 'head -c {}M < /dev/urandom > ${{ parameters.mount_dir }}/myfile_{}'
for i in $(seq 1 10); do echo $(shuf -i 0-4294967296 -n 1); done | parallel --will-cite -j 5 'head -c {} < /dev/urandom > ${{ parameters.mount_dir }}/datafiles_{}'
cd ${{ parameters.mount_dir }}
python3 $(WORK_DIR)/testdata/scripts/generate-parquet-files.py
ls -l ${{ parameters.mount_dir }}/*
displayName: 'Generate data'

- script: |
md5sum ${{ parameters.mount_dir }}/* > $(WORK_DIR)/md5sum_file_cache.txt
displayName: 'Generate md5Sum with File-Cache'

- script: |
$(WORK_DIR)/blobfuse2 unmount all
displayName: 'Unmount RW mount'

- script: |
$(WORK_DIR)/blobfuse2 gen-test-config --config-file=$(WORK_DIR)/testdata/config/azure_key_xload.yaml --container-name=${{ parameters.container }} --temp-path=${{ parameters.temp_dir }} --output-file=${{ parameters.config_file }}
displayName: 'Create Config File for preload'
env:
NIGHTLY_STO_ACC_NAME: ${{ parameters.account_name }}
NIGHTLY_STO_ACC_KEY: ${{ parameters.account_key }}
ACCOUNT_TYPE: ${{ parameters.account_type }}
ACCOUNT_ENDPOINT: ${{ parameters.account_endpoint }}
VERBOSE_LOG: ${{ parameters.verbose_log }}
continueOnError: false


- script:
cat ${{ parameters.config_file }}
displayName: 'Print preload config file'

- template: 'mount.yml'
parameters:
working_dir: $(WORK_DIR)
mount_dir: ${{ parameters.mount_dir }}
temp_dir: ${{ parameters.temp_dir }}
prefix: ${{ parameters.idstring }}
ro_mount: true
mountStep:
script: |
$(WORK_DIR)/blobfuse2 mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config_file }} --default-working-dir=$(WORK_DIR) -o ro ${{ parameters.mnt_flags }}

# Poll the xload stats file until it reports 100% completion, then print its
# head and tail. The step fails if the stats file never appears or the preload
# does not finish within MAX_WAIT_SECS, instead of spinning until the job
# timeout.
- script: |
    # Upper bound (seconds) on the total time this step may wait.
    MAX_WAIT_SECS=7200
    POLL_SECS=5

    # bash builtin: after this assignment SECONDS counts elapsed seconds.
    SECONDS=0

    # The stats file may not exist yet right after the mount returns, and the
    # glob can match more than one file; wait for it and take the newest.
    STATS_MANAGER=""
    while true; do
      STATS_MANAGER=`ls -t $(WORK_DIR)/xload_stats_*.json 2>/dev/null | head -n 1`
      if [[ -n "$STATS_MANAGER" ]]; then
        break
      fi

      if (( SECONDS >= MAX_WAIT_SECS )); then
        echo "No xload stats file found in $(WORK_DIR) within ${MAX_WAIT_SECS}s"
        exit 1
      fi

      sleep $POLL_SECS
    done
    echo "$STATS_MANAGER"

    while true; do
      # Last PercentCompleted value found anywhere in the JSON document.
      percent=$(jq -r '.. | .PercentCompleted? | select(.)' "$STATS_MANAGER" | tail -n 1)
      echo "PercentCompleted = $percent"

      if [[ "$percent" == "100" ]]; then
        echo "Processing complete!"
        break
      fi

      if (( SECONDS >= MAX_WAIT_SECS )); then
        echo "Preload did not reach 100% within ${MAX_WAIT_SECS}s"
        tail -n 100 "$STATS_MANAGER"
        exit 1
      fi

      sleep $POLL_SECS
    done

    echo "----------------------------------------------"
    head -n 20 "$STATS_MANAGER"

    echo "----------------------------------------------"
    tail -n 100 "$STATS_MANAGER"
  displayName: 'Stats manager info'

- script: |
md5sum ${{ parameters.mount_dir }}/* > $(WORK_DIR)/md5sum_xload.txt
displayName: 'Generate md5Sum with preload'

- script: |
$(WORK_DIR)/blobfuse2 unmount all
displayName: 'Unmount preload mount'

# Print both checksum listings into the build log, then fail the step if the
# preload checksums differ from the file-cache checksums.
- script: |
    for sums in $(WORK_DIR)/md5sum_xload.txt $(WORK_DIR)/md5sum_file_cache.txt; do
      echo "----------------------------------------------"
      cat $sums
    done
    echo "----------------------------------------------"
    # Any difference (or a missing file) makes diff exit non-zero.
    diff $(WORK_DIR)/md5sum_xload.txt $(WORK_DIR)/md5sum_file_cache.txt || exit 1
  displayName: 'Compare md5sum'

# Publish the blobfuse2 log as a build artifact when an earlier step failed.
# The artifact is named for this xload template rather than for block cache.
- task: PublishBuildArtifacts@1
  inputs:
    pathToPublish: blobfuse2-logs.txt
    artifactName: 'blobfuse_xload.txt'
  condition: failed()

- script: |
tail -n 200 blobfuse2-logs.txt
displayName: 'View Logs'
condition: failed()

- template: 'cleanup.yml'
parameters:
working_dir: $(WORK_DIR)
mount_dir: ${{ parameters.mount_dir }}
temp_dir: ${{ parameters.temp_dir }}
78 changes: 78 additions & 0 deletions blobfuse2-nightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,84 @@ stages:
temp_dir: $(TEMP_DIR)
mount_dir: $(MOUNT_DIR)

- stage: XloadValidation
jobs:
# Ubuntu Tests
- job: Set_1
timeoutInMinutes: 300
strategy:
matrix:
Ubuntu-20:
AgentName: 'blobfuse-ubuntu20'
containerName: 'test-cnt-ubn-20'
adlsSas: $(AZTEST_ADLS_CONT_SAS_UBN_20)
fuselib: 'libfuse-dev'
tags: 'fuse2'
Ubuntu-22:
AgentName: 'blobfuse-ubuntu22'
containerName: 'test-cnt-ubn-22'
adlsSas: $(AZTEST_ADLS_CONT_SAS_UBN_22)
fuselib: 'libfuse3-dev'
tags: 'fuse3'

pool:
name: "blobfuse-ubuntu-pool"
demands:
- ImageOverride -equals $(AgentName)

variables:
- group: NightlyBlobFuse
- name: MOUNT_DIR
value: '$(Pipeline.Workspace)/blob_mnt'
- name: TEMP_DIR
value: '$(Pipeline.Workspace)/blobfuse2_tmp'
- name: BLOBFUSE2_CFG
value: '$(Pipeline.Workspace)/blobfuse2.yaml'
- name: BLOBFUSE2_ADLS_CFG
value: '$(Pipeline.Workspace)/blobfuse2.adls.yaml'
- name: skipComponentGovernanceDetection
value: true
- name: GOPATH
value: '$(Pipeline.Workspace)/go'
- name: ROOT_DIR
value: '$(System.DefaultWorkingDirectory)'
- name: WORK_DIR
value: '$(System.DefaultWorkingDirectory)/azure-storage-fuse'

steps:
# -------------------------------------------------------
# Pull and build the code
- template: 'azure-pipeline-templates/build.yml'
parameters:
working_directory: $(WORK_DIR)
root_dir: $(ROOT_DIR)
mount_dir: $(MOUNT_DIR)
temp_dir: $(TEMP_DIR)
gopath: $(GOPATH)
container: $(containerName)
tags: $(tags)
fuselib: $(fuselib)
skip_ut: true

- template: 'azure-pipeline-templates/e2e-tests-xload.yml'
parameters:
conf_template: azure_key.yaml
config_file: $(BLOBFUSE2_CFG)
container: $(containerName)
idstring: Block_Blob
adls: false
account_name: $(NIGHTLY_STO_BLOB_ACC_NAME)
account_key: $(NIGHTLY_STO_BLOB_ACC_KEY)
account_type: block
account_endpoint: https://$(NIGHTLY_STO_BLOB_ACC_NAME).blob.core.windows.net
distro_name: $(AgentName)
quick_test: false
verbose_log: ${{ parameters.verbose_log }}
clone: false
# TODO: Remove these parameters once every use of ${{ parameters.temp_dir }} and ${{ parameters.mount_dir }} is replaced with $(TEMP_DIR) and $(MOUNT_DIR), which are global variables
temp_dir: $(TEMP_DIR)
mount_dir: $(MOUNT_DIR)

- stage: Healthmon
jobs:
- job: Set_1
Expand Down
12 changes: 12 additions & 0 deletions common/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"bytes"
"crypto/aes"
"crypto/cipher"
"crypto/md5"
"crypto/rand"
"encoding/binary"
"fmt"
Expand Down Expand Up @@ -513,3 +514,14 @@ func GetCRC64(data []byte, len int) []byte {

return checksumBytes
}

// GetMD5 computes the MD5 checksum of the data read from fi, starting at the
// file's current offset and continuing until EOF. It does not rewind the file,
// so callers that want the checksum of the whole file must seek to the start
// first.
//
// It returns the digest bytes on success. On a read failure it returns a nil
// slice and an error that wraps the underlying cause, so callers can inspect
// it with errors.Is or errors.As.
func GetMD5(fi *os.File) ([]byte, error) {
	hasher := md5.New()
	if _, err := io.Copy(hasher, fi); err != nil {
		// %w produces the same message text as before while keeping the chain.
		return nil, fmt.Errorf("failed to generate md5 [%w]", err)
	}

	return hasher.Sum(nil), nil
}
22 changes: 22 additions & 0 deletions common/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,25 @@ func (suite *utilTestSuite) TestGetFuseMinorVersion() {
i := GetFuseMinorVersion()
suite.assert.GreaterOrEqual(i, 0)
}

// TestGetMD5 verifies GetMD5 against the RFC 1321 test vector for "abc",
// rather than only checking that some non-empty digest is returned.
func (s *utilTestSuite) TestGetMD5() {
	assert := assert.New(s.T())

	// A per-test temp dir keeps the working directory clean and is removed by
	// the testing framework even when an assertion fails.
	f, err := os.CreateTemp(s.T().TempDir(), "getmd5-*.txt")
	assert.Nil(err)
	defer f.Close()

	_, err = f.Write([]byte("abc"))
	assert.Nil(err)

	// GetMD5 hashes from the current offset, so rewind after writing.
	_, err = f.Seek(0, 0)
	assert.Nil(err)

	md5Sum, err := GetMD5(f)
	assert.Nil(err)

	// MD5("abc") = 900150983cd24fb0d6963f7d28e17f72.
	expected := []byte{
		0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0,
		0xd6, 0x96, 0x3f, 0x7d, 0x28, 0xe1, 0x7f, 0x72,
	}
	assert.Equal(expected, md5Sum)
}
4 changes: 2 additions & 2 deletions component/azstorage/block_blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ func (bb *BlockBlob) ReadToFile(name string, offset int64, count int64, fi *os.F

if bb.Config.validateMD5 {
// Compute md5 of local file
fileMD5, err := getMD5(fi)
fileMD5, err := common.GetMD5(fi)
if err != nil {
log.Warn("BlockBlob::ReadToFile : Failed to generate MD5 Sum for %s", name)
} else {
Expand Down Expand Up @@ -1030,7 +1030,7 @@ func (bb *BlockBlob) WriteFromFile(name string, metadata map[string]*string, fi
// hence we take cost of calculating md5 only for files which are bigger in size and which will be converted to blocks.
md5sum := []byte{}
if bb.Config.updateMD5 && stat.Size() >= blockblob.MaxUploadBlobBytes {
md5sum, err = getMD5(fi)
md5sum, err = common.GetMD5(fi)
if err != nil {
// Md5 sum generation failed so set nil while uploading
log.Warn("BlockBlob::WriteFromFile : Failed to generate md5 of %s", name)
Expand Down
6 changes: 3 additions & 3 deletions component/azstorage/block_blob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2864,7 +2864,7 @@ func (s *blockBlobTestSuite) TestMD5SetOnUpload() {
s.assert.NotEmpty(prop.MD5)

_, _ = f.Seek(0, 0)
localMD5, err := getMD5(f)
localMD5, err := common.GetMD5(f)
s.assert.Nil(err)
s.assert.EqualValues(localMD5, prop.MD5)

Expand Down Expand Up @@ -2965,7 +2965,7 @@ func (s *blockBlobTestSuite) TestMD5AutoSetOnUpload() {
s.assert.NotEmpty(prop.MD5)

_, _ = f.Seek(0, 0)
localMD5, err := getMD5(f)
localMD5, err := common.GetMD5(f)
s.assert.Nil(err)
s.assert.EqualValues(localMD5, prop.MD5)

Expand Down Expand Up @@ -3021,7 +3021,7 @@ func (s *blockBlobTestSuite) TestInvalidateMD5PostUpload() {
s.assert.NotEmpty(prop.MD5)

_, _ = f.Seek(0, 0)
localMD5, err := getMD5(f)
localMD5, err := common.GetMD5(f)
s.assert.Nil(err)
s.assert.NotEqualValues(localMD5, prop.MD5)

Expand Down
13 changes: 0 additions & 13 deletions component/azstorage/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@
package azstorage

import (
"crypto/md5"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/url"
Expand Down Expand Up @@ -558,17 +556,6 @@ func sanitizeSASKey(key string) string {
return key
}

// getMD5 returns the MD5 digest of the data read from fi, from its current
// offset to EOF. Any read failure is reported as a generic
// "failed to generate md5" error and the digest is nil.
func getMD5(fi *os.File) ([]byte, error) {
	digest := md5.New()

	if _, copyErr := io.Copy(digest, fi); copyErr != nil {
		return nil, errors.New("failed to generate md5")
	}

	sum := digest.Sum(nil)
	return sum, nil
}

func autoDetectAuthMode(opt AzStorageOptions) string {
if opt.ApplicationID != "" || opt.ResourceID != "" || opt.ObjectID != "" {
return "msi"
Expand Down
Loading