@@ -967,6 +967,32 @@ Status StageBlock(Blobs::BlockBlobClient* block_blob_client, const std::string&
967967 return Status::OK ();
968968}
969969
970+ // Usually if the first page is empty it means there are no results. This was assumed in
971+ // several places in AzureFilesystem. The Azure docs do not guarantee this and we have
972+ // evidence (GH-49043) that there can be subsequent non-empty pages.
973+ // Applying `SkipStartingEmptyPages` on a paged response corrects this assumption.
974+ void SkipStartingEmptyPages (Blobs::ListBlobContainersPagedResponse& paged_response) {
975+ while (paged_response.HasPage () && paged_response.BlobContainers .empty ()) {
976+ paged_response.MoveToNextPage ();
977+ }
978+ }
979+ void SkipStartingEmptyPages (Blobs::ListBlobsPagedResponse& paged_response) {
980+ while (paged_response.HasPage () && paged_response.Blobs .size () == 0 ) {
981+ paged_response.MoveToNextPage ();
982+ }
983+ }
984+ void SkipStartingEmptyPages (Blobs::ListBlobsByHierarchyPagedResponse& paged_response) {
985+ while (paged_response.HasPage () && paged_response.Blobs .empty () &&
986+ paged_response.BlobPrefixes .empty ()) {
987+ paged_response.MoveToNextPage ();
988+ }
989+ }
990+ void SkipStartingEmptyPages (DataLake::ListPathsPagedResponse& paged_response) {
991+ while (paged_response.HasPage () && paged_response.Paths .empty ()) {
992+ paged_response.MoveToNextPage ();
993+ }
994+ }
995+
970996// / Writes will be buffered up to this size (in bytes) before actually uploading them.
971997static constexpr int64_t kBlockUploadSizeBytes = 10 * 1024 * 1024 ;
972998// / The maximum size of a block in Azure Blob (as per docs).
@@ -1805,12 +1831,14 @@ class AzureFileSystem::Impl {
18051831 try {
18061832 FileInfo info{location.all };
18071833 auto list_response = container_client.ListBlobsByHierarchy (kDelimiter , options);
1834+ SkipStartingEmptyPages (list_response);
18081835 // Since PageSizeHint=1, we expect at most one entry in either Blobs or
18091836 // BlobPrefixes. A BlobPrefix always ends with kDelimiter ("/"), so we can
18101837 // distinguish between a directory and a file by checking if we received a
18111838 // prefix or a blob.
18121839 // This strategy allows us to implement GetFileInfo with just 1 blob storage
18131840 // operation in almost every case.
1841+
18141842 if (!list_response.BlobPrefixes .empty ()) {
18151843 // Ensure the returned BlobPrefixes[0] string doesn't contain more characters than
18161844 // the requested Prefix. For instance, if we request with Prefix="dir/abra" and
@@ -1847,6 +1875,7 @@ class AzureFileSystem::Impl {
18471875 // whether the path is a directory.
18481876 options.Prefix = internal::EnsureTrailingSlash (location.path );
18491877 auto list_with_trailing_slash_response = container_client.ListBlobs (options);
1878+ SkipStartingEmptyPages (list_with_trailing_slash_response);
18501879 if (!list_with_trailing_slash_response.Blobs .empty ()) {
18511880 info.set_type (FileType::Directory);
18521881 return info;
@@ -1909,6 +1938,7 @@ class AzureFileSystem::Impl {
19091938 try {
19101939 auto container_list_response =
19111940 blob_service_client_->ListBlobContainers (options, context);
1941+ SkipStartingEmptyPages (container_list_response);
19121942 for (; container_list_response.HasPage ();
19131943 container_list_response.MoveToNextPage (context)) {
19141944 for (const auto & container : container_list_response.BlobContainers ) {
@@ -1950,6 +1980,7 @@ class AzureFileSystem::Impl {
19501980 auto base_path_depth = internal::GetAbstractPathDepth (base_location.path );
19511981 try {
19521982 auto list_response = directory_client.ListPaths (select.recursive , options, context);
1983+ SkipStartingEmptyPages (list_response);
19531984 for (; list_response.HasPage (); list_response.MoveToNextPage (context)) {
19541985 if (list_response.Paths .empty ()) {
19551986 continue ;
@@ -2040,6 +2071,7 @@ class AzureFileSystem::Impl {
20402071 try {
20412072 auto list_response =
20422073 container_client.ListBlobsByHierarchy (/* delimiter=*/ " /" , options, context);
2074+ SkipStartingEmptyPages (list_response);
20432075 for (; list_response.HasPage (); list_response.MoveToNextPage (context)) {
20442076 if (list_response.Blobs .empty () && list_response.BlobPrefixes .empty ()) {
20452077 continue ;
@@ -2442,6 +2474,7 @@ class AzureFileSystem::Impl {
24422474 bool found_dir_marker_blob = false ;
24432475 try {
24442476 auto list_response = container_client.ListBlobs (options);
2477+ SkipStartingEmptyPages (list_response);
24452478 if (list_response.Blobs .empty ()) {
24462479 if (require_dir_to_exist) {
24472480 return PathNotFound (location);
@@ -2575,6 +2608,7 @@ class AzureFileSystem::Impl {
25752608 auto directory_client = adlfs_client.GetDirectoryClient (location.path );
25762609 try {
25772610 auto list_response = directory_client.ListPaths (false );
2611+ SkipStartingEmptyPages (list_response);
25782612 for (; list_response.HasPage (); list_response.MoveToNextPage ()) {
25792613 for (const auto & path : list_response.Paths ) {
25802614 if (path.IsDirectory ) {
@@ -2899,6 +2933,7 @@ class AzureFileSystem::Impl {
28992933 list_blobs_options.PageSizeHint = 1 ;
29002934 try {
29012935 auto dest_list_response = dest_container_client.ListBlobs (list_blobs_options);
2936+ SkipStartingEmptyPages (dest_list_response);
29022937 dest_is_empty = dest_list_response.Blobs .empty ();
29032938 if (!dest_is_empty) {
29042939 return NotEmpty (dest);
@@ -2952,6 +2987,7 @@ class AzureFileSystem::Impl {
29522987 list_blobs_options.PageSizeHint = 1 ;
29532988 try {
29542989 auto src_list_response = src_container_client.ListBlobs (list_blobs_options);
2990+ SkipStartingEmptyPages (src_list_response);
29552991 if (!src_list_response.Blobs .empty ()) {
29562992 // Reminder: dest is used here because we're semantically replacing dest
29572993 // with src. By deleting src if it's empty just like dest.
0 commit comments