Skip to content

Commit 0124d5b

Browse files
authored
apacheGH-49078: [FS][Azure] Fix lossy pickling of SubTreeFileSystem(base_path, AzureFileSystem(...)) (apache#49140)
### Rationale for this change Fix apache#49078 ### What changes are included in this PR? - Implement getters on the C++ side of `AzureOptions`, for the values that are currently stored only on the python side. - This required adding some more member variables - I decided to add `ClearCredentials` , so that it can't get into strange states by configuring one credential type then another. IMO configuring the credentials during initialisation on the `AzureOptions` would be neater but I don't want to make this PR too big. - Update the C++ side `AzureOptions::Equals` - Remove python side `self` attributes and instead depend on getters from C++ side. ### Are these changes tested? - Updated tests on the C++ side for the updated `Equals` and newly added getter methods. - Added a fixture `pickle_with_and_without_subtree_filesystem`, which can be used in place of the `pickle_module`. This adds test combinations with and without wrapping the filesystem in a `SubTreeFilesystem` before pickling it. ### Are there any user-facing changes? Only that pickling `SubTreeFileSystem(base_path, AzureFileSystem(...))` now works properly. * GitHub Issue: apache#49078 Authored-by: Thomas Newton <thomas.w.newton@gmail.com> Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com>
1 parent aae49e8 commit 0124d5b

File tree

6 files changed

+194
-51
lines changed

6 files changed

+194
-51
lines changed

cpp/src/arrow/filesystem/azurefs.cc

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,17 @@ Result<AzureOptions> AzureOptions::FromUri(const std::string& uri_string,
248248
}
249249

250250
bool AzureOptions::Equals(const AzureOptions& other) const {
251-
const bool equals = blob_storage_authority == other.blob_storage_authority &&
252-
dfs_storage_authority == other.dfs_storage_authority &&
253-
blob_storage_scheme == other.blob_storage_scheme &&
254-
dfs_storage_scheme == other.dfs_storage_scheme &&
255-
default_metadata == other.default_metadata &&
256-
account_name == other.account_name &&
257-
credential_kind_ == other.credential_kind_;
251+
const bool equals =
252+
account_name == other.account_name &&
253+
blob_storage_authority == other.blob_storage_authority &&
254+
dfs_storage_authority == other.dfs_storage_authority &&
255+
blob_storage_scheme == other.blob_storage_scheme &&
256+
dfs_storage_scheme == other.dfs_storage_scheme &&
257+
default_metadata == other.default_metadata &&
258+
background_writes == other.background_writes &&
259+
credential_kind_ == other.credential_kind_ && account_key_ == other.account_key_ &&
260+
sas_token_ == other.sas_token_ && tenant_id_ == other.tenant_id_ &&
261+
client_id_ == other.client_id_ && client_secret_ == other.client_secret_;
258262
if (!equals) {
259263
return false;
260264
}
@@ -318,65 +322,90 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
318322
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
319323
}
320324

325+
void AzureOptions::ClearCredentials() {
326+
credential_kind_ = CredentialKind::kDefault;
327+
storage_shared_key_credential_ = nullptr;
328+
account_key_.clear();
329+
sas_token_.clear();
330+
tenant_id_.clear();
331+
client_id_.clear();
332+
client_secret_.clear();
333+
token_credential_ = nullptr;
334+
}
335+
321336
Status AzureOptions::ConfigureDefaultCredential() {
337+
ClearCredentials();
322338
credential_kind_ = CredentialKind::kDefault;
323339
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
324340
return Status::OK();
325341
}
326342

327343
Status AzureOptions::ConfigureAnonymousCredential() {
344+
ClearCredentials();
328345
credential_kind_ = CredentialKind::kAnonymous;
329346
return Status::OK();
330347
}
331348

332349
Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) {
350+
ClearCredentials();
333351
credential_kind_ = CredentialKind::kStorageSharedKey;
334352
if (account_name.empty()) {
335353
return Status::Invalid("AzureOptions doesn't contain a valid account name");
336354
}
355+
account_key_ = account_key;
337356
storage_shared_key_credential_ =
338357
std::make_shared<Storage::StorageSharedKeyCredential>(account_name, account_key);
339358
return Status::OK();
340359
}
341360

342361
Status AzureOptions::ConfigureSASCredential(const std::string& sas_token) {
343-
credential_kind_ = CredentialKind::kSASToken;
362+
ClearCredentials();
344363
if (account_name.empty()) {
345364
return Status::Invalid("AzureOptions doesn't contain a valid account name");
346365
}
347366
sas_token_ = sas_token;
367+
credential_kind_ = CredentialKind::kSASToken;
348368
return Status::OK();
349369
}
350370

351371
Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id,
352372
const std::string& client_id,
353373
const std::string& client_secret) {
374+
ClearCredentials();
375+
tenant_id_ = tenant_id;
376+
client_id_ = client_id;
377+
client_secret_ = client_secret;
354378
credential_kind_ = CredentialKind::kClientSecret;
355379
token_credential_ = std::make_shared<Azure::Identity::ClientSecretCredential>(
356380
tenant_id, client_id, client_secret);
357381
return Status::OK();
358382
}
359383

360384
Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) {
385+
ClearCredentials();
386+
client_id_ = client_id;
361387
credential_kind_ = CredentialKind::kManagedIdentity;
362388
token_credential_ =
363389
std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
364390
return Status::OK();
365391
}
366392

367393
Status AzureOptions::ConfigureCLICredential() {
394+
ClearCredentials();
368395
credential_kind_ = CredentialKind::kCLI;
369396
token_credential_ = std::make_shared<Azure::Identity::AzureCliCredential>();
370397
return Status::OK();
371398
}
372399

373400
Status AzureOptions::ConfigureWorkloadIdentityCredential() {
401+
ClearCredentials();
374402
credential_kind_ = CredentialKind::kWorkloadIdentity;
375403
token_credential_ = std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
376404
return Status::OK();
377405
}
378406

379407
Status AzureOptions::ConfigureEnvironmentCredential() {
408+
ClearCredentials();
380409
credential_kind_ = CredentialKind::kEnvironment;
381410
token_credential_ = std::make_shared<Azure::Identity::EnvironmentCredential>();
382411
return Status::OK();

cpp/src/arrow/filesystem/azurefs.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,6 @@ struct ARROW_EXPORT AzureOptions {
9999
/// Default: "https"
100100
std::string dfs_storage_scheme = "https";
101101

102-
// TODO(GH-38598): Add support for more auth methods.
103-
// std::string connection_string;
104-
// std::string sas_token;
105-
106102
/// \brief Default metadata for OpenOutputStream.
107103
///
108104
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
@@ -126,7 +122,11 @@ struct ARROW_EXPORT AzureOptions {
126122

127123
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
128124
storage_shared_key_credential_;
125+
std::string account_key_;
129126
std::string sas_token_;
127+
std::string tenant_id_;
128+
std::string client_id_;
129+
std::string client_secret_;
130130
mutable std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
131131

132132
public:
@@ -136,6 +136,7 @@ struct ARROW_EXPORT AzureOptions {
136136
private:
137137
void ExtractFromUriSchemeAndHierPart(const Uri& uri, std::string* out_path);
138138
Status ExtractFromUriQuery(const Uri& uri);
139+
void ClearCredentials();
139140

140141
public:
141142
/// \brief Construct a new AzureOptions from an URI.
@@ -204,6 +205,12 @@ struct ARROW_EXPORT AzureOptions {
204205
std::string AccountBlobUrl(const std::string& account_name) const;
205206
std::string AccountDfsUrl(const std::string& account_name) const;
206207

208+
std::string AccountKey() const { return account_key_; }
209+
std::string SasToken() const { return sas_token_; }
210+
std::string TenantId() const { return tenant_id_; }
211+
std::string ClientId() const { return client_id_; }
212+
std::string ClientSecret() const { return client_secret_; }
213+
207214
Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
208215
MakeBlobServiceClient() const;
209216

cpp/src/arrow/filesystem/azurefs_test.cc

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,11 @@ TEST(AzureFileSystem, InitializeWithDefaultCredential) {
493493
AzureOptions options;
494494
options.account_name = "dummy-account-name";
495495
ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
496+
ASSERT_EQ(options.AccountKey(), "");
497+
ASSERT_EQ(options.SasToken(), "");
498+
ASSERT_EQ(options.TenantId(), "");
499+
ASSERT_EQ(options.ClientId(), "");
500+
ASSERT_EQ(options.ClientSecret(), "");
496501
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
497502
}
498503

@@ -509,6 +514,23 @@ TEST(AzureFileSystem, InitializeWithAnonymousCredential) {
509514
AzureOptions options;
510515
options.account_name = "dummy-account-name";
511516
ARROW_EXPECT_OK(options.ConfigureAnonymousCredential());
517+
ASSERT_EQ(options.AccountKey(), "");
518+
ASSERT_EQ(options.SasToken(), "");
519+
ASSERT_EQ(options.TenantId(), "");
520+
ASSERT_EQ(options.ClientId(), "");
521+
ASSERT_EQ(options.ClientSecret(), "");
522+
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
523+
}
524+
525+
TEST(AzureFileSystem, InitializeWithAccountKeyCredential) {
526+
AzureOptions options;
527+
options.account_name = "dummy-account-name";
528+
ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential("account_key"));
529+
ASSERT_EQ(options.AccountKey(), "account_key");
530+
ASSERT_EQ(options.SasToken(), "");
531+
ASSERT_EQ(options.TenantId(), "");
532+
ASSERT_EQ(options.ClientId(), "");
533+
ASSERT_EQ(options.ClientSecret(), "");
512534
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
513535
}
514536

@@ -517,43 +539,98 @@ TEST(AzureFileSystem, InitializeWithClientSecretCredential) {
517539
options.account_name = "dummy-account-name";
518540
ARROW_EXPECT_OK(
519541
options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
542+
ASSERT_EQ(options.AccountKey(), "");
543+
ASSERT_EQ(options.SasToken(), "");
544+
ASSERT_EQ(options.TenantId(), "tenant_id");
545+
ASSERT_EQ(options.ClientId(), "client_id");
546+
ASSERT_EQ(options.ClientSecret(), "client_secret");
520547
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
521548
}
522549

523550
TEST(AzureFileSystem, InitializeWithManagedIdentityCredential) {
524551
AzureOptions options;
525552
options.account_name = "dummy-account-name";
526553
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
554+
ASSERT_EQ(options.AccountKey(), "");
555+
ASSERT_EQ(options.SasToken(), "");
556+
ASSERT_EQ(options.TenantId(), "");
557+
ASSERT_EQ(options.ClientId(), "");
558+
ASSERT_EQ(options.ClientSecret(), "");
527559
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
528560

529561
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id"));
562+
ASSERT_EQ(options.AccountKey(), "");
563+
ASSERT_EQ(options.SasToken(), "");
564+
ASSERT_EQ(options.TenantId(), "");
565+
ASSERT_EQ(options.ClientId(), "specific-client-id");
566+
ASSERT_EQ(options.ClientSecret(), "");
530567
EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
531568
}
532569

533570
TEST(AzureFileSystem, InitializeWithCLICredential) {
534571
AzureOptions options;
535572
options.account_name = "dummy-account-name";
536573
ARROW_EXPECT_OK(options.ConfigureCLICredential());
574+
ASSERT_EQ(options.AccountKey(), "");
575+
ASSERT_EQ(options.SasToken(), "");
576+
ASSERT_EQ(options.TenantId(), "");
577+
ASSERT_EQ(options.ClientId(), "");
578+
ASSERT_EQ(options.ClientSecret(), "");
537579
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
538580
}
539581

540582
TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) {
541583
AzureOptions options;
542584
options.account_name = "dummy-account-name";
543585
ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
586+
ASSERT_EQ(options.AccountKey(), "");
587+
ASSERT_EQ(options.SasToken(), "");
588+
ASSERT_EQ(options.TenantId(), "");
589+
ASSERT_EQ(options.ClientId(), "");
590+
ASSERT_EQ(options.ClientSecret(), "");
544591
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
545592
}
546593

547594
TEST(AzureFileSystem, InitializeWithEnvironmentCredential) {
548595
AzureOptions options;
549596
options.account_name = "dummy-account-name";
550597
ARROW_EXPECT_OK(options.ConfigureEnvironmentCredential());
598+
ASSERT_EQ(options.AccountKey(), "");
599+
ASSERT_EQ(options.SasToken(), "");
600+
ASSERT_EQ(options.TenantId(), "");
601+
ASSERT_EQ(options.ClientId(), "");
602+
ASSERT_EQ(options.ClientSecret(), "");
551603
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
552604
}
553605

554606
TEST(AzureFileSystem, OptionsCompare) {
555-
AzureOptions options;
556-
EXPECT_TRUE(options.Equals(options));
607+
AzureOptions options0;
608+
EXPECT_TRUE(options0.Equals(options0));
609+
610+
AzureOptions options1;
611+
options1.account_name = "account_name";
612+
EXPECT_FALSE(options1.Equals(options0));
613+
614+
AzureOptions options2;
615+
options2.account_name = "account_name";
616+
ASSERT_OK(options2.ConfigureAccountKeyCredential("fake_account_key"));
617+
EXPECT_FALSE(options2.Equals(options1));
618+
619+
AzureOptions options3;
620+
options3.account_name = "account_name";
621+
ASSERT_OK(options3.ConfigureAccountKeyCredential("different_fake_account_key"));
622+
EXPECT_FALSE(options3.Equals(options2));
623+
624+
AzureOptions options4;
625+
options4.account_name = "account_name";
626+
ASSERT_OK(options4.ConfigureSASCredential("fake_sas_token"));
627+
EXPECT_FALSE(options4.Equals(options3));
628+
629+
AzureOptions options5;
630+
options5.account_name = "account_name";
631+
ASSERT_OK(options5.ConfigureClientSecretCredential("fake_tenant_id", "fake_client_id",
632+
"fake_client_secret"));
633+
EXPECT_FALSE(options5.Equals(options4));
557634
}
558635

559636
class TestAzureOptions : public ::testing::Test {
@@ -1679,9 +1756,14 @@ class TestAzureFileSystem : public ::testing::Test {
16791756
env->account_name(), env->account_key())));
16801757
// AzureOptions::FromUri will not cut off extra query parameters that it consumes, so
16811758
// make sure these don't cause problems.
1682-
ARROW_EXPECT_OK(options.ConfigureSASCredential(
1683-
"?blob_storage_authority=dummy_value0&" + sas_token.substr(1) +
1684-
"&credential_kind=dummy-value1"));
1759+
auto polluted_sas_token = "?blob_storage_authority=dummy_value0&" +
1760+
sas_token.substr(1) + "&credential_kind=dummy-value1";
1761+
ARROW_EXPECT_OK(options.ConfigureSASCredential(polluted_sas_token));
1762+
ASSERT_EQ(options.AccountKey(), "");
1763+
ASSERT_EQ(options.SasToken(), polluted_sas_token);
1764+
ASSERT_EQ(options.TenantId(), "");
1765+
ASSERT_EQ(options.ClientId(), "");
1766+
ASSERT_EQ(options.ClientSecret(), "");
16851767
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
16861768

16871769
AssertFileInfo(fs.get(), data.ObjectPath(), FileType::File);

0 commit comments

Comments
 (0)