-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Error Handling For Connection Manager #507
Changes from 9 commits
2e74baf
616bd70
1f84ecb
ad3ef27
784d5f9
9afe70c
e92bf1c
77edbff
29ac206
bc038fd
cbd4a04
9b171cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1164,7 +1164,6 @@ static void s_aws_http_connection_manager_execute_transaction(struct aws_connect | |
|
||
struct aws_http_connection_manager *manager = work->manager; | ||
|
||
int representative_error = 0; | ||
size_t new_connection_failures = 0; | ||
|
||
/* | ||
|
@@ -1218,14 +1217,18 @@ static void s_aws_http_connection_manager_execute_transaction(struct aws_connect | |
for (size_t i = 0; i < work->new_connections; ++i) { | ||
if (s_aws_http_connection_manager_new_connection(manager)) { | ||
++new_connection_failures; | ||
representative_error = aws_last_error(); | ||
int error = aws_last_error(); | ||
if (push_errors) { | ||
AWS_FATAL_ASSERT(aws_array_list_push_back(&errors, &representative_error) == AWS_OP_SUCCESS); | ||
AWS_FATAL_ASSERT(aws_array_list_push_back(&errors, &error) == AWS_OP_SUCCESS); | ||
} | ||
} | ||
} | ||
|
||
if (new_connection_failures > 0) { | ||
/* If there are pending acquisitions, schedule work again to try acquiring them */ | ||
struct aws_connection_management_transaction updated_work; | ||
s_aws_connection_management_transaction_init(&updated_work, manager); | ||
bool has_pending_acquisitions = false; | ||
/* | ||
* We failed and aren't going to receive a callback, but the current state assumes we will receive | ||
* a callback. So we need to re-lock and update the state ourselves. | ||
|
@@ -1235,31 +1238,29 @@ static void s_aws_http_connection_manager_execute_transaction(struct aws_connect | |
AWS_FATAL_ASSERT(manager->internal_ref[AWS_HCMCT_PENDING_CONNECTIONS] >= new_connection_failures); | ||
s_connection_manager_internal_ref_decrease(manager, AWS_HCMCT_PENDING_CONNECTIONS, new_connection_failures); | ||
|
||
/* | ||
* Rather than failing one acquisition for each connection failure, if there's at least one | ||
* connection failure, we instead fail all excess acquisitions, since there's no pending | ||
* connect that will necessarily resolve them. | ||
* | ||
* Try to correspond an error with the acquisition failure, but as a fallback just use the | ||
* representative error. | ||
*/ | ||
size_t i = 0; | ||
while (manager->pending_acquisition_count > manager->internal_ref[AWS_HCMCT_PENDING_CONNECTIONS]) { | ||
int error = representative_error; | ||
if (i < aws_array_list_length(&errors)) { | ||
aws_array_list_get_at(&errors, &error, i); | ||
} | ||
|
||
for (size_t i = 0; i < new_connection_failures && | ||
manager->pending_acquisition_count > manager->internal_ref[AWS_HCMCT_PENDING_CONNECTIONS]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should We just decrease the And it is confusing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It can't be an assert because there is a gap between when we calculate the new_connection_failures and acquire a lock, so it's possible that by the time we come here, the pending acquisitions that failed were already completed by something else like idle connections returning to the pool. I have simplified the check to just validate that we have pending acquisitions before trying to complete them with an error. |
||
i++) { | ||
int error; | ||
aws_array_list_get_at(&errors, &error, i); | ||
AWS_LOGF_DEBUG( | ||
AWS_LS_HTTP_CONNECTION_MANAGER, | ||
"id=%p: Failing excess connection acquisition with error code %d", | ||
"id=%p: Failing connection acquisition with error code %d", | ||
(void *)manager, | ||
(int)error); | ||
s_aws_http_connection_manager_move_front_acquisition(manager, NULL, error, &work->completions); | ||
++i; | ||
} | ||
|
||
has_pending_acquisitions = | ||
manager->internal_ref[AWS_HCMCT_PENDING_CONNECTIONS] < manager->pending_acquisition_count; | ||
if (has_pending_acquisitions) { | ||
s_aws_http_connection_manager_build_transaction(&updated_work); | ||
} | ||
aws_mutex_unlock(&manager->lock); | ||
if (has_pending_acquisitions) { | ||
s_aws_http_connection_manager_execute_transaction(&updated_work); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not a fan of this recursive call happens in the mid of the function. Also, it could lead to all the connection fails in the recursion and then callbacks starts to happen, which seems not be expected. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the link, I have updated it. |
||
} else { | ||
s_aws_connection_management_transaction_clean_up(&updated_work); | ||
} | ||
} | ||
|
||
/* | ||
|
@@ -1268,7 +1269,6 @@ static void s_aws_http_connection_manager_execute_transaction(struct aws_connect | |
s_aws_http_connection_manager_complete_acquisitions(&work->completions, work->allocator); | ||
|
||
aws_array_list_clean_up(&errors); | ||
|
||
/* | ||
* Step 5 - Clean up work. Do this here rather than at the end of every caller. Destroy the manager if necessary | ||
*/ | ||
|
@@ -1479,12 +1479,11 @@ static void s_cm_on_connection_ready_or_failed( | |
work->connection_to_release = connection; | ||
} | ||
} else { | ||
/* fail acquisition as one connection cannot be used any more */ | ||
while (manager->pending_acquisition_count > | ||
manager->internal_ref[AWS_HCMCT_PENDING_CONNECTIONS] + manager->pending_settings_count) { | ||
if (manager->pending_acquisition_count > manager->internal_ref[AWS_HCMCT_PENDING_CONNECTIONS]) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. assert? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above, it's possible that by the time we got an async error from connection callback, the pending acquisition is already complete by something else. I have simplified the check to make it less confusing. |
||
/* fail acquisition as connection acquire failed */ | ||
AWS_LOGF_DEBUG( | ||
AWS_LS_HTTP_CONNECTION_MANAGER, | ||
"id=%p: Failing excess connection acquisition with error code %d", | ||
"id=%p: Failing connection acquisition with error code %d", | ||
(void *)manager, | ||
(int)error_code); | ||
s_aws_http_connection_manager_move_front_acquisition(manager, NULL, error_code, &work->completions); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
let's just assert on the array list allocation failure. That code is probably written when the allocation can still fail and we want to handle it properly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, updated.