@@ -54,13 +54,6 @@ namespace Details
5454 void stop ();
5555
5656 private:
57- // We will wait RETRY_SECONDS + (errorCount * RETRY_SECONDS_INC) before retrying after an error.
58- // This means we attempt to recover relatively quickly but back off giving more time to recover
59- // until we finally give up after MAX_EVENT_POLL_HTTP_ERRORS attempts.
60- static const F32 EVENT_POLL_ERROR_RETRY_SECONDS;
61- static const F32 EVENT_POLL_ERROR_RETRY_SECONDS_INC;
62- static const S32 MAX_EVENT_POLL_HTTP_ERRORS;
63-
6457 void eventPollCoro (std::string url);
6558
6659 void handleMessage (const LLSD &content);
@@ -76,9 +69,13 @@ namespace Details
7669 };
7770
7871
79- const F32 LLEventPollImpl::EVENT_POLL_ERROR_RETRY_SECONDS = 15 .f; // ~ half of a normal timeout.
80- const F32 LLEventPollImpl::EVENT_POLL_ERROR_RETRY_SECONDS_INC = 5 .f; // ~ half of a normal timeout.
81- const S32 LLEventPollImpl::MAX_EVENT_POLL_HTTP_ERRORS = 10 ; // ~5 minutes, by the above rules.
72+ // We will wait RETRY_SECONDS + (errorCount * RETRY_SECONDS_INC) before retrying after an error.
73+ // This means we attempt to recover relatively quickly but back off giving more time to recover
74+ // until we finally give up after MAX_EVENT_POLL_HTTP_ERRORS attempts.
75+ constexpr F32 EVENT_POLL_ERROR_RETRY_SECONDS = 15 .f; // ~ half of a normal timeout.
76+ constexpr F32 EVENT_POLL_ERROR_RETRY_SECONDS_INC = 5 .f; // ~ half of a normal timeout.
77+ constexpr S32 MAX_EVENT_POLL_HTTP_ERRORS = 10 ; // ~5 minutes, by the above rules.
78+ constexpr F64 MIN_SECONDS_PASSED = 10.0 ; // Minimum time we expect the server to hold the request.
8279
8380 int LLEventPollImpl::sNextCounter = 1 ;
8481
@@ -151,11 +148,17 @@ namespace Details
151148 LLSD acknowledge;
152149 int errorCount = 0 ;
153150 int counter = mCounter ; // saved on the stack for logging.
151+ LLTimer message_time;
154152
155153 LL_DEBUGS (" LLEventPollImpl" ) << " <" << counter << " > entering coroutine." << LL_ENDL;
156154
157155 mAdapter = httpAdapter;
158156
157+ // This is a loop with its own waitToRetry implementation,
158+ // so disable retries.
159+ LLCore::HttpOptions::ptr_t httpOpts (new LLCore::HttpOptions);
160+ httpOpts->setRetries (0 );
161+
159162 LL::WorkQueue::ptr_t main_queue = nullptr ;
160163
161164 // HACK -- grab the mainloop workqueue to move execution of the handler
@@ -172,11 +175,13 @@ namespace Details
172175 request[" ack" ] = acknowledge;
173176 request[" done" ] = mDone ;
174177
178+ message_time.reset ();
179+
175180// LL_DEBUGS("LLEventPollImpl::eventPollCoro") << "<" << counter << "> request = "
176181// << LLSDXMLStreamer(request) << LL_ENDL;
177182
178183 LL_DEBUGS (" LLEventPollImpl" ) << " <" << counter << " > posting and yielding." << LL_ENDL;
179- LLSD result = httpAdapter->postAndSuspend (mHttpRequest , url, request);
184+ LLSD result = httpAdapter->postAndSuspend (mHttpRequest , url, request, httpOpts );
180185
181186// LL_DEBUGS("LLEventPollImpl::eventPollCoro") << "<" << counter << "> result = "
182187// << LLSDXMLStreamer(result) << LL_ENDL;
@@ -194,25 +199,44 @@ namespace Details
194199
195200 if (!status)
196201 {
197- if (status == LLCore::HttpStatus (LLCore::HttpStatus::EXT_CURL_EASY, CURLE_OPERATION_TIMEDOUT))
198- { // A standard timeout response we get this when there are no events.
199- LL_DEBUGS (" LLEventPollImpl" ) << " All is very quiet on target server. It may have gone idle?" << LL_ENDL;
200- errorCount = 0 ;
201- continue ;
202+ if (status == LLCore::HttpStatus (LLCore::HttpStatus::EXT_CURL_EASY, CURLE_OPERATION_TIMEDOUT) // A standard timeout, no events.
203+ || status == LLCore::HttpStatus (HTTP_BAD_GATEWAY) // An expected 'No events' case.
204+ || status == LLCore::HttpStatus (HTTP_INTERNAL_ERROR)
205+ || status == LLCore::HttpStatus (HTTP_SERVICE_UNAVAILABLE)
206+ || status == LLCore::HttpStatus (HTTP_GATEWAY_TIME_OUT))
207+ {
208+ if (message_time.getElapsedSeconds () < MIN_SECONDS_PASSED)
209+ {
210+ // Server is supposed to hold request for 20 to 30 seconds.
211+ // If it didn't hold the request at least for 10s, treat as an error.
212+ LL_WARNS (" LLEventPollImpl" ) << " Response arrived too early, status: " << status.toTerseString ()
213+ << " , time passed: " << message_time.getElapsedSeconds () << LL_ENDL;
214+ }
215+ else
216+ {
217+ // Timeout, expected and means 'no events'. Request is to be re-issued immediately.
218+ // Current definition of a timeout is any of :
219+ // - libcurl easy 28 status code
220+ // - Linden 499 special http status code
221+ // - RFC - standard 502 - 504 http status codes
222+ LL_DEBUGS (" LLEventPollImpl" ) << " No events, from: " << mSenderIp <<" status: " << (S32)status.getStatus () << LL_ENDL;
223+ errorCount = 0 ;
224+ continue ;
225+ }
202226 }
203227 else if ((status == LLCore::HttpStatus (LLCore::HttpStatus::LLCORE, LLCore::HE_OP_CANCELED)) ||
204228 (status == LLCore::HttpStatus (HTTP_NOT_FOUND)))
205229 { // Event polling for this server has been canceled. In
206230 // some cases the server gets ahead of the viewer and will
207231 // return a 404 error (Not Found) before the cancel event
208232 // comes back in the queue
209- LL_WARNS (" LLEventPollImpl" ) << " Canceling coroutine" << LL_ENDL;
233+ LL_WARNS (" LLEventPollImpl" ) << " < " << counter << " > Canceling coroutine, status: " << status. toTerseString () << LL_ENDL;
210234 break ;
211235 }
212236 else if (!status.isHttpStatus ())
213237 {
214238 // / Some LLCore or LIBCurl error was returned. This is unlikely to be recoverable
215- LL_WARNS (" LLEventPollImpl" ) << " Critical error from poll request returned from libraries. Canceling coroutine." << LL_ENDL;
239+ LL_WARNS (" LLEventPollImpl" ) << " < " << counter << " > Critical error from poll request returned from libraries. Canceling coroutine." << LL_ENDL;
216240 break ;
217241 }
218242 LL_WARNS (" LLEventPollImpl" ) << " <" << counter << " > Error result from LLCoreHttpUtil::HttpCoroHandler. Code "
@@ -255,6 +279,10 @@ namespace Details
255279 LL_WARNS (" LLEventPollImpl" ) << " < " << counter << " > Forcing disconnect due to stalled main region event poll." << LL_ENDL;
256280 LLAppViewer::instance ()->forceDisconnect (LLTrans::getString (" AgentLostConnection" ));
257281 }
282+ else
283+ {
284+ LL_WARNS (" LLEventPollImpl" ) << " < " << counter << " > Stopping event poll for " << mSenderIp << " due to failures." << LL_ENDL;
285+ }
258286 break ;
259287 }
260288 }
0 commit comments