1919#
2020#
2121
22- from typing import Awaitable , cast
22+ from typing import Awaitable , Dict , cast
2323
2424from twisted .internet import defer
2525from twisted .internet .testing import MemoryReactorClock
2626
2727from synapse .logging .context import (
2828 LoggingContext ,
29+ PreserveLoggingContext ,
30+ current_context ,
2931 make_deferred_yieldable ,
3032 run_in_background ,
3133)
3739)
3840from synapse .util .clock import Clock
3941
42+ from tests .server import (
43+ get_clock ,
44+ )
45+
4046try :
47+ import opentracing
48+
4149 from synapse .logging .scopecontextmanager import LogContextScopeManager
4250except ImportError :
51+ opentracing = None # type: ignore
4352 LogContextScopeManager = None # type: ignore
4453
4554try :
@@ -65,7 +74,7 @@ class LogContextScopeManagerTestCase(TestCase):
6574 opentracing backend is Jaeger.
6675 """
6776
68- if LogContextScopeManager is None :
77+ if opentracing is None or LogContextScopeManager is None :
6978 skip = "Requires opentracing" # type: ignore[unreachable]
7079 if jaeger_client is None :
7180 skip = "Requires jaeger_client" # type: ignore[unreachable]
@@ -218,6 +227,167 @@ async def root() -> None:
218227 [scopes [1 ].span , scopes [2 ].span , scopes [0 ].span ],
219228 )
220229
    def test_run_in_background_active_scope_still_available(self) -> None:
        """
        Test that tasks running via `run_in_background` still have access to the
        active tracing scope.

        This is a regression test for a previous Synapse issue where the tracing scope
        would `__exit__` and close before the `run_in_background` task completed and our
        own previous custom `_LogContextScope.close(...)` would clear
        `LoggingContext.scope` preventing further tracing spans from having the correct
        parent.
        """
        reactor, clock = get_clock()

        # Shared map so the nested coroutines can hand their scopes back to the
        # test body for assertions after the fact.
        scope_map: Dict[str, opentracing.Scope] = {}

        # Flipped in `async_task`'s `finally` so the test can distinguish "task ran
        # to completion (or raised)" from "test didn't pump the reactor long enough".
        callback_finished = False

        async def async_task() -> None:
            nonlocal callback_finished
            try:
                logger.info("asdf async_task1 - %s", current_context().scope)
                root_scope = scope_map["root"]
                root_context = cast(jaeger_client.SpanContext, root_scope.span.context)

                self.assertEqual(
                    self._tracer.active_span,
                    root_scope.span,
                    "expected to inherit the root tracing scope from where this was run",
                )

                # Return control back to the reactor thread and wait an arbitrary amount
                await clock.sleep(4)

                logger.info("asdf async_task2 - %s", current_context().scope)

                # This is a key part of what we're testing! In a previous version of
                # Synapse, we would lose the active span at this point.
                self.assertEqual(
                    self._tracer.active_span,
                    root_scope.span,
                    "expected to still have a root tracing scope/span active",
                )

                # For completeness sake, let's also trace more sub-tasks here and assert
                # they have the correct span parents as well (root)

                # Start tracing some other sub-task.
                #
                # This is a key part of what we're testing! In a previous version of
                # Synapse, it would have the incorrect span parents.
                scope = start_active_span(
                    "task1",
                    tracer=self._tracer,
                )
                scope_map["task1"] = scope

                # Ensure the span parent is pointing to the root scope
                context = cast(jaeger_client.SpanContext, scope.span.context)
                self.assertEqual(
                    context.parent_id,
                    root_context.span_id,
                    "expected task1 parent to be the root span",
                )

                # Ensure that the active span is our new sub-task now
                self.assertEqual(self._tracer.active_span, scope.span)
                # Return control back to the reactor thread and wait an arbitrary amount
                await clock.sleep(4)
                # We should still see the active span as the scope wasn't closed yet
                self.assertEqual(self._tracer.active_span, scope.span)
                # Close the scope using the context manager lifecycle (`scope.close()`)
                with scope:
                    pass
            finally:
                # When exceptions happen, we still want to mark the callback as finished
                # so that the test can complete and we see the underlying error.
                callback_finished = True

        async def root() -> None:
            with start_active_span(
                "root span",
                tracer=self._tracer,
                # We will close this off later. We're basically just mimicking the same
                # pattern for how we handle requests. We pass the span off to the
                # request for it to finish.
                finish_on_close=False,
            ) as root_scope:
                scope_map["root"] = root_scope
                self.assertEqual(self._tracer.active_span, root_scope.span)

                logger.info("asdf root1 - %s", current_context().scope)

                # Fire-and-forget a task
                #
                # XXX: The root scope context manager will `__exit__` before this task
                # completes.
                run_in_background(async_task)

                logger.info("asdf root2 - %s", current_context().scope)

                # Because we used `run_in_background`, the active span should still be
                # the root.
                self.assertEqual(self._tracer.active_span, root_scope.span)

                logger.info("asdf root3 - %s", current_context().scope)

            # We shouldn't see any active spans outside of the scope
            self.assertIsNone(self._tracer.active_span)

        with LoggingContext(name="root logcontext", server_name="test_server"):
            # Start the test off
            d_root = defer.ensureDeferred(root())

            # Pump the reactor until the tasks complete
            #
            # We're manually pumping the reactor (and causing any pending callbacks to
            # be called) so we need to be in the sentinel logcontext to avoid leaking
            # our current logcontext into the reactor (which would then get picked up
            # and associated with the next thing the reactor does). `with
            # PreserveLoggingContext()` will reset the logcontext to the sentinel while
            # we're pumping the reactor in the block and return us back to our current
            # logcontext after the block.
            with PreserveLoggingContext():
                reactor.pump((2,) * 8)
                self.successResultOf(d_root)

            self.assertTrue(
                callback_finished,
                "Callback never finished which means the test probably didn't wait long enough",
            )

            # After we see all of the tasks are done (like a request when it
            # `_finished_processing`), let's finish our root span
            scope_map["root"].span.finish()

            # Sanity check again: We shouldn't see any active spans leftover in
            # this context.
            self.assertIsNone(self._tracer.active_span)

            # The spans should be reported in order of their finishing: task1,
            # then root.
            #
            # We use `assertIncludes` just as an easier way to see if items are missing or
            # added. We assert the order just below
            self.assertIncludes(
                set(self._reporter.get_spans()),
                {
                    scope_map["task1"].span,
                    scope_map["root"].span,
                },
                exact=True,
            )
            # This is where we actually assert the correct order
            self.assertEqual(
                self._reporter.get_spans(),
                [
                    scope_map["task1"].span,
                    scope_map["root"].span,
                ],
            )
221391 def test_trace_decorator_sync (self ) -> None :
222392 """
223393 Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`
0 commit comments