@@ -389,6 +389,190 @@ func Gosched() {
 	mcall(gosched_m)
 }
 
+// Yield cooperatively yields if, and only if, the scheduler is "busy".
+//
+// This can be called by any work wishing to utilize strictly spare capacity
+// while minimizing the degree to which it delays other work from being promptly
+// scheduled.
+//
+// Yield is intended to have very low overhead, particularly in its no-op case
+// where there is idle capacity in the scheduler and the caller does not need to
+// yield. This should allow it to be called often, such as in the body of tight
+// loops, by any task wishing to yield promptly to waiting work.
+//
+// When there is waiting work, the yielding goroutine may briefly be rescheduled
+// after it, or may, in some cases, be parked in a waiting 'yield' state until
+// the scheduler next has spare capacity to resume it. Yield does not guarantee
+// fairness or starvation-prevention: once a goroutine yields, it may remain
+// parked until the scheduler next has idle capacity. This means Yield can block
+// for unbounded durations in the presence of sustained over-saturation; callers
+// are responsible for deciding where to call Yield to avoid priority inversions.
+//
+// Yield will never park if the calling goroutine is locked to an OS thread.
+func Yield() {
+	// Common/fast case: do nothing if npidle is non-zero, meaning there is
+	// an idle P and thus no reason to yield this one. Doing only this check here
+	// keeps Yield inlineable (cost ~70 of the 80 budget as of writing).
+	if sched.npidle.Load() == 0 {
+		maybeYield()
+	}
+}
+
+// maybeYield is called by Yield if npidle is zero, meaning there are no idle Ps
+// and thus there may be work to which the caller should yield. Such work could
+// be in the local runq of the caller's P, in the global runq, in the runq of
+// some other P, or even in the form of ready conns waiting to be noticed by a
+// netpoll, which would then ready runnable goroutines.
+//
+// Keeping this function extremely cheap is essential: it must be cheap enough
+// that callers can call it in very tight loops, as very frequent calls ensure a
+// task wishing to yield when work is waiting will do so promptly. Checking the
+// runq of every P or calling netpoll is too expensive to do on every call, so,
+// given that the intent is to bound how long work may wait, such checks only
+// need to be performed after some amount of time has elapsed (e.g. 0.25ms). To
+// minimize overhead when called at a higher frequency, this elapsed time is
+// checked with an exponential backoff.
+//
+// runqs are checked directly with non-atomic reads rather than runqempty: being
+// cheap is our top priority and a microsecond of staleness is fine as long as
+// the check does not get optimized out of a calling loop body (hence noinline).
+//
+//go:noinline
+func maybeYield() {
+	gp := getg()
+
+	// Don't park while locked to an OS thread.
+	if gp.lockedm != 0 {
+		return
+	}
+
+	// If the local P's runq ring buffer or runnext is non-empty, yield to the waiting G.
+	if p := gp.m.p.ptr(); p.runqhead != p.runqtail || p.runnext != 0 {
+		// If there is work in the local P's runq, we can yield by just going to the
+		// back of the local P's runq via goyield: this achieves the same goal of
+		// letting waiting work run instead of us, but without parking on the global
+		// yieldq and potentially switching Ps. While that's our preferred choice,
+		// we want to avoid thrashing back and forth between multiple Yield-calling
+		// goroutines: in such a case it is better to just park one so the other
+		// stops seeing it in the queue and yielding to it. To detect and break this
+		// cycle, we put a 1 in the yieldchecks field: if the other goroutine yields
+		// right back, but is then still in this runq bringing us here again, we'll
+		// see this 1 and park instead. We can clobber yieldchecks here since we're
+		// actively yielding -- we don't need the counter to decide to do so. And
+		// our sentinel will in turn be clobbered the very next time the time is put
+		// in the upper bits, which it will be when they're zero if we don't yield,
+		// so this sentinel should be relatively reliable in indicating thrashing.
+		if gp.yieldchecks == 1 {
+			yieldPark()
+			return
+		}
+		gp.yieldchecks = 1
+		// Go to the back of the local runq.
+		goyield()
+		return
+	}
+
+	// If the global runq is non-empty, park in the global yieldq right away: that
+	// is work someone needs to pick up and it might as well be our P. We could,
+	// potentially, directly claim it here and goyield or similar to try to
+	// remain on this P, but just parking and letting this P go to findRunnable
+	// avoids duplicating its logic and seems good enough.
+	if !sched.runq.empty() {
+		yieldPark()
+		return
+	}
+
+	// We didn't find anything via cheap O(1) checks of our runq or the global
+	// runq, but it is possible there are goroutines waiting in the runqs of other
+	// Ps that are not being stolen by an idle P -- the lack of idle Ps (npidle=0)
+	// is what got us here. Furthermore, given the lack of idle Ps, it is also
+	// possible that ready conns are waiting for a netpoll to notice them and
+	// ready their goroutines, i.e. work to which we should then yield. However,
+	// searching all runqs, and even more so netpoll, is too expensive for every
+	// maybeYield call: being extremely low overhead is essential to allowing
+	// Yield() to be called at a high enough frequency for the caller to respond
+	// to changing load promptly.
+	//
+	// Given that our main goal here is to reduce/bound *how long* work waits, we
+	// can do the more extensive/expensive checks that search all runqs and
+	// netpoll less often, but we still need to do them often "enough". Since the
+	// goal is to bound the time that work may wait before a call to Yield
+	// detects it, the time elapsed since the last check would be a good signal,
+	// but even checking nanotime() on each call to measure this would be too
+	// expensive. Instead, we check nanotime() with an exponential backoff using
+	// a simple counter, to ensure we avoid overly frequent time checks at higher
+	// call frequencies while still checking the time often at lower frequencies.
+	//
+	// To implement such a time-based cap with elapsed time checked on a subset
+	// of calls, we combine a call count and an elapsed-time indicator in a
+	// single uint32 on the G: its 11 lower bits store a counter, while the
+	// remaining 21 bits store nanos quantized to 0.25ms "epochs" by discarding
+	// the lower 18 bits of an int64 nanotime() value. When the counter value
+	// after an increment is of the form 2^k-1, we check whether the quantized
+	// time has changed and, if so, move on to the more thorough check for work.
+	//
+	// Choosing 11 bits for the counter allows backing off to a rate of checking
+	// the clock once every 1k calls if called extremely frequently; it seems
+	// unlikely a caller could call this at a frequency high enough to need a
+	// higher backoff. The remaining 21 bits allow ~9 minutes between rollovers
+	// of the epoch: the slim chance of a false negative is quite acceptable, as
+	// hitting it just delays one check of the runqs by a quarter millisecond.
+	const yieldCountBits, yieldCountMask = 11, (1 << 11) - 1
+	const yieldEpochShift = 18 - yieldCountBits // only need to shift by the difference, then mask.
+	gp.yieldchecks++
+	// Exponential backoff: only check the clock when the counter has the form 2^k-1.
+	if count := gp.yieldchecks & yieldCountMask; count&(count+1) == 0 {
+		prev := gp.yieldchecks &^ yieldCountMask
+		now := uint32(nanotime()>>yieldEpochShift) &^ yieldCountMask
+		if now != prev {
+			// Set yieldchecks to just the new high timestamp bits, clearing the counter.
+			gp.yieldchecks = now
+
+			// Check the runqs of all Ps; if we find anything, park to free this P to steal it.
+			for i := range allp {
+				// We don't need the extra accuracy (and cost) of runqempty here either;
+				// worst case we'll yield a check later or maybe park and unpark.
+				if allp[i].runqhead != allp[i].runqtail || allp[i].runnext != 0 {
+					yieldPark()
+					return
+				}
+			}
+
+			// Check netpoll; a ready conn is basically a runnable goroutine which we
+			// would yield to if we saw it, but the lack of idle Ps may mean nobody is
+			// checking this as often right now and there may be ready conns waiting.
+			if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 {
+				var found bool
+				systemstack(func() {
+					if list, delta := netpoll(0); !list.empty() {
+						injectglist(&list)
+						netpollAdjustWaiters(delta)
+						found = true
+					}
+				})
+				if found {
+					goyield()
+				}
+			}
+		} else if count == yieldCountMask {
+			// Counter overflowed before the epoch changed; reset it halfway back.
+			gp.yieldchecks = prev | (yieldCountMask / 2)
+		}
+	}
+}
+
+// yieldPark parks the current goroutine in a waiting state with reason yield
+// and puts it on the yieldq for findRunnable. A goroutine that has to park to
+// Yield is considered "waiting" rather than "runnable", as it is blocked in
+// this state until there is strictly spare execution capacity available to
+// resume it, unlike runnable goroutines, which generally get turns to run at
+// regular intervals. A parked, yielded goroutine is more akin to one blocked
+// on a cond var or lock that will be signaled when we next detect spare capacity.
+func yieldPark() {
+	checkTimeouts()
+	gopark(yield_put, nil, waitReasonYield, traceBlockPreempted, 1)
+}
+
 // goschedguarded yields the processor like gosched, but also checks
 // for forbidden states and opts out of the yield in those cases.
 //
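
The counter/epoch packing that maybeYield keeps in g.yieldchecks is compact but easy to misread. Below is a standalone sketch (not part of the commit) that reproduces the same encoding in ordinary Go so it can be run and inspected: time.Now().UnixNano() stands in for the runtime's nanotime(), and the yielder type, its checks field, and maybeCheck are invented names for illustration.

```go
package main

import (
	"fmt"
	"time"
)

// Same layout as g.yieldchecks in the commit: the low 11 bits count calls, the
// upper 21 bits hold a timestamp quantized to ~0.26ms (nanos >> 18).
const (
	countBits  = 11
	countMask  = (1 << countBits) - 1
	epochShift = 18 - countBits // shift by the difference, then mask off the counter bits
)

type yielder struct {
	checks uint32 // packed counter + epoch, like g.yieldchecks
	slow   int    // number of times the "expensive" path ran
}

// maybeCheck mimics the backoff in maybeYield: bump the counter, and only when
// it has the form 2^k-1 compare the stored epoch to the current one; if a
// ~0.26ms epoch boundary has passed, run the expensive check and store the new
// epoch with the counter cleared.
func (y *yielder) maybeCheck() {
	y.checks++
	if count := y.checks & countMask; count&(count+1) == 0 {
		prev := y.checks &^ countMask
		now := uint32(time.Now().UnixNano()>>epochShift) &^ countMask
		if now != prev {
			y.checks = now
			y.slow++ // stand-in for scanning all runqs / polling the network
		} else if count == countMask {
			// Counter saturated within one epoch; back off halfway so the clock
			// keeps being consulted roughly once per ~1k calls from here on.
			y.checks = prev | countMask/2
		}
	}
}

func main() {
	var y yielder
	start := time.Now()
	for i := 0; i < 10_000_000; i++ {
		y.maybeCheck()
	}
	fmt.Printf("%d expensive checks in %v\n", y.slow, time.Since(start))
}
```

However hot the loop, the expensive branch should run on the order of once per 0.26ms epoch, which is the bound the runtime change relies on.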
@@ -3546,6 +3730,23 @@ top:
 		}
 	}
 
+	// Nothing runnable, so check for yielded goroutines parked in yieldq.
+	if !sched.yieldq.empty() {
+		lock(&sched.lock)
+		bg := sched.yieldq.pop()
+		unlock(&sched.lock)
+		if bg != nil {
+			trace := traceAcquire()
+			casgstatus(bg, _Gwaiting, _Grunnable)
+			if trace.ok() {
+				// Match other ready paths for trace visibility.
+				trace.GoUnpark(bg, 0)
+				traceRelease(trace)
+			}
+			return bg, false, false
+		}
+	}
+
 	// We have nothing to do.
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
@@ -3616,6 +3817,12 @@ top:
 		}
 		return gp, false, false
 	}
+
+	// Re-check the yieldq, this time while holding sched.lock.
+	if !sched.yieldq.empty() {
+		unlock(&sched.lock)
+		goto top
+	}
 	if !mp.spinning && sched.needspinning.Load() == 1 {
 		// See "Delicate dance" comment below.
 		mp.becomeSpinning()
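
The two findRunnable changes above follow a familiar shape: prefer ordinary runnable work, fall back to the yieldq only when nothing else is found, and re-check the yieldq under sched.lock before the P commits to going idle, presumably so a goroutine parked by Yield in that window is not stranded. As a loose, user-space analogy of that check-then-recheck-under-lock pattern (illustration only; the workset type and every name in it are invented, not runtime code):

```go
package main

import (
	"fmt"
	"sync"
)

// workset prefers "runnable" items and only then drains "yielded" ones; it
// re-checks both lists while holding the lock before blocking, so an item
// pushed between an unlocked peek and going to sleep is never stranded.
type workset struct {
	mu       sync.Mutex
	cond     *sync.Cond
	runnable []string // analogue of the runqs
	yielded  []string // analogue of sched.yieldq
	closed   bool
}

func newWorkset() *workset {
	w := &workset{}
	w.cond = sync.NewCond(&w.mu)
	return w
}

func (w *workset) push(item string, yielded bool) {
	w.mu.Lock()
	if yielded {
		w.yielded = append(w.yielded, item)
	} else {
		w.runnable = append(w.runnable, item)
	}
	w.mu.Unlock()
	w.cond.Signal()
}

// next blocks until an item is available or the set is closed.
func (w *workset) next() (string, bool) {
	w.mu.Lock()
	defer w.mu.Unlock()
	for {
		if len(w.runnable) > 0 {
			item := w.runnable[0]
			w.runnable = w.runnable[1:]
			return item, true
		}
		if len(w.yielded) > 0 { // drained only when nothing runnable
			item := w.yielded[0]
			w.yielded = w.yielded[1:]
			return item, true
		}
		if w.closed {
			return "", false
		}
		w.cond.Wait() // both lists were re-checked under the lock; safe to block
	}
}

func main() {
	w := newWorkset()
	w.push("yielded-goroutine", true)
	w.push("regular-goroutine", false)

	w.mu.Lock()
	w.closed = true
	w.mu.Unlock()
	w.cond.Signal()

	for {
		item, ok := w.next()
		if !ok {
			break
		}
		fmt.Println("scheduling:", item)
	}
}
```

The point mirrored from the diff is that the final emptiness checks happen under the same lock that producers hold, so deciding to block (or, in the runtime, to idle the P) cannot race with a concurrent push.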
@@ -7416,6 +7623,20 @@ func (q *gQueue) popList() gList {
 	return stack
 }
 
+// yield_put is the gopark unlock function for Yield. It enqueues the goroutine
+// onto the global yieldq. Returning true keeps the G parked until another part
+// of the scheduler makes it runnable again; the G remains in _Gwaiting after
+// this returns. Nothing else will find or ready this G in the interim, since it
+// isn't on a runq: it only becomes visible once on the yieldq for findRunnable.
+//
+//go:nosplit
+func yield_put(gp *g, _ unsafe.Pointer) bool {
+	lock(&sched.lock)
+	sched.yieldq.pushBack(gp)
+	unlock(&sched.lock)
+	return true
+}
+
 // A gList is a list of Gs linked through g.schedlink. A G can only be
 // on one gQueue or gList at a time.
 type gList struct {
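
Finally, for illustration, a hypothetical caller of the API this commit adds, assuming it ships as runtime.Yield exactly as defined above (so this only builds against a toolchain containing the change); processBatch and everything else here is invented for the example:

```go
package main

import (
	"fmt"
	"runtime"
)

// processBatch is a stand-in for low-priority background work that wants to
// consume only strictly spare scheduler capacity.
func processBatch(items []int, fn func(int)) {
	for _, it := range items {
		fn(it)
		// Cheap in the common case (an idle P exists, so this is just a load
		// of sched.npidle); otherwise it briefly requeues or parks this
		// goroutine until capacity frees up.
		runtime.Yield()
	}
}

func main() {
	items := make([]int, 1_000_000)
	for i := range items {
		items[i] = i
	}
	sum := 0
	processBatch(items, func(v int) { sum += v })
	fmt.Println("sum:", sum)
}
```

Because the fast path is a single check of sched.npidle, placing the call in the loop body costs little when the machine has idle Ps, and only turns into a goyield or a park when other work is actually waiting.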