5454 freeRanges * freeRange // freeRanges is a linked list of free block ranges
5555 endBlock gcBlock // the block just past the end of the available space
5656 gcTotalAlloc uint64 // total number of bytes allocated
57- gcTotalBlocks uint64 // total number of allocated blocks
5857 gcMallocs uint64 // total number of allocations
59- gcFrees uint64 // total number of objects freed
60- gcFreedBlocks uint64 // total number of freed blocks
6158 gcLock task.PMutex // lock to avoid race conditions on multicore systems
6259)
6360
@@ -66,24 +63,28 @@ var zeroSizedAlloc uint8
6663
6764// Provide some abstraction over heap blocks.
6865
69- // blockState stores the four states in which a block can be. It is two bits in
70- // size.
66+ // blockState stores the four states in which a block can be.
67+ // It holds 1 bit in each nibble.
68+ // When stored into a state byte, each bit in a nibble corresponds to a different block.
69+ // For blocks A-D, a state byte would be laid out as 0bDCBA_DCBA.
7170type blockState uint8
7271
7372const (
74- blockStateFree blockState = 0 // 00
75- blockStateHead blockState = 1 // 01
76- blockStateTail blockState = 2 // 10
77- blockStateMark blockState = 3 // 11
78- blockStateMask blockState = 3 // 11
73+ blockStateLow blockState = 1
74+ blockStateHigh blockState = 1 << blocksPerStateByte
75+
76+ blockStateFree blockState = 0
77+ blockStateHead blockState = blockStateLow
78+ blockStateTail blockState = blockStateHigh
79+ blockStateMark blockState = blockStateLow | blockStateHigh
80+ blockStateMask blockState = blockStateLow | blockStateHigh
7981)
8082
83+ // blockStateEach is a mask that can be used to extract a nibble from the block state.
84+ const blockStateEach = 1 << blocksPerStateByte - 1
85+
8186// The value of a state byte in which every block is a 'tail' block.
82- const blockStateByteAllTails = 0 |
83- uint8 (blockStateTail << (stateBits * 3 )) |
84- uint8 (blockStateTail << (stateBits * 2 )) |
85- uint8 (blockStateTail << (stateBits * 1 )) |
86- uint8 (blockStateTail << (stateBits * 0 ))
87+ const blockStateByteAllTails = byte (blockStateTail ) * blockStateEach
8788
8889// String returns a human-readable version of the block state, for debugging.
8990func (s blockState ) String () string {
@@ -180,7 +181,7 @@ func (b gcBlock) stateByte() byte {
180181// Return the block state given a state byte. The state byte must have been
181182// obtained using b.stateByte(), otherwise the result is incorrect.
182183func (b gcBlock ) stateFromByte (stateByte byte ) blockState {
183- return blockState (stateByte >> (( b % blocksPerStateByte ) * stateBits )) & blockStateMask
184+ return blockState (stateByte >> (b % blocksPerStateByte )) & blockStateMask
184185}
185186
186187// State returns the current block state.
@@ -193,38 +194,12 @@ func (b gcBlock) state() blockState {
193194// from head to mark.
194195func (b gcBlock ) setState (newState blockState ) {
195196 stateBytePtr := (* uint8 )(unsafe .Add (metadataStart , b / blocksPerStateByte ))
196- * stateBytePtr |= uint8 (newState << (( b % blocksPerStateByte ) * stateBits ))
197+ * stateBytePtr |= uint8 (newState << (b % blocksPerStateByte ))
197198 if gcAsserts && b .state () != newState {
198199 runtimePanic ("gc: setState() was not successful" )
199200 }
200201}
201202
202- // markFree sets the block state to free, no matter what state it was in before.
203- func (b gcBlock ) markFree () {
204- stateBytePtr := (* uint8 )(unsafe .Add (metadataStart , b / blocksPerStateByte ))
205- * stateBytePtr &^= uint8 (blockStateMask << ((b % blocksPerStateByte ) * stateBits ))
206- if gcAsserts && b .state () != blockStateFree {
207- runtimePanic ("gc: markFree() was not successful" )
208- }
209- if gcAsserts {
210- * (* [wordsPerBlock ]uintptr )(unsafe .Pointer (b .address ())) = [wordsPerBlock ]uintptr {}
211- }
212- }
213-
214- // unmark changes the state of the block from mark to head. It must be marked
215- // before calling this function.
216- func (b gcBlock ) unmark () {
217- if gcAsserts && b .state () != blockStateMark {
218- runtimePanic ("gc: unmark() on a block that is not marked" )
219- }
220- clearMask := blockStateMask ^ blockStateHead // the bits to clear from the state
221- stateBytePtr := (* uint8 )(unsafe .Add (metadataStart , b / blocksPerStateByte ))
222- * stateBytePtr &^= uint8 (clearMask << ((b % blocksPerStateByte ) * stateBits ))
223- if gcAsserts && b .state () != blockStateHead {
224- runtimePanic ("gc: unmark() was not successful" )
225- }
226- }
227-
228203// objHeader is a structure prepended to every heap object to hold metadata.
229204type objHeader struct {
230205 // next is the next object to scan after this.
@@ -441,7 +416,6 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {
441416 // Update the total allocation counters.
442417 gcTotalAlloc += uint64 (rawSize )
443418 gcMallocs ++
444- gcTotalBlocks += uint64 (neededBlocks )
445419
446420 // Acquire a range of free blocks.
447421 var ranGC bool
@@ -586,10 +560,10 @@ func runGC() (freeBytes uintptr) {
586560
587561 // Sweep phase: free all non-marked objects and unmark marked objects for
588562 // the next collection cycle.
589- freeBytes = sweep ()
563+ sweep ()
590564
591565 // Rebuild the free ranges list.
592- buildFreeRanges ()
566+ freeBytes = buildFreeRanges ()
593567
594568 // Show how much has been swept, for debugging.
595569 if gcDebug {
@@ -700,45 +674,64 @@ func markRoot(addr, root uintptr) {
700674}
701675
702676// Sweep goes through all memory and frees unmarked memory.
703- // It returns how many bytes are free in the heap after the sweep.
704- func sweep () (freeBytes uintptr ) {
705- freeCurrentObject := false
706- var freed uint64
707- for block := gcBlock (0 ); block < endBlock ; block ++ {
708- switch block .state () {
709- case blockStateHead :
710- // Unmarked head. Free it, including all tail blocks following it.
711- block .markFree ()
712- freeCurrentObject = true
713- gcFrees ++
714- freed ++
715- case blockStateTail :
716- if freeCurrentObject {
717- // This is a tail object following an unmarked head.
718- // Free it now.
719- block .markFree ()
720- freed ++
721- }
722- case blockStateMark :
723- // This is a marked object. The next tail blocks must not be freed,
724- // but the mark bit must be removed so the next GC cycle will
725- // collect this object if it is unreferenced then.
726- block .unmark ()
727- freeCurrentObject = false
728- case blockStateFree :
729- freeBytes += bytesPerBlock
730- }
677+ func sweep () {
678+ metadataEnd := unsafe .Add (metadataStart , (endBlock + (blocksPerStateByte - 1 ))/ blocksPerStateByte )
679+ var carry byte
680+ for meta := metadataStart ; meta != metadataEnd ; meta = unsafe .Add (meta , 1 ) {
681+ // Fetch the state byte.
682+ stateBytePtr := (* byte )(unsafe .Pointer (meta ))
683+ stateByte := * stateBytePtr
684+
685+ // Separate blocks by type.
686+ // Split the nibbles.
687+ // Each nibble is a mask of blocks.
688+ high := stateByte >> blocksPerStateByte
689+ low := stateByte & blockStateEach
690+ // Marked heads are in both nibbles.
691+ markedHeads := low & high
692+ // Unmarked heads are in the low nibble but not the high nibble.
693+ unmarkedHeads := low &^ high
694+ // Tails are in the high nibble but not the low nibble.
695+ tails := high &^ low
696+
697+ // Clear all tail runs after unmarked (freed) heads.
698+ //
699+ // Adding 1 to the start of a bit run will clear the run and set the next bit:
700+ // (2^k - 1) + 1 = 2^k
701+ // e.g. 0b0011 + 1 = 0b0100
702+ // Bitwise-and with the original mask to clear the newly set bit.
703+ // e.g. (0b0011 + 1) & 0b0011 = 0b0100 & 0b0011 = 0b0000
704+ // This will not clear bits after the run because the gap stops the carry:
705+ // e.g. (0b1011 + 1) & 0b1011 = 0b1100 & 0b1011 = 0b1000
706+ // This can clear multiple runs in a single addition:
707+ // e.g. (0b1101 + 0b0101) & 0b1101 = 0b10010 & 0b1101 = 0b0000
708+ //
709+ // In order to find tail run starts after unmarked heads we could use tails & (unmarkedHeads << 1).
710+ // It is possible to omit the bitwise-and because the clear still works if the next block is not a tail.
711+ // A head is not a tail, so the corresponding missing tail bit will stop the carry from a previous tail run.
712+ // As such, it will set the next bit, which is then cleared again by the final bitwise-and with the tail mask.
713+ // e.g. HHTH: (0b0010 + (0b1101 << 1)) & 0b0010 = 0b11100 & 0b0010 = 0b0000
714+ //
715+ // Treat the whole heap as a single pair of integer masks.
716+ // This is accomplished for addition by carrying the overflow to the next state byte.
717+ // The unmarkedHeads << 1 is equivalent to unmarkedHeads + unmarkedHeads, so it can be merged with the sum.
718+ // This does not require any special work for the bitwise-and because it operates bitwise.
719+ tailClear := tails + (unmarkedHeads << 1 ) + carry
720+ carry = tailClear >> blocksPerStateByte
721+ tails &= tailClear
722+
723+ // Construct the new state byte.
724+ * stateBytePtr = markedHeads | (tails << blocksPerStateByte )
731725 }
732- gcFreedBlocks += freed
733- freeBytes += uintptr (freed ) * bytesPerBlock
734- return
735726}
736727
737728// buildFreeRanges rebuilds the freeRanges list.
738729// This must be called after a GC sweep or heap grow.
739- func buildFreeRanges () {
730+ // It returns how many bytes are free in the heap.
731+ func buildFreeRanges () uintptr {
740732 freeRanges = nil
741733 block := endBlock
734+ var totalBlocks uintptr
742735 for {
743736 // Skip backwards over occupied blocks.
744737 for block > 0 && (block - 1 ).state () != blockStateFree {
@@ -755,13 +748,17 @@ func buildFreeRanges() {
755748 }
756749
757750 // Insert the free range.
758- insertFreeRange (block .pointer (), uintptr (end - block ))
751+ len := uintptr (end - block )
752+ totalBlocks += len
753+ insertFreeRange (block .pointer (), len )
759754 }
760755
761756 if gcDebug {
762757 println ("free ranges after rebuild:" )
763758 dumpFreeRangeCounts ()
764759 }
760+
761+ return totalBlocks * bytesPerBlock
765762}
766763
767764func dumpFreeRangeCounts () {
@@ -801,28 +798,75 @@ func dumpHeap() {
801798// call to ReadMemStats. This would not do GC implicitly for you.
802799func ReadMemStats (m * MemStats ) {
803800 gcLock .Lock ()
804- m .HeapIdle = 0
805- m .HeapInuse = 0
806- for block := gcBlock (0 ); block < endBlock ; block ++ {
807- bstate := block .state ()
808- if bstate == blockStateFree {
809- m .HeapIdle += uint64 (bytesPerBlock )
810- } else {
811- m .HeapInuse += uint64 (bytesPerBlock )
812- }
813- }
814- m .HeapReleased = 0 // always 0, we don't currently release memory back to the OS.
815- m .HeapSys = m .HeapInuse + m .HeapIdle
801+
802+ // Calculate the raw size of the heap.
803+ heapEnd := heapEnd
804+ heapStart := heapStart
805+ m .Sys = uint64 (heapEnd - heapStart )
806+ m .HeapSys = uint64 (uintptr (metadataStart ) - heapStart )
807+ metadataStart := metadataStart
808+ // TODO: should GCSys include objHeaders?
816809 m .GCSys = uint64 (heapEnd - uintptr (metadataStart ))
817- m .TotalAlloc = gcTotalAlloc
810+ m .HeapReleased = 0 // always 0, we don't currently release memory back to the OS.
811+
812+ // Count live heads and tails.
813+ var liveHeads , liveTails uintptr
814+ endBlock := endBlock
815+ metadataEnd := unsafe .Add (metadataStart , (endBlock + (blocksPerStateByte - 1 ))/ blocksPerStateByte )
816+ for meta := metadataStart ; meta != metadataEnd ; meta = unsafe .Add (meta , 1 ) {
817+ // Since we are outside of a GC, nothing is marked.
818+ // A bit in the low nibble implies a head.
819+ // A bit in the high nibble implies a tail.
820+ stateByte := * (* byte )(unsafe .Pointer (meta ))
821+ liveHeads += uintptr (count4LUT [stateByte & blockStateEach ])
822+ liveTails += uintptr (count4LUT [stateByte >> blocksPerStateByte ])
823+ }
824+
825+ // Add heads and tails to count live blocks.
826+ liveBlocks := liveHeads + liveTails
827+ liveBytes := uint64 (liveBlocks * bytesPerBlock )
828+ m .HeapInuse = liveBytes
829+ m .HeapAlloc = liveBytes
830+ m .Alloc = liveBytes
831+
832+ // Subtract live blocks from total blocks to count free blocks.
833+ freeBlocks := uintptr (endBlock ) - liveBlocks
834+ m .HeapIdle = uint64 (freeBlocks * bytesPerBlock )
835+
836+ // Record the number of allocated objects.
837+ gcMallocs := gcMallocs
818838 m .Mallocs = gcMallocs
819- m .Frees = gcFrees
820- m .Sys = uint64 (heapEnd - heapStart )
821- m .HeapAlloc = (gcTotalBlocks - gcFreedBlocks ) * uint64 (bytesPerBlock )
822- m .Alloc = m .HeapAlloc
839+
840+ // Subtract live objects from allocated objects to count freed objects.
841+ m .Frees = gcMallocs - uint64 (liveHeads )
842+
843+ // Record the total allocated bytes.
844+ m .TotalAlloc = gcTotalAlloc
845+
823846 gcLock .Unlock ()
824847}
825848
849+ // count4LUT is a lookup table used to count set bits in a 4-bit mask: the entry at index i (0-15) is the number of 1 bits in i.
850+ // TODO: replace with popcnt when available
851+ var count4LUT = [16 ]uint8 {
852+ 0b0000 : 0 ,
853+ 0b0001 : 1 ,
854+ 0b0010 : 1 ,
855+ 0b0011 : 2 ,
856+ 0b0100 : 1 ,
857+ 0b0101 : 2 ,
858+ 0b0110 : 2 ,
859+ 0b0111 : 3 ,
860+ 0b1000 : 1 ,
861+ 0b1001 : 2 ,
862+ 0b1010 : 2 ,
863+ 0b1011 : 3 ,
864+ 0b1100 : 2 ,
865+ 0b1101 : 3 ,
866+ 0b1110 : 3 ,
867+ 0b1111 : 4 ,
868+ }
869+
826870func SetFinalizer (obj interface {}, finalizer interface {}) {
827871 // Unimplemented: the body is empty, so obj and finalizer are ignored and nothing is registered.
828872}
0 commit comments