gcDrain函数扫描完根对象, 就会开始消费标记队列, 对从标记队列中取出的对象调用scanobject函数:
代码语言:javascript复制// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object or an oblet.
// scanobject consults the GC bitmap for the pointer mask and the
// spans for the size of the object.
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
// Note that arena_used may change concurrently during
// scanobject and hence scanobject may encounter a pointer to
// a newly allocated heap object that is *not* in
// [start,used). It will not mark this object; however, we
// know that it was just installed by a mutator, which means
// that mutator will execute a write barrier and take care of
// marking it. This is even more pronounced on relaxed memory
// architectures since we access arena_used without barriers
// or synchronization, but the same logic applies.
arena_start := mheap_.arena_start
arena_used := mheap_.arena_used
// Find the bits for b and the size of the object at b.
//
// b is either the beginning of an object, in which case this
// is the size of the object to scan, or it points to an
// oblet, in which case we compute the size to scan below.
// 获取对象对应的bitmap
hbits := heapBitsForAddr(b)
// 获取对象所在的span
s := spanOfUnchecked(b)
// 获取对象的大小
n := s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
// 对象大小过大时(maxObletBytes是128KB)需要分割扫描
// 每次最多只扫描128KB
if n > maxObletBytes {
// Large object. Break into oblets for better
// parallelism and lower latency.
if b == s.base() {
// It's possible this is a noscan object (not
// from greyobject, but from other code
// paths), in which case we must *not* enqueue
// oblets since their bitmaps will be
// uninitialized.
if s.spanclass.noscan() {
// Bypass the whole scan.
gcw.bytesMarked = uint64(n)
return
}
// Enqueue the other oblets to scan later.
// Some oblets may be in b's scalar tail, but
// these will be marked as "no more pointers",
// so we'll drop out immediately when we go to
// scan those.
for oblet := b maxObletBytes; oblet < s.base() s.elemsize; oblet = maxObletBytes {
if !gcw.putFast(oblet) {
gcw.put(oblet)
}
}
}
// Compute the size of the oblet. Since this object
// must be a large object, s.base() is the beginning
// of the object.
n = s.base() s.elemsize - b
if n > maxObletBytes {
n = maxObletBytes
}
}
// 扫描对象中的指针
var i uintptr
for i = 0; i < n; i = sys.PtrSize {
// 获取对应的bit
// Find bits for this word.
if i != 0 {
// Avoid needless hbits.next() on last iteration.
hbits = hbits.next()
}
// Load bits once. See CL 22712 and issue 16973 for discussion.
bits := hbits.bits()
// 检查scan bit判断是否继续扫描, 注意第二个scan bit是checkmark
// During checkmarking, 1-word objects store the checkmark
// in the type bit for the one word. The only one-word objects
// are pointers, or else they'd be merged with other non-pointer
// data into larger allocations.
if i != 1*sys.PtrSize && bits&bitScan == 0 {
break // no more pointers in this object
}
// 检查pointer bit, 不是指针则继续
if bits&bitPointer == 0 {
continue // not a pointer
}
// 取出指针的值
// Work here is duplicated in scanblock and above.
// If you make changes here, make changes there too.
obj := *(*uintptr)(unsafe.Pointer(b i))
// 如果指针在arena区域中, 则调用greyobject标记对象并把对象放到标记队列中
// At this point we have extracted the next potential pointer.
// Check if it points into heap and not back at the current object.
if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
// Mark the object.
if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw, objIndex)
}
}
}
// 统计扫描过的大小和对象数量
gcw.bytesMarked = uint64(n)
gcw.scanWork = int64(i)
}
在所有后台标记任务都把标记队列消费完毕时, 会执行gcMarkDone函数准备进入完成标记阶段(mark termination): 在并行GC中gcMarkDone会被执行两次, 第一次会禁止本地标记队列然后重新开始后台标记任务, 第二次会进入完成标记阶段(mark termination)。
代码语言:javascript复制// gcMarkDone transitions the GC from mark 1 to mark 2 and from mark 2
// to mark termination.
//
// This should be called when all mark work has been drained. In mark
// 1, this includes all root marking jobs, global work buffers, and
// active work buffers in assists and background workers; however,
// work may still be cached in per-P work buffers. In mark 2, per-P
// caches are disabled.
//
// The calling context must be preemptible.
//
// Note that it is explicitly okay to have write barriers in this
// function because completion of concurrent mark is best-effort
// anyway. Any work created by write barriers here will be cleaned up
// by mark termination.
func gcMarkDone() {
top:
semacquire(&work.markDoneSema)
// Re-check transition condition under transition lock.
if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) {
semrelease(&work.markDoneSema)
return
}
// 暂时禁止启动新的后台标记任务
// Disallow starting new workers so that any remaining workers
// in the current mark phase will drain out.
//
// TODO(austin): Should dedicated workers keep an eye on this
// and exit gcDrain promptly?
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, -0xffffffff)
atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, -0xffffffff)
// 判断本地标记队列是否已禁用
if !gcBlackenPromptly {
// 本地标记队列是否未禁用, 禁用然后重新开始后台标记任务
// Transition from mark 1 to mark 2.
//
// The global work list is empty, but there can still be work
// sitting in the per-P work caches.
// Flush and disable work caches.
// 禁用本地标记队列
// Disallow caching workbufs and indicate that we're in mark 2.
gcBlackenPromptly = true
// Prevent completion of mark 2 until we've flushed
// cached workbufs.
atomic.Xadd(&work.nwait, -1)
// GC is set up for mark 2. Let Gs blocked on the
// transition lock go while we flush caches.
semrelease(&work.markDoneSema)
// 把所有本地标记队列中的对象都推到全局标记队列
systemstack(func() {
// Flush all currently cached workbufs and
// ensure all Ps see gcBlackenPromptly. This
// also blocks until any remaining mark 1
// workers have exited their loop so we can
// start new mark 2 workers.
forEachP(func(_p_ *p) {
_p_.gcw.dispose()
})
})
// 除错用
// Check that roots are marked. We should be able to
// do this before the forEachP, but based on issue
// #16083 there may be a (harmless) race where we can
// enter mark 2 while some workers are still scanning
// stacks. The forEachP ensures these scans are done.
//
// TODO(austin): Figure out the race and fix this
// properly.
gcMarkRootCheck()
// 允许启动新的后台标记任务
// Now we can start up mark 2 workers.
atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 0xffffffff)
atomic.Xaddint64(&gcController.fractionalMarkWorkersNeeded, 0xffffffff)
// 如果确定没有更多的任务则可以直接跳到函数顶部
// 这样就当作是第二次调用了
incnwait := atomic.Xadd(&work.nwait, 1)
if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
// This loop will make progress because
// gcBlackenPromptly is now true, so it won't
// take this same "if" branch.
goto top
}
} else {
// 记录完成标记阶段开始的时间和STW开始的时间
// Transition to mark termination.
now := nanotime()
work.tMarkTerm = now
work.pauseStart = now
// 禁止G被抢占
getg().m.preemptoff = "gcing"
// 停止所有运行中的G, 并禁止它们运行
systemstack(stopTheWorldWithSema)
// !!!!!!!!!!!!!!!!
// 世界已停止(STW)...
// !!!!!!!!!!!!!!!!
// The gcphase is _GCmark, it will transition to _GCmarktermination
// below. The important thing is that the wb remains active until
// all marking is complete. This includes writes made by the GC.
// 标记对根对象的扫描已完成, 会影响gcMarkRootPrepare中的处理
// Record that one root marking pass has completed.
work.markrootDone = true
// 禁止辅助GC和后台标记任务的运行
// Disable assists and background workers. We must do
// this before waking blocked assists.
atomic.Store(&gcBlackenEnabled, 0)
// 唤醒所有因为辅助GC而休眠的G
// Wake all blocked assists. These will run when we
// start the world again.
gcWakeAllAssists()
// Likewise, release the transition lock. Blocked
// workers and assists will run when we start the
// world again.
semrelease(&work.markDoneSema)
// 计算下一次触发gc需要的heap大小
// endCycle depends on all gcWork cache stats being
// flushed. This is ensured by mark 2.
nextTriggerRatio := gcController.endCycle()
// 进入完成标记阶段, 会重新启动世界
// Perform mark termination. This will restart the world.
gcMarkTermination(nextTriggerRatio)
}
}
gcMarkTermination函数会进入完成标记阶段:
代码语言:javascript复制func gcMarkTermination(nextTriggerRatio float64) {
// World is stopped.
// Start marktermination which includes enabling the write barrier.
// 禁止辅助GC和后台标记任务的运行
atomic.Store(&gcBlackenEnabled, 0)
// 重新允许本地标记队列(下次GC使用)
gcBlackenPromptly = false
// 设置当前GC阶段到完成标记阶段, 并启用写屏障
setGCPhase(_GCmarktermination)
// 记录开始时间
work.heap1 = memstats.heap_live
startTime := nanotime()
// 禁止G被抢占
mp := acquirem()
mp.preemptoff = "gcing"
_g_ := getg()
_g_.m.traceback = 2
// 设置G的状态为等待中这样它的栈可以被扫描
gp := _g_.m.curg
casgstatus(gp, _Grunning, _Gwaiting)
gp.waitreason = "garbage collection"
// 切换到g0运行
// Run gc on the g0 stack. We do this so that the g stack
// we're currently running on will no longer change. Cuts
// the root set down a bit (g0 stacks are not scanned, and
// we don't need to scan gc's internal state). We also
// need to switch to g0 so we can shrink the stack.
systemstack(func() {
// 开始STW中的标记
gcMark(startTime)
// 必须立刻返回, 因为外面的G的栈有可能被移动, 不能在这之后访问外面的变量
// Must return immediately.
// The outer function's stack may have moved
// during gcMark (it shrinks stacks, including the
// outer function's stack), so we must not refer
// to any of its variables. Return back to the
// non-system stack to pick up the new addresses
// before continuing.
})
// 重新切换到g0运行
systemstack(func() {
work.heap2 = work.bytesMarked
// 如果启用了checkmark则执行检查, 检查是否所有可到达的对象都有标记
if debug.gccheckmark > 0 {
// Run a full stop-the-world mark using checkmark bits,
// to check that we didn't forget to mark anything during
// the concurrent mark process.
gcResetMarkState()
initCheckmarks()
gcMark(startTime)
clearCheckmarks()
}
// 设置当前GC阶段到关闭, 并禁用写屏障
// marking is complete so we can turn the write barrier off
setGCPhase(_GCoff)
// 唤醒后台清扫任务, 将在STW结束后开始运行
gcSweep(work.mode)
// 除错用
if debug.gctrace > 1 {
startTime = nanotime()
// The g stacks have been scanned so
// they have gcscanvalid==true and gcworkdone==true.
// Reset these so that all stacks will be rescanned.
gcResetMarkState()
finishsweep_m()
// Still in STW but gcphase is _GCoff, reset to _GCmarktermination
// At this point all objects will be found during the gcMark which
// does a complete STW mark and object scan.
setGCPhase(_GCmarktermination)
gcMark(startTime)
setGCPhase(_GCoff) // marking is done, turn off wb.
gcSweep(work.mode)
}
})
// 设置G的状态为运行中
_g_.m.traceback = 0
casgstatus(gp, _Gwaiting, _Grunning)
// 跟踪处理
if trace.enabled {
traceGCDone()
}
// all done
mp.preemptoff = ""
if gcphase != _GCoff {
throw("gc done but gcphase != _GCoff")
}
// 更新下一次触发gc需要的heap大小(gc_trigger)
// Update GC trigger and pacing for the next cycle.
gcSetTriggerRatio(nextTriggerRatio)
// 更新用时记录
// Update timing memstats
now := nanotime()
sec, nsec, _ := time_now()
unixNow := sec*1e9 int64(nsec)
work.pauseNS = now - work.pauseStart
work.tEnd = now
atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user
atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us
memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS)
memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(unixNow)
memstats.pause_total_ns = uint64(work.pauseNS)
// 更新所用cpu记录
// Update work.totaltime.
sweepTermCpu := int64(work.stwprocs) * (work.tMark - work.tSweepTerm)
// We report idle marking time below, but omit it from the
// overall utilization here since it's "free".
markCpu := gcController.assistTime gcController.dedicatedMarkTime gcController.fractionalMarkTime
markTermCpu := int64(work.stwprocs) * (work.tEnd - work.tMarkTerm)
cycleCpu := sweepTermCpu markCpu markTermCpu
work.totaltime = cycleCpu
// Compute overall GC CPU utilization.
totalCpu := sched.totaltime (now-sched.procresizetime)*int64(gomaxprocs)
memstats.gc_cpu_fraction = float64(work.totaltime) / float64(totalCpu)
// 重置清扫状态
// Reset sweep state.
sweep.nbgsweep = 0
sweep.npausesweep = 0
// 统计强制开始GC的次数
if work.userForced {
memstats.numforcedgc
}
// 统计执行GC的次数然后唤醒等待清扫的G
// Bump GC cycle count and wake goroutines waiting on sweep.
lock(&work.sweepWaiters.lock)
memstats.numgc
injectglist(work.sweepWaiters.head.ptr())
work.sweepWaiters.head = 0
unlock(&work.sweepWaiters.lock)
// 性能统计用
// Finish the current heap profiling cycle and start a new
// heap profiling cycle. We do this before starting the world
// so events don't leak into the wrong cycle.
mProf_NextCycle()
// 重新启动世界
systemstack(startTheWorldWithSema)
// !!!!!!!!!!!!!!!
// 世界已重新启动...
// !!!!!!!!!!!!!!!
// 性能统计用
// Flush the heap profile so we can start a new cycle next GC.
// This is relatively expensive, so we don't do it with the
// world stopped.
mProf_Flush()
// 移动标记队列使用的缓冲区到自由列表, 使得它们可以被回收
// Prepare workbufs for freeing by the sweeper. We do this
// asynchronously because it can take non-trivial time.
prepareFreeWorkbufs()
// 释放未使用的栈
// Free stack spans. This must be done between GC cycles.
systemstack(freeStackSpans)
// 除错用
// Print gctrace before dropping worldsema. As soon as we drop
// worldsema another cycle could start and smash the stats
// we're trying to print.
if debug.gctrace > 0 {
util := int(memstats.gc_cpu_fraction * 100)
var sbuf [24]byte
printlock()
print("gc ", memstats.numgc,
" @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ",
util, "%: ")
prev := work.tSweepTerm
for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} {
if i != 0 {
print(" ")
}
print(string(fmtNSAsMS(sbuf[:], uint64(ns-prev))))
prev = ns
}
print(" ms clock, ")
for i, ns := range []int64{sweepTermCpu, gcController.assistTime, gcController.dedicatedMarkTime gcController.fractionalMarkTime, gcController.idleMarkTime, markTermCpu} {
if i == 2 || i == 3 {
// Separate mark time components with /.
print("/")
} else if i != 0 {
print(" ")
}
print(string(fmtNSAsMS(sbuf[:], uint64(ns))))
}
print(" ms cpu, ",
work.heap0>>20, "->", work.heap1>>20, "->", work.heap2>>20, " MB, ",
work.heapGoal>>20, " MB goal, ",
work.maxprocs, " P")
if work.userForced {
print(" (forced)")
}
print("n")
printunlock()
}
semrelease(&worldsema)
// Careful: another GC cycle may start now.
// 重新允许当前的G被抢占
releasem(mp)
mp = nil
// 如果是并行GC, 让当前M继续运行(会回到gcBgMarkWorker然后休眠)
// 如果不是并行GC, 则让当前M开始调度
// now that gc is done, kick off finalizer thread if needed
if !concurrentSweep {
// give the queued finalizers, if any, a chance to run
Gosched()
}
}
gcSweep函数会唤醒后台清扫任务: 后台清扫任务会在程序启动时调用的gcenable函数中启动.
代码语言:javascript复制func gcSweep(mode gcMode) {
if gcphase != _GCoff {
throw("gcSweep being done but phase is not GCoff")
}
// 增加sweepgen, 这样sweepSpans中两个队列角色会交换, 所有span都会变为"待清扫"的span
lock(&mheap_.lock)
mheap_.sweepgen = 2
mheap_.sweepdone = 0
if mheap_.sweepSpans[mheap_.sweepgen/2%2].index != 0 {
// We should have drained this list during the last
// sweep phase. We certainly need to start this phase
// with an empty swept list.
throw("non-empty swept list")
}
mheap_.pagesSwept = 0
unlock(&mheap_.lock)
// 如果非并行GC则在这里完成所有工作(STW中)
if !_ConcurrentSweep || mode == gcForceBlockMode {
// Special case synchronous sweep.
// Record that no proportional sweeping has to happen.
lock(&mheap_.lock)
mheap_.sweepPagesPerByte = 0
unlock(&mheap_.lock)
// Sweep all spans eagerly.
for sweepone() != ^uintptr(0) {
sweep.npausesweep
}
// Free workbufs eagerly.
prepareFreeWorkbufs()
for freeSomeWbufs(false) {
}
// All "free" events for this mark/sweep cycle have
// now happened, so we can make this profile cycle
// available immediately.
mProf_NextCycle()
mProf_Flush()
return
}
// 唤醒后台清扫任务
// Background sweep.
lock(&sweep.lock)
if sweep.parked {
sweep.parked = false
ready(sweep.g, 0, true)
}
unlock(&sweep.lock)
}
后台清扫任务的函数是bgsweep:
代码语言:javascript复制func bgsweep(c chan int) {
sweep.g = getg()
// 等待唤醒
lock(&sweep.lock)
sweep.parked = true
c <- 1
goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1)
// 循环清扫
for {
// 清扫一个span, 然后进入调度(一次只做少量工作)
for gosweepone() != ^uintptr(0) {
sweep.nbgsweep
Gosched()
}
// 释放一些未使用的标记队列缓冲区到heap
for freeSomeWbufs(true) {
Gosched()
}
// 如果清扫未完成则继续循环
lock(&sweep.lock)
if !gosweepdone() {
// This can happen if a GC runs between
// gosweepone returning ^0 above
// and the lock being acquired.
unlock(&sweep.lock)
continue
}
// 否则让后台清扫任务进入休眠, 当前M继续调度
sweep.parked = true
goparkunlock(&sweep.lock, "GC sweep wait", traceEvGoBlock, 1)
}
}
gosweepone函数会从sweepSpans中取出单个span清扫:
代码语言:javascript复制//go:nowritebarrier
func gosweepone() uintptr {
var ret uintptr
// 切换到g0运行
systemstack(func() {
ret = sweepone()
})
return ret
}
sweepone函数如下:
代码语言:javascript复制// sweeps one span
// returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
//go:nowritebarrier
func sweepone() uintptr {
_g_ := getg()
sweepRatio := mheap_.sweepPagesPerByte // For debugging
// 禁止G被抢占
// increment locks to ensure that the goroutine is not preempted
// in the middle of sweep thus leaving the span in an inconsistent state for next GC
_g_.m.locks
// 检查是否已完成清扫
if atomic.Load(&mheap_.sweepdone) != 0 {
_g_.m.locks--
return ^uintptr(0)
}
// 更新同时执行sweep的任务数量
atomic.Xadd(&mheap_.sweepers, 1)
npages := ^uintptr(0)
sg := mheap_.sweepgen
for {
// 从sweepSpans中取出一个span
s := mheap_.sweepSpans[1-sg/2%2].pop()
// 全部清扫完毕时跳出循环
if s == nil {
atomic.Store(&mheap_.sweepdone, 1)
break
}
// 其他M已经在清扫这个span时跳过
if s.state != mSpanInUse {
// This can happen if direct sweeping already
// swept this span, but in that case the sweep
// generation should always be up-to-date.
if s.sweepgen != sg {
print("runtime: bad span s.state=", s.state, " s.sweepgen=", s.sweepgen, " sweepgen=", sg, "n")
throw("non in-use span in unswept list")
}
continue
}
// 原子增加span的sweepgen, 失败表示其他M已经开始清扫这个span, 跳过
if s.sweepgen != sg-2 || !atomic.Cas(&s.sweepgen, sg-2, sg-1) {
continue
}
// 清扫这个span, 然后跳出循环
npages = s.npages
if !s.sweep(false) {
// Span is still in-use, so this returned no
// pages to the heap and the span needs to
// move to the swept in-use list.
npages = 0
}
break
}
// 更新同时执行sweep的任务数量
// Decrement the number of active sweepers and if this is the
// last one print trace information.
if atomic.Xadd(&mheap_.sweepers, -1) == 0 && atomic.Load(&mheap_.sweepdone) != 0 {
if debug.gcpacertrace > 0 {
print("pacer: sweep done at heap size ", memstats.heap_live>>20, "MB; allocated ", (memstats.heap_live-mheap_.sweepHeapLiveBasis)>>20, "MB during sweep; swept ", mheap_.pagesSwept, " pages at ", sweepRatio, " pages/byten")
}
}
// 允许G被抢占
_g_.m.locks--
// 返回清扫的页数
return npages
}
版权申明:内容来源网络,版权归原创者所有。除非无法确认,我们都会标明作者及出处,如有侵权烦请告知,我们会立即删除并表示歉意。谢谢。