通过go创建一个goroutine。底层通过runtime.newproc()创建。
创建一个goroutine包括两部分:
go my_goroutine();
//对应汇编
0x0034 00052 (mygoroutine.go:13) CALL runtime.newproc(SB)
切换到系统栈(g0,或者gsignal),执行newproc1创建一个g。
// newproc creates a new goroutine running fn with siz bytes of arguments.
// It is the runtime entry point that the compiler emits for a `go f(...)`
// statement (CALL runtime.newproc in the assembly above).
func newproc(siz int32, fn *funcval) {
// The call arguments were pushed immediately after fn on the caller's
// stack, so the first argument lives one pointer-size past &fn.
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
gp := getg()
// getcallerpc must run here, NOT inside the closure below: it records
// the address of the `go` statement for gopc/traceback bookkeeping.
pc := getcallerpc()
// Run newproc1 on the system (g0) stack: it may take scheduler locks
// and must not trigger a user-stack growth.
systemstack(func() {
newproc1(fn, (*uint8)(argp), siz, gp, pc)
})
}
切换到系统栈,然后调用传入函数。
// systemstack runs fn on a system stack.
// If systemstack is called from the per-OS-thread (g0) stack, or
// if systemstack is called from the signal handling (gsignal) stack,
// systemstack calls fn directly and returns.
// Otherwise, systemstack is being called from the limited stack
// of an ordinary goroutine. In this case, systemstack switches
// to the per-OS-thread stack, calls fn, and switches back.
// It is common to use a func literal as the argument, in order
// to share inputs and outputs with the code around the call
// to system stack:
//
// ... set up y ...
// systemstack(func() {
// x = bigcall(y)
// })
// ... use x ...
//
// The body is implemented in assembly (runtime·systemstack below);
// this Go declaration only supplies the signature. The go:noescape
// directive asserts that fn does not escape through this call.
//
//go:noescape
func systemstack(fn func())
// func systemstack(fn func())
// arm64 implementation: decide whether a stack switch is needed, then
// call fn either directly or on the g0 stack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
MOVD fn+0(FP), R3 // R3 = fn
MOVD R3, R26 // context
MOVD g_m(g), R4 // R4 = m
// Is the current g the signal-handling g (gsignal)?
MOVD m_gsignal(R4), R5 // R5 = gsignal
CMP g, R5
BEQ noswitch // already on a system stack: call fn directly
// Is the current g already g0?
MOVD m_g0(R4), R5 // R5 = g0
CMP g, R5
BEQ noswitch // already on a system stack: call fn directly
// Not gsignal and not g0: if this is the ordinary user g (curg),
// switch to the g0 stack.
MOVD m_curg(R4), R6
CMP g, R6
BEQ switch
// Fatal: unknown g — report and abort the process.
// Bad: g is not gsignal, not g0, not curg. What is it?
// Hide call from linker nosplit analysis.
MOVD $runtime·badsystemstack(SB), R3
BL (R3)
B runtime·abort(SB)
switch:
// save our state in g->sched. Pretend to
// be systemstack_switch if the G stack is scanned.
MOVD $runtime·systemstack_switch(SB), R6
ADD $8, R6 // get past prologue
MOVD R6, (g_sched+gobuf_pc)(g)
MOVD RSP, R0
MOVD R0, (g_sched+gobuf_sp)(g)
MOVD R29, (g_sched+gobuf_bp)(g)
MOVD $0, (g_sched+gobuf_lr)(g)
MOVD g, (g_sched+gobuf_g)(g)
// switch to g0
MOVD R5, g
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R3
// make it look like mstart called systemstack on g0, to stop traceback
SUB $16, R3
AND $~15, R3 // keep RSP 16-byte aligned, as the ABI requires
MOVD $runtime·mstart(SB), R4
MOVD R4, 0(R3)
MOVD R3, RSP
MOVD (g_sched+gobuf_bp)(g), R29
// call target function
MOVD 0(R26), R3 // code pointer
BL (R3)
// switch back to g
MOVD g_m(g), R3
MOVD m_curg(R3), g
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R0
MOVD R0, RSP
MOVD (g_sched+gobuf_bp)(g), R29
// Clear the saved sp/bp so the GC does not treat them as live.
MOVD $0, (g_sched+gobuf_sp)(g)
MOVD $0, (g_sched+gobuf_bp)(g)
RET
noswitch:
// already on m stack, just call directly
// Using a tail call here cleans up tracebacks since we won't stop
// at an intermediate systemstack.
MOVD 0(R26), R3 // code pointer
MOVD.P 16(RSP), R30 // restore LR
SUB $8, RSP, R29 // restore FP
B (R3)
分三步:
1. 创建g并设置入口函数(对于主goroutine是runtime.main();对于业务代码来说,就是go指定的函数),状态 _Gdead –> _Grunnable。
2. 将新创建的g放入等待运行队列。如果本地队列满了,则将本地队列一半的g放入全局队列。
3. 调用wakep(),其内部执行startm(nil, true);如果sched.pidle为空,则什么也不做。
// Create a new g running fn with narg bytes of arguments starting
// at argp. callerpc is the address of the go statement that created
// this. The new g is put on the queue of g's waiting to run.
//
// NOTE(review): abbreviated excerpt of the runtime source — the real
// body also declares _g_ := getg() and siz (narg rounded up) used below.
func newproc1(fn *funcval, argp *uint8, narg int32, callergp *g, callerpc uintptr) {
// 1. Argument-size check: the args must fit on a minimum-size stack.
if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
throw("newproc: function arguments too large for new goroutine")
}
// 2. Try to reuse a g object (and its stack) from the local or global
//    gfree list.
_p_ := _g_.m.p.ptr()
newg := gfget(_p_)
// 3. If both the local and global gfree lists are empty, allocate a
//    fresh g with a new stack, move it _Gidle -> _Gdead, and add it to
//    the global allgs list.
if newg == nil {
newg = malg(_StackMin)
// newg := new(g)
casgstatus(newg, _Gidle, _Gdead)
allgadd(newg)
}
// 4. The g obtained above must (atomically) read as _Gdead here,
//    otherwise the runtime throws.
// 5. Prepare the execution context: set gobuf.pc so the g starts in fn.
gostartcallfn(&newg.sched, fn)
// Record the address of the `go` statement that created this g.
newg.gopc = callerpc
// 6. If this is a system goroutine, bump the system-goroutine count.
// 7. Publish the goroutine: _Gdead -> _Grunnable.
casgstatus(newg, _Gdead, _Grunnable)
7.1 casgstatus内部通过CAS循环,等待将状态从_Gdead修改为_Grunnable。
7.2 如果CAS修改状态失败,则调用procyield空转若干个CPU周期(PAUSE)后重试。
// procyield(cycles): busy-spin for the given number of PAUSE
// iterations without entering the kernel. Used for short waits on
// nearly-ready conditions (e.g. the status CAS loop in casgstatus).
TEXT runtime·procyield(SB),NOSPLIT,$0-0
MOVL cycles+0(FP), AX
again:
PAUSE // hint to the CPU that this is a spin-wait loop
SUBL $1, AX
JNZ again
RET
7.3 如果自旋等待过久而g的状态仍不等于_Gdead,则调用osyield(底层是sched_yield系统调用),让出CPU等待状态的变更。
src/runtime/sys_linux_amd64.s
// osyield yields the CPU back to the OS scheduler via sched_yield(2).
// Used when a spin wait has gone on long enough that blocking is cheaper.
TEXT runtime·osyield(SB),NOSPLIT,$0
MOVL $SYS_sched_yield, AX
SYSCALL
RET
// 8. If tracing is enabled, emit a goroutine-creation trace event.
traceGoCreate(newg, newg.startpc)
// 9. Put the new g on the local run queue (p.runq); `true` requests
//    the runnext slot so the new g tends to run next.
runqput(_p_, newg, true)
9.1 如果开启了随机调度(randomizeScheduler),则以50%的概率(随机数模2==0)将该g作为下一个执行的g。
9.2 如果可以作为下个执行的g,设置到p.runnext.
9.3 如果不能作为下个执行的g,将g放入p的待执行队列,p.runq
9.4 如果p.runq队列满了,则调用runqputslow,把g放到全局运行队列
runqputslow会把本地运行队列中一半的g放到全局运行队列。
// 10. If an idle thread exists, wake it; otherwise start a new one.
// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
wakep()
尝试再增加一个p来执行g。当有g变为可运行状态时(newproc、ready)调用。
10.1 CAS将nmspinning从0置为1;多线程并发时,只有一个线程会成功。
// Only one caller may win the 0 -> 1 transition of nmspinning;
// concurrent callers lose the CAS and return without starting another M.
if !atomic.Cas(&sched.nmspinning, 0, 1) {
return
}
10.2 startm(nil, true)
1. 获取一个空闲的p,如果没有获取到什么也不做。
2. 从midle列表获取一个m。
3. 如果获取m失败,则创建一个新的系统线程
newm(fn, _p_)
4. 如果获取到m,则唤醒线程
notewakeup(&mp.park)
}
// Get from gfree list.
// If local list is empty, grab a batch from global list.
// gfget takes a g off the free-g lists for reuse.
// It serves from the P-local free list first; when that list is empty
// it refills up to 32 entries from the global sched.gFree lists (taken
// under sched.gFree.lock), preferring gs that still own a stack.
// Returns nil when no free g is available anywhere. A g whose stack
// was released in gfput gets a fresh _FixedStack-sized stack here.
func gfget(_p_ *p) *g {
	for {
		// Done refilling once the local list has entries, or when both
		// global lists (with and without stacks) are drained.
		if !_p_.gFree.empty() {
			break
		}
		if sched.gFree.stack.empty() && sched.gFree.noStack.empty() {
			break
		}
		lock(&sched.gFree.lock)
		// Move a batch of free Gs to the P.
		for _p_.gFree.n < 32 {
			// Prefer Gs with stacks.
			freed := sched.gFree.stack.pop()
			if freed == nil {
				if freed = sched.gFree.noStack.pop(); freed == nil {
					break
				}
			}
			sched.gFree.n--
			_p_.gFree.push(freed)
			_p_.gFree.n++
		}
		unlock(&sched.gFree.lock)
	}
	// Serve one g from the local free list.
	gp := _p_.gFree.pop()
	if gp == nil {
		return nil
	}
	_p_.gFree.n--
	if gp.stack.lo != 0 {
		// The g kept its stack; just re-mark the memory for the race
		// and msan detectors when they are enabled.
		if raceenabled {
			racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if msanenabled {
			msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		return gp
	}
	// Stack was deallocated in gfput. Allocate a new one.
	systemstack(func() {
		gp.stack = stackalloc(_FixedStack)
	})
	gp.stackguard0 = gp.stack.lo + _StackGuard
	return gp
}
新创建的g会放入p的本地运行队列(p.runq)中,如果队列已经满了(最多256个),则放入全局队列(同时会将当前p上一半的待运行g放入全局队列sched.runq)。