openGauss 3.1.0 版本gs_stack功能解密

2022-12-13 10:00:46 浏览数 (1)

不管是测试还是研发,工作中总会遇到各种各样的问题。比如,你有没有遇到过在数据库中执行某个SQL,却一直不返回结果,这时候的你是不是非常想看一下代码执行到了哪个函数?或者是数据库不响应连接,需要查看数据库当前线程的执行情况呢?而在实际生产中,获取生产系统进程堆栈比较麻烦,需要在服务端后台执行gstack命令。本期为大家介绍的openGauss 3.1.0版本中内置的gs_stack工具,则可以通过函数调用的方式输出指定线程的堆栈,用于解决现网环境缺少gstack工具无法获取调用栈的问题。

内置gs_stack工具介绍

在openGauss的很多客户场景中,会出现gdb、gstack等工具无法使用的情况,导致系统出现hang、慢等问题时,无法通过调用栈进行进一步的定位;还有一种情况是登录客户数据库服务器的流程非常繁杂,需要经过层层审批,而通过gsql等工具连接数据库则相对容易一些。针对以上痛点,openGauss通过复用自身未使用的操作系统信号,并在信号处理函数中获取调用栈的方式开发了调用栈工具,以获得服务端openGauss的调用栈。

获取调用栈主要包含两种方式,一种是通过执行SQL语句获取,另一种是通过gs_ctl工具执行命令获取。

1

在客户端工具执行gs_stack([tid])函数

使用具有monadmin或者sysadmin权限的用户,通过gsql或者其他工具连接数据库;

执行命令:

代码语言:javascript复制
openGauss=# select * from gs_stack();

返回当前openGauss所有线程的调用栈:

代码语言:javascript复制
       tid       | lwtid |  stack
  ---------------+-------+------------------------------------------------------------------
  14026731434848 | 2626  | __poll   0x2d
                 |       | WaitLatchOrSocket(Latch volatile*, int, int, long)   0x29f
                 |       | WaitLatch(Latch volatile*, int, long)   0x2e
                 |       | start_thread   0xc5
                 |       | clone   0xc5
  140116075071232| 23864 | __poll   0x2d
                 |       | poll   0x81
                 |       | WaitLatchOrSocket(Latch volatile*, int, int, long)   0x6af       
                 |       | WaitLatch(Latch volatile*, int, long)   0x2e                     
                 |       | ckpt_pagewriter_sub_thread_loop()   0x284                        
                 |       | ckpt_pagewriter_main()   0x92e                                   
                 |       | int GaussDbAuxiliaryThreadMain<(knl_thread_role)46>(knl_thread_arg*)   0x482                                                                   
                 |       | int GaussDbThreadMain<(knl_thread_role)46>(knl_thread_arg*)       0x854                                                                                       
                 |       | InternalThreadFunc(void*)   0x5c                                 
                 |       | ThreadStarterFunc(void*)   0xa4                                  
                 |       | start_thread   0xc5                                              
                 |       | clone   0x6d                                                     

只需要查看某一个线程的调用栈时,执行命令:

代码语言:javascript复制
openGauss=# select gs_stack(xxx);

说明

xxx为某个线程的thread_id,能够返回thread_id为xxx的线程的调用栈:

代码语言:javascript复制
                                        gs_stack
  ------------------------------------------------------------------------------------------
 pthread_sigmask   0x2a                                                                      
 gs_signal_recover_mask(__sigset_t)   0x17                                                  
 gs_signal_send(unsigned long, int, int)   0x2f9                                            
 signal_child(unsigned long, int, int)   0x36                                               
 get_stack_according_to_tid(unsigned long, StringInfoData*)   0x191                         
 gs_stack(FunctionCallInfoData*)   0xcb                                                     
 unsigned long ExecMakeFunctionResult<false, false, true>(FuncExprState*, ExprContext*, bool*, ExprDoneCond*)   0x554                                                               
 ExecEvalFunc(FuncExprState*, ExprContext*, bool*, ExprDoneCond*)   0x147                   
 ExecTargetList(List*, ExprContext*, unsigned long*, bool*, ExprDoneCond*, ExprDoneCond*)    0x15d                                                                                       
 ExecProject(ProjectionInfo*, ExprDoneCond*)   0x40f                                        
 ExecResult(ResultState*)   0x1da                                                           
 ExecResultWrap(PlanState*)   0x18                                                          
 ExecProcNode(PlanState*)   0xde                                                            
 ExecutePlan(EState*, PlanState*, CmdType, bool, long, ScanDirection, _DestReceiver*)        0x1a6                                                                                       
 standard_ExecutorRun(QueryDesc*, ScanDirection, long)   0x3d9                              
 explain_ExecutorRun(QueryDesc*, ScanDirection, long)   0x109                               
 ExecutorRun(QueryDesc*, ScanDirection, long)   0x1ad                                       
 PortalRunSelect(PortalData*, bool, long, _DestReceiver*)   0x294                           
 PortalRun(PortalData*, long, bool, _DestReceiver*, _DestReceiver*, char*)   0x62e          
 exec_simple_query(char const*, MessageType, StringInfoData*)   0x12b0                      
 PostgresMain(int, char**, char const*, char const*)   0x2e10                               
 BackendRun(Port*)   0x327                                                                  
 int GaussDbThreadMain<(knl_thread_role)1>(knl_thread_arg*)   0x5a8                         
 InternalThreadFunc(void*)   0x2d                                                           
 ThreadStarterFunc(void*)   0xa4                                                            
 start_thread   0xc5                                                                        
 clone   0x6d                                                                               
  
代码语言:javascript复制
openGauss=# select gs_stack(140115727259392);
                                          gs_stack
--------------------------------------------------------------------------------------------
 __select   0x33                                                                            
 pg_usleep(long)   0xa1                                                                     
 pg_sleep(FunctionCallInfoData*)   0xeb                                                     
 unsigned long ExecMakeFunctionResultNoSets<false, false>(FuncExprState*, ExprContext*, bool*, ExprDoneCond*)   0x206f                                                              
 ExecEvalFunc(FuncExprState*, ExprContext*, bool*, ExprDoneCond*)   0x622                   
 ExecTargetList(List*, ExprContext*, unsigned long*, bool*, ExprDoneCond*, ExprDoneCond*)   0x45d                                                                                       
 ExecProject(ProjectionInfo*, ExprDoneCond*)   0xc2d                                        
 ExecResult(ResultState*)   0x79b                                                           
 ExecResultWrap(PlanState*)   0x18                                                          
 ExecProcNode(PlanState*)   0x2db                                                           
 ExecutePlan(EState*, PlanState*, CmdType, bool, long, ScanDirection, _DestReceiver*)   0x765                                                                                       
 standard_ExecutorRun(QueryDesc*, ScanDirection, long)   0xbb5                              
 explain_ExecutorRun(QueryDesc*, ScanDirection, long)   0x1f7                               
 ExecutorRun(QueryDesc*, ScanDirection, long)   0x947                                       
 PortalRunSelect(PortalData*, bool, long, _DestReceiver*)   0x7d2                           
 PortalRun(PortalData*, long, bool, _DestReceiver*, _DestReceiver*, char*)   0xe11          
 exec_simple_query(char const*, MessageType, StringInfoData*)   0x3929                      
 PostgresMain(int, char**, char const*, char const*)   0x61f8                               
 BackendRun(Port*)   0x64d                                                                  
 int GaussDbThreadMain<(knl_thread_role)1>(knl_thread_arg*)   0x9c7                         
 InternalThreadFunc(void*)   0x5c                                                           
 ThreadStarterFunc(void*)   0xa4                                                            
 start_thread   0xc5                                                                        
 clone   0x6d                                                                               

2

在服务器端使用gs_ctl stack -D data_dir命令

当线程池满,无法通过gsql连接数据库的时候,可以使用gs_ctl工具执行命令获取线程调用栈:

使用集群用户登录服务器,执行命令gs_ctl stack -D data_dir,其中data_dir是gaussdb数据目录的绝对路径:

代码语言:javascript复制
gs_ctl stack -D /path/to/install/data/

可以获取gaussdb所有线程的调用栈。

代码语言:javascript复制
[user@euler omm]$ gs_ctl stack -D /path/to/install/data/opengauss
[2022-11-03 20:17:59.288][19256][][gs_ctl]: gs_stack start:
Thread 0 tid<140120252633600> lwtid<23675>
__poll   0x2d
poll   0x81
CommWaitPollParam::caller(int (*)(pollfd*, unsigned long, int), unsigned long)   0xb1
int comm_socket_call<CommWaitPollParam, int (*)(pollfd*, unsigned long, int)>(CommWaitPollParam*, int (*)(pollfd*, unsigned long, int))   0x28
comm_poll(pollfd*, unsigned long, int)   0x388
ServerLoop()   0xb77
PostmasterMain(int, char**)   0x612e
main   0xaeb
__libc_start_main   0xf5
0x55feac9a9907

Thread 1 tid<140116236076800> lwtid<23848>
__poll   0x2d
poll   0x81
WaitLatchOrSocket(Latch volatile*, int, int, long)   0x6af
SysLoggerMain(int)   0x17c9
int GaussDbThreadMain<(knl_thread_role)17>(knl_thread_arg*)   0x860
InternalThreadFunc(void*)   0x5c
ThreadStarterFunc(void*)   0xa4
start_thread   0xc5
clone   0x6d

只需要查看某一个线程的调用栈时,执行命令:

代码语言:javascript复制
gs_ctl stack -D data_dir -I xxx

说明

data_dir是gaussdb数据目录的绝对路径,xxx指的是线程的lwpid(taskid),可以通过top -Hp的方式获取线程的lwpid,也可以通过ls /proc/yyyy/task获取线程的lwpid。yyyy指的是进程id,可以通过ps -ux | grep gaussdb获取。

代码语言:javascript复制
[user@euler omm]$ gs_ctl stack -D /path/to/install/data -I 23860
[2022-11-03 20:22:01.327][40608][][gs_ctl]: gs_stack start:
tid<140116142843648> lwtid<23860>
__poll   0x2d
poll   0x81
WaitLatchOrSocket(Latch volatile*, int, int, long)   0x6af
WaitLatch(Latch volatile*, int, long)   0x2e
ckpt_pagewriter_sub_thread_loop()   0x284
ckpt_pagewriter_main()   0x92e
int GaussDbAuxiliaryThreadMain<(knl_thread_role)46>(knl_thread_arg*)   0x482
int GaussDbThreadMain<(knl_thread_role)46>(knl_thread_arg*)   0x854
InternalThreadFunc(void*)   0x5c
ThreadStarterFunc(void*)   0xa4
start_thread   0xc5
clone   0x6d

总结

通过以上我们介绍的openGauss的gs_stack功能,我们可以很方便地定位某个openGauss线程正在做的事情,并可以根据这些函数调用情况判断当前openGauss任务是否出现了问题,以及发现性能瓶颈。后续,我们将会进一步在这个功能上进行演进,不断增强openGauss的核心竞争力。

0 人点赞