在日常运维sqlserver的过程中,偶发慢事务或存储过程与DDL语句(改表或者修改索引)需要锁定相同的资源,造成锁等待,如果不及时发现和处理,将影响到业务系统的稳定性。
下面是一个基于python的巡检脚本,逻辑很简单就是对巡检sql的包装后再加个dingding告警。
代码语言:python代码运行次数:0复制# 参考文档
# 锁等待 https://help.aliyun.com/document_detail/41801.html
# https://blog.csdn.net/zlbdmm/article/details/104608195
# pip install pymssql==2.2.7
# DingtalkChatbot==1.5.7
import pymssql
from dingtalkchatbot.chatbot import DingtalkChatbot
alerts_webhook = "https://oapi.dingtalk.com/robot/send?access_token=ef1b9d42de16cad003e9f4xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
alerts_secret = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
msg = DingtalkChatbot(webhook=alerts_webhook, secret=alerts_secret, fail_notice=True)
def lock_wait(host, port, db, user, passwd):
print(
f"=========================== {host}:{port} 锁等待巡检 ==========================="
)
conn = pymssql.connect(
host=host, port=port, user=user, password=passwd, database=db, charset="utf8"
)
cursor = conn.cursor()
sql = """
use master;
WITH CTE_SID ( BSID, SID, sql_handle,name )
AS ( SELECT a.[Blocking_Session_ID],
a.[Session_ID] ,
a.sql_handle,
bb.name
FROM sys.dm_exec_requests a inner join sys.sysdatabases bb on a.database_id = bb.dbid
WHERE a.[Blocking_Session_ID] <> 0
UNION ALL
SELECT A.[Blocking_Session_ID] ,
A.[Session_ID] ,
A.sql_handle,
bb.name
FROM sys.dm_exec_requests A inner join sys.sysdatabases bb on a.database_id = bb.dbid
JOIN CTE_SID B ON A.[Session_ID] = B.BSID
)
SELECT C.BSID,
C.SID ,
S.login_name ,
S.host_name ,
S.status ,
S.cpu_time ,
S.memory_usage ,
S.last_request_start_time ,
S.last_request_end_time ,
S.logical_reads ,
S.row_count ,
q.text,
c.name
FROM CTE_SID C
JOIN sys.dm_exec_sessions S ON C.sid = s.[Session_ID]
CROSS APPLY sys.dm_exec_sql_text(C.sql_handle) Q
ORDER BY sid ;
"""
cursor.execute(sql)
res = cursor.fetchall()
for i in res:
BSID = i[0] # 持有锁的sessionID
SID = i[1] # 等待锁的sessionID
login_name = i[2] # 被阻塞的用户名
host_name = i[3] # 被阻塞的用户地址
# status = i[4]
# cpu_time = i[5]
# memory_usage = i[6]
# last_request_start_time = i[7]
# last_request_end_time = i[8]
# logical_reads = i[9]
# row_count = i[10]
text = i[11].replace("rn", " ") # 具体的被阻塞的SQL
dbname = i[12] # 阻塞发生的数据库
print(
f"持有锁的会话ID: {BSID}, 等待锁的会话ID: {SID}, 被阻塞的用户名:{login_name}, 被阻塞的用户地址: {host_name}, 库名: {dbname}, 被阻塞的SQL: {text}"
)
# 发送钉钉告警消息
msg_title = "MSSQL锁等待巡检"
msg_content = "---- MSSQL锁等待巡检 ----" "nn"
"持有锁的会话ID: " str(BSID) "nn"
"等待锁的会话ID: " str(SID) "nn"
"被阻塞的用户名: " str(login_name) "nn"
"被阻塞的用户地址: " str(host_name) "nn"
"库名: " str(dbname) "nn"
"被阻塞的SQL: " str(text)[0:2000]
msg.send_markdown(title=msg_title, text=msg_content, is_at_all=False)
if __name__ == "__main__":
port = "1433"
db = "master"
user = "sa"
passwd = "Abcd1234"
host_list = ["127.0.0.1"]
for host in host_list:
lock_wait(host, port, db, user, passwd)