# run openresty
/usr/local/openresty/nginx/sbin/nginx -p $PWD -c nginx.conf -g "daemon off;"
# run systemtap script
./samples/blocking-bt.sxx -D STP_NO_OVERLOAD --arg time=60 --skip-badvars -x 144081
# trigger the workload
time curl localhost:10000/run
32000
status=200
{"access-control-allow-credentials":"true","connection":"close","content-type":"application\/json","server":"gunicorn\/19.9.0","access-control-allow-origin":"*","date":"Thu, 29 Sep 2022 10:30:42 GMT","content-length":"287"}
real 0m3.090s
user 0m0.006s
sys 0m0.006s
# then you could see the systemtap output
--- lua_resume block 2424 ms
>>> Hot CPU backtraces
__memcpy_avx_unaligned_erms
@./workload.lua:8
@./workload.lua:18
@./workload.lua:26
=content_by_lua(nginx.conf:17):2
367
ngx_md5_body
@./workload.lua:16
@./workload.lua:26
=content_by_lua(nginx.conf:17):2
169
propagatemark
@./workload.lua:8
@./workload.lua:18
@./workload.lua:26
=content_by_lua(nginx.conf:17):2
38
>>> Time-consuming syscall backtraces
0x0
C:meth_connect
@/usr/local/share/lua/5.1/socket/http.lua:130
@/usr/local/share/lua/5.1/socket/http.lua:354
C:protected_
@./workload.lua:28
=content_by_lua(nginx.conf:17):2
235173 us
0x0
C:meth_receive
@/usr/local/share/lua/5.1/socket/http.lua:160
@/usr/local/share/lua/5.1/socket/http.lua:362
C:protected_
@./workload.lua:28
=content_by_lua(nginx.conf:17):2
1472811 us
-
-
Save kingluo/50b8589069f5f11a78d2136c1155d613 to your computer and use it in GitHub Desktop.
How to trace nginx blocking issue via systemtap?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env stap++ | |
@use nginx.lua | |
@use luajit | |
global tracing = 0 | |
global start_time, syscall_start_time | |
global bts, syscall_bts | |
function lua_bt() | |
{ | |
if (@defined(@var("globalL", "$^exec_path"))) { | |
mL = @var("globalL", "$^exec_path") | |
} else { | |
mL = ngx_lua_get_main_lua_vm() | |
} | |
if (mL == 0) { | |
return "" | |
} | |
g = luajit_G(mL) | |
if (g == 0) { | |
return "" | |
} | |
L = luajit_cur_thread(g) | |
if (L == 0) { | |
return "" | |
} | |
bt = luajit_backtrace(L, g, 0) | |
if (bt != "") { | |
f = probefunc() | |
if (f != "") { | |
bt = f . "\n" . bt | |
} | |
return bt | |
} | |
return "" | |
} | |
probe $^arg_probe :default(timer.profile) | |
{ | |
if (tid() == target() && tracing) { | |
bts[lua_bt()] <<< 1 | |
} | |
} | |
%( "$^arg_time :default()" != "" %? | |
probe timer.s($^arg_time) | |
{ | |
warn("Time's up. Quitting now...\n") | |
exit() | |
} | |
%) | |
probe begin | |
{ | |
warn(sprintf("Start tracing %d ($^exec_path)\n", target())) | |
%( "$^arg_time" != "" %? | |
warn(sprintf("Please wait for $^arg_time seconds...\n")) | |
%: | |
warn("Hit Ctrl-C to end.\n") | |
%) | |
} | |
probe process("$^libluajit_path").function("lua_resume") | |
{ | |
if (tid() == target()) { | |
tracing = 1 | |
start_time = gettimeofday_ms() | |
} | |
} | |
probe process("$^libluajit_path").function("lua_resume").return | |
{ | |
if (tid() == target()) { | |
tracing = 0 | |
elapsed = gettimeofday_ms() - start_time | |
if (elapsed >= $^arg_min_elapsed :default(200)) { | |
printf("--- lua_resume block %d ms\n", elapsed) | |
banner = 0 | |
foreach (bt in bts- limit $^arg_limit :default(3)) { | |
cnt = @count(bts[bt]) | |
if (cnt <= $^arg_min_bt :default(2)) { | |
break | |
} | |
if (banner == 0) { | |
printf(">>> Hot CPU backtraces\n") | |
banner = 1 | |
} | |
printf("%s\t%d\n", bt, cnt) | |
} | |
banner = 0 | |
foreach (lbt in syscall_bts- limit $^arg_syscall_limit :default(2)) { | |
total_us = @sum(syscall_bts[lbt]) | |
if (total_us >= 1000) { | |
if (banner == 0) { | |
printf(">>> Time-consuming syscall backtraces\n") | |
banner = 1 | |
} | |
printf("%s\t%d us\n", lbt, total_us) | |
} | |
} | |
printf("\n") | |
} | |
delete bts | |
delete syscall_bts | |
} | |
} | |
probe kernel.trace("sys_enter") { | |
if (tid() == target() && tracing) { | |
syscall_start_time = gettimeofday_us() | |
} | |
} | |
probe kernel.trace("sys_exit") { | |
if (tid() == target() && tracing) { | |
elapsed = gettimeofday_us() - syscall_start_time | |
if (elapsed >= $^arg_syscall_min_elapsed_us :default(400)) { | |
syscall_bts[lua_bt()] <<< elapsed | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
error_log /dev/stderr info; | |
worker_processes 1; | |
events {} | |
http { | |
init_by_lua_block { | |
-- jit.off() | |
} | |
server { | |
listen 10000; | |
location /run { | |
content_by_lua_block { | |
require("workload").run() | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local http = require("socket.http") | |
local cjson = require("cjson") | |
math.randomseed(os.time()) | |
local _M = {} | |
local function random_str(length) | |
local res = "" | |
for i = 1, length do | |
res = res .. string.char(math.random(97, 122)) | |
end | |
return res | |
end | |
local function run_md5() | |
local cnt = 0 | |
local buf = random_str(81920) | |
for _= 1,1000 do | |
cnt = cnt + #ngx.md5(buf) | |
end | |
return cnt | |
end | |
function _M.run() | |
local cnt = run_md5() | |
ngx.say(cnt) | |
local r, c, h = http.request { | |
method = "GET", | |
url = "http://httpbin.org/delay/1" | |
} | |
assert(r == 1) | |
ngx.say("status=", c) | |
ngx.say(cjson.encode(h)) | |
end | |
return _M |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment