Nginx日志分析awk脚本

简单地写了一个awk脚本,用来分析nginx的日志,方便定位问题(建议分发至cdn的/home/5iops/tools目录下).
前提条件:Nginx日志格式定义如下:
# Access-log layout expected by analyze_awk.awk. After the script strips the
# double quotes and splits on whitespace it reads:
#   $1 = $remote_addr, $2 = $host, $7 = request URI, $9 = $status,
#   $10 = $body_bytes_sent, $NF = $request_time (must stay the last field).
log_format main '$remote_addr $host $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" "$http_x_forwarded_for" "$upstream_response_time" "$upstream_addr" "$upstream_status" "$request_time"';

access_log /home/logs/nginx/access.log main;

awk脚本见下(命名为analyze_awk.awk;脚本使用了asorti函数,需要用gawk运行):
# Runs once before any input is read: set the output field separator used by
# comma-separated `print` statements.
BEGIN {
    OFS = ";"
}

# Per-line accumulator: runs for every access-log record and updates the
# counters that the END block reports.
#
# Arrays written here:
#   sum_access                       total number of accepted records
#   arr_status_{num,bytes,time}[s]   per-status count / bytes / request time
#   arr_domain_num[d]                per-domain count
#   arr_domain_status[d,s]           per-domain, per-status count
#   arr_domain_url[d,u]              per-domain, per-URL count
#   arr_{domain,client}_{bytes,time} totals for 200/206 responses only
#   arr_{status,domain}_speedN[...]  latency histogram for 200/206 responses
{
    # Drop the double quotes from the record; modifying $0 makes awk re-split
    # the fields, so quoted values no longer carry quote characters.
    gsub(/"/, "")

    client = $1
    domain = $2
    url    = $7
    status = $9
    bytes  = $10
    time   = $NF + 0    # force numeric so a "-" placeholder counts as 0

    # Accept only records whose status field is a real three-digit code.
    # A bare `status > 0` turns into a string comparison when the field is not
    # numeric (e.g. a shifted field on a malformed request line), which would
    # let garbage through.
    if (status ~ /^[0-9][0-9][0-9]$/ && status > 0) {
        sum_access++
        ++arr_status_num[status]
        arr_status_bytes[status] += bytes
        arr_status_time[status]  += time

        ++arr_domain_num[domain]
        ++arr_domain_status[domain, status]
        ++arr_domain_url[domain, url]

        if (status == 200 || status == 206) {
            # Per-domain totals.
            arr_domain_bytes[domain] += bytes
            arr_domain_time[domain]  += time
            # Per-client (usually the IP address) totals.
            arr_client_bytes[client] += bytes
            arr_client_time[client]  += time

            # Latency histogram. The buckets are mutually exclusive so that a
            # request sitting exactly on a boundary (0.1, 0.2, ...) is counted
            # once and the per-status percentages add up to 100.
            if (time <= 0.1) {
                ++arr_status_speed1[status];  ++arr_domain_speed1[domain]
            } else if (time <= 0.2) {
                ++arr_status_speed2[status];  ++arr_domain_speed2[domain]
            } else if (time <= 0.5) {
                ++arr_status_speed5[status];  ++arr_domain_speed5[domain]
            } else if (time <= 1) {
                ++arr_status_speed10[status]; ++arr_domain_speed10[domain]
            } else if (time <= 2) {
                ++arr_status_speed20[status]; ++arr_domain_speed20[domain]
            } else if (time <= 5) {
                ++arr_status_speed50[status]; ++arr_domain_speed50[domain]
            } else {
                ++arr_status_speed5x[status]; ++arr_domain_speed5x[domain]
            }
        }
    }
}

# Report generator: runs once after all input has been consumed and prints
#   1. a per-status summary (with a latency breakdown for 200/206), and
#   2. the 20 busiest domains with their status-class rates and data volume.
# Requires gawk (asorti).
END {
    # ---- HTTP status statistics ------------------------------------------
    print "\033[40;33m##########################################################################################################################\033[0m"
    print "\033[40;32m#get http code summary: code,sum,rate,speed(Kb),avg_time(s),<0.1 rate,<0.2 rate,<0.5 rate,<1 rate,<2 rate,<5 rate,>5 rate#\033[0m"
    print "\033[40;33m##########################################################################################################################\033[0m"

    status_sort = asorti(arr_status_num, sort_status)
    for (i = 1; i <= status_sort; i++) {
        s = sort_status[i]
        n = arr_status_num[s]
        if (s == 200 || s == 206) {
            # Guard the throughput division: the accumulated request time is
            # 0 when every request was logged as 0.000.
            speed = (arr_status_time[s] > 0) ? arr_status_bytes[s] * 8 / (1024 * arr_status_time[s]) : 0
            # Twelve conversions for twelve values; the final one is the
            # ">5 rate" column announced in the header.
            printf "%s:%d,%.2f|%.2f|%.3f|%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n",
                s, n, n / sum_access, speed, arr_status_time[s] / n,
                arr_status_speed1[s]  / n * 100, arr_status_speed2[s]  / n * 100,
                arr_status_speed5[s]  / n * 100, arr_status_speed10[s] / n * 100,
                arr_status_speed20[s] / n * 100, arr_status_speed50[s] / n * 100,
                arr_status_speed5x[s] / n * 100
        } else if (s == 404 || s == 499 || s ~ /^[35][0-9][0-9]$/) {
            # NOTE(review): other codes (e.g. 400, 403) are counted in
            # sum_access but not listed here, as in the original script.
            printf "%s:%d,%.2f\n", s, n, n / sum_access
        }
    }

    # ---- Domain statistics -----------------------------------------------
    print "==================================================="
    print "\033[40;33m###########################################################################\033[0m"
    print "\033[40;32m#get domain summary: domain,sum,rate,valid rate,4xx rate,5xx rate,data(G)#\033[0m"
    print "\033[40;33m###########################################################################\033[0m"

    domain_sort = asorti(arr_domain_num, sort_domain)
    for (i = 1; i <= domain_sort; i++) {
        g = sort_domain[i]
        # Only domains that served at least one timed 200/206 response.
        if (arr_domain_time[g] > 0) {
            valid = 0; n4xx = 0; n5xx = 0
            # Walk every status seen in the log so the 4xx/5xx columns cover
            # the whole class rather than a hand-picked pair of codes.
            for (j = 1; j <= status_sort; j++) {
                code = sort_status[j]
                if ((g, code) in arr_domain_status) {
                    c = arr_domain_status[g, code]
                    # NOTE(review): "valid" keeps the original definition
                    # (200, 206, 301, 302); 304 is not included - confirm.
                    if (code == 200 || code == 206 || code == 301 || code == 302)
                        valid += c
                    else if (code ~ /^4[0-9][0-9]$/)
                        n4xx += c
                    else if (code ~ /^5[0-9][0-9]$/)
                        n5xx += c
                }
            }
            # Zero-padded count as the leading key component makes the
            # string ordering produced by asorti equal to numeric ordering.
            key = sprintf("%015d", arr_domain_num[g]) SUBSEP g
            rows[key] = sprintf("%s:%d,%.2f|%.2f,%.2f,%.2f,%.2f\n",
                g, arr_domain_num[g], arr_domain_num[g] / sum_access,
                valid / arr_domain_num[g], n4xx / arr_domain_num[g],
                n5xx / arr_domain_num[g],
                arr_domain_bytes[g] * 8 / (1024 * 1024 * 1024))
        }
    }

    # Emit the 20 busiest domains, highest request count first. Ranking is
    # done here instead of piping to `sort -t ',' -k 2 -nr | head -20`: with
    # the "domain:count,rate|..." layout that sort key was the rounded rate
    # column, not the count.
    row_count = asorti(rows, row_order)
    for (k = row_count; k >= 1 && k > row_count - 20; k--)
        printf "%s", rows[row_order[k]]
}

使用方法:

zcat /log/nginx/old/2012-06-11-20-45.log.gz | awk -f analyze_awk.awk -

######################################
#get http code summary: code,sum,rate# #返回状态码的分布
######################################
200,1783274,0.96
206,11120,0.01
301,1408,0.00
302,2745,0.00
304,51092,0.03
404,1705,0.00
499,3030,0.00
503,13,0.00
504,2,0.00
===================================================
##############################################################################
#get domain summary: domain,sum,rate,valid rate,4xx rate,5xx rate,traffic(G)# #返回前20的域名状态分布
##############################################################################
img1.5iops.cn,499579,0.27,0.99,0.00,0.00,77.85
webpic.5iops.cn,254274,0.14,0.99,0.00,0.00,22.44
static9.5iops.cn,148452,0.08,0.97,0.01,0.00,6.20
v.img.5iops.cn,85975,0.05,0.99,0.01,0.00,3.15
static1.5iops.cn,70666,0.04,0.97,0.00,0.00,6.01
img3.5iops.cn,69755,0.04,0.99,0.00,0.00,7.73
img6.5iops.cn,63908,0.03,0.99,0.00,0.00,4.68
img2.5iops.cn,58702,0.03,0.99,0.00,0.00,4.67
img8.5iops.cn,55865,0.03,0.99,0.00,0.00,2.74
asimgs.5iops.cn,51231,0.03,0.91,0.00,0.00,14.73
img5.5iops.cn,45377,0.02,0.99,0.00,0.00,2.15
client.qudao123.com,43153,0.02,0.94,0.01,0.00,3.67
img7.5iops.cn,42772,0.02,0.99,0.00,0.00,1.87
live2image0.5iops.cn,38327,0.02,0.81,0.00,0.00,2.12
focus.qudao123.com,35959,0.02,0.99,0.00,0.00,2.35
img26.5iops.cn,31084,0.02,1.00,0.00,0.00,3.74
bubble.qudao123.com,23983,0.01,1.00,0.00,0.00,0.10
tinydrag.huilaitech.com,22808,0.01,1.00,0.00,0.00,0.07
static.g.qudao123.com,20988,0.01,0.30,0.01,0.00,0.12
comment.qudao123.com,19181,0.01,1.00,0.00,0.00,0.51

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注