#!/bin/bash
#function:check band from squid log
#author:zhanglejie
#date:2014/7/3
# Clean up files left over from a previous run.
# -p tolerates an already existing directory while still reporting real
# failures (for example a permission error) on stderr.
mkdir -p /tmp/domain
rm -f /tmp/domain/domain_all.log
rm -f "/tmp/domain/pbl_report$(date +%Y%m%d).csv"
rm -f /tmp/domain/domain_name.log
#rm -f /tmp/domain/domain_name_tmp.log
# Filter the log.
# $1 is the gzip-compressed log to read. It is passed quoted so a path that
# contains spaces or glob characters stays a single argument; when no
# argument is given nothing is passed and zcat reads standard input.
#   1. awk: keep records whose 4th field matches "200" and whose 7th field
#      contains no "?", and print fields 5, 7 and 9.
#   2. awk -F "/": prefix each record with its third "/"-separated piece,
#      which for a "scheme://host/path" URL is the host.
#   3. sed: drop the first "DIRECT/" and the first "NONE/" on each line.
# The resulting records read "domain size url ip" (field meaning per the
# author's own note further down: "d1 size; d2 url; d3 ip; d4 domain").
zcat ${1:+"$1"} \
  | awk '$4~/200/ && $7!~/\?/{print $5" "$7" "$9}' \
  | awk -F "/" '{print $3" "$0}' \
  | sed -e 's/DIRECT\///' -e 's/NONE\///' > /tmp/domain/domain_all.log
#zcat $1|awk '$4~/200/ && $7!~/\?/{print $5" "$7" "$9}' > /tmp/domain/domain_all.log
#/usr/bin/split -b 1500000 /tmp/domain/domain_all.log
# Consolidate the full log
#function out_tmp()
#{
#while read line
#do
##d1 size; d2 url; d3 ip; d4 domain
# d1=`echo $line |awk '{print $1}'`
# d2=`echo $line |awk '{print $2}'`
# d3=`echo $line |awk '{print $3}'|awk -F "/" '{print $2}'`
# d4=`echo $line |awk '{print $2}'|awk -F "/" '{print $3}'`
#if [ $d3 == "-" ]
#then
#d3=`/usr/bin/nslookup $d4|grep "Address"|tail -n 1|awk -F ":" '{print $2}'`
#fi
# echo "$d4 $d1 $d2 $d3" >> /tmp/domain/domain_name_tmp.log
#done < $1
#}
#for i in `ls /tmp/domain/x*`
#do
#out_tmp $i &
#done
# Check whether the concurrent jobs have finished
#pid_num=0
#while [ ! $pid_num -eq 1 ]
#do
#pid_num=`ps aux|grep band_find.sh |wc -l`
#sleep 5
#done
# De-duplicate the filtered records.
sort -u /tmp/domain/domain_all.log > /tmp/domain/domain_name_all.log
# Ranking: count the records per first field (the domain) and order the
# resulting "count domain" lines by ascending count.
# NOTE(review): because the order is ascending, the "top10" slice below
# holds the ten LEAST frequent domains. If the most requested ones were
# intended this needs "sort -rn" -- confirm before changing.
awk '{print $1}' /tmp/domain/domain_name_all.log \
  | sort \
  | uniq -c \
  | sort -n > /tmp/domain/domain_sort.log
# Take three slices of the ranking by line number and keep only the domain
# column. Shell redirection is used for every slice so each file is always
# (re)created, even when the ranking is too short to reach the slice.
# NOTE(review): 100-120 and 500-520 are 21 lines each although the files
# are named "mid20"/"end20" -- confirm whether 20 entries were intended.
awk 'NR>=1 && NR<=10 {print $2}' /tmp/domain/domain_sort.log > /tmp/domain/domain_top10.log
awk 'NR>=100 && NR<=120 {print $2}' /tmp/domain/domain_sort.log > /tmp/domain/domain_mid20.log
awk 'NR>=500 && NR<=520 {print $2}' /tmp/domain/domain_sort.log > /tmp/domain/domain_end20.log
# Split the records by size (2nd field): strictly between 50000 and 100000
# go to domain_size.log, everything else to domain_size_other.log. The second
# condition is the exact complement of the first, so a record whose size is
# exactly 50000 or 100000 is kept in the "other" file instead of being lost
# from both.
awk '$2<100000 && $2>50000 {print $0}' /tmp/domain/domain_name_all.log > /tmp/domain/domain_size.log
awk '$2>=100000 || $2<=50000 {print $0}' /tmp/domain/domain_name_all.log > /tmp/domain/domain_size_other.log
## Output
#######################################
# Append up to three sample records per domain to the report, followed by a
# row of asterisks that closes the section.
#
# For every domain listed in the first file, records are taken from the
# preferred record file; only when that file has none for the domain are
# they taken from the fallback file instead.
#
# A record belongs to a domain when its first whitespace-separated field is
# exactly that domain. Using the domain as a grep pattern would let "." act
# as a wildcard and match the text anywhere in the record, so one domain
# could pull in another domain's records.
#
# Arguments:
#   $1 - file with one domain per line
#   $2 - preferred record file
#   $3 - optional fallback record file
#        (default: /tmp/domain/domain_size_other.log)
#   $4 - optional report file to append to
#        (default: /tmp/domain/pbl_report<YYYYMMDD>.csv for today's date)
#######################################
out() {
  local list=$1
  local preferred=$2
  local fallback=${3:-/tmp/domain/domain_size_other.log}
  local report=${4:-/tmp/domain/pbl_report$(date +%Y%m%d).csv}
  # awk program: print the first three records whose first field equals
  # `want`, then stop reading. Concatenating "" forces a string comparison
  # so numeric-looking values are never compared as numbers.
  local pick='($1 "") == (want "") { print; if (++shown == 3) exit }'
  local domain rows

  # "|| [ -n ... ]" also processes a last line that lacks a trailing newline.
  while read -r domain || [ -n "$domain" ]; do
    # A blank entry names no domain. Skipping it matters: an empty pattern
    # argument would shift the file name into the pattern position and make
    # the lookup tool read (and swallow) the rest of this loop's input.
    [ -n "$domain" ] || continue

    rows=$(awk -v want="$domain" "$pick" "$preferred")
    if [ -z "$rows" ]; then
      rows=$(awk -v want="$domain" "$pick" "$fallback")
    fi
    if [ -n "$rows" ]; then
      printf '%s\n' "$rows" >> "$report"
    fi
  done < "$list"
  echo "**********" >> "$report"
}
# Write one report section per ranking slice, each looking in the
# size-filtered records first, then tell the user where the report is.
for slice in top10 mid20 end20; do
  out "/tmp/domain/domain_${slice}.log" /tmp/domain/domain_size.log
done
printf '%s\n' "Report is here /tmp/domain/pbl_report$(date +%Y%m%d).csv"