代码语言:javascript复制
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/9/16 上午2:00
# @Author : BrownWang
# @Email : 277215243@qq.com
# @File : Analysis.py
# @Software: PyCharm
import re
import heapq
import threading
from multiprocessing import Pool
# Region name -> set of three-octet IP prefixes ("a.b.c") listed for it.
dic = {}
# Region name -> hit counter; every region found in the table starts at 0.
fdic = {}


def readconfig(path='./ipdb_cn.txt'):
    """Load the IP-to-region table into the module-level ``dic`` and ``fdic``.

    Every line is split on whitespace.  Field 0 is read as a dotted IP
    address and field 2 as the UTF-8 encoded region name; field 1 is not
    used (presumably the end of the address range -- confirm against the
    data file).  The first three octets of field 0 are stored as the
    region's prefix.

    Lines with fewer than three fields (blank or truncated lines) are
    skipped instead of raising ``IndexError``.

    Args:
        path: Location of the table file.  Defaults to the path the script
            has always used, so ``readconfig()`` behaves as before.
    """
    # Binary mode + explicit decoding keeps the whitespace splitting
    # byte-based and makes the function work on both Python 2 and 3.
    with open(path, 'rb') as f:
        for raw in f:
            fields = raw.split()
            if len(fields) < 3:
                continue
            name = fields[2].decode('utf-8')
            # A prefix with non-ASCII bytes can never match a prefix taken
            # from the log, so replacing such bytes is harmless.
            prefix = b'.'.join(fields[0].split(b'.')[:3]).decode(
                'ascii', 'replace')
            if name not in dic:
                dic[name] = set()
                fdic[name] = 0
            dic[name].add(prefix)
# Load the IP-to-region table on a background thread so it overlaps with the
# log scan; the thread object is kept in ``t`` so it can be joined later.
t = threading.Thread(target=readconfig)
t.start()

# Measure the log size in bytes by seeking to the end of the file.  Binary
# mode makes ``tell()`` a plain byte offset, and the ``with`` block closes
# the handle immediately instead of leaking it for the rest of the run.
with open('./flowdata.log', 'rb') as tf:
    tf.seek(0, 2)
    total = tf.tell()
def run(arg, path='./flowdata.log'):
    """Collect the distinct three-octet IP prefixes found in one log chunk.

    The chunk owns every line that *begins* at a byte offset in
    ``[start, end)``.  A line that straddles ``end`` is read to its end by
    this chunk and skipped by the next one, so adjacent chunks cover the
    file exactly once and never parse a truncated line.

    Args:
        arg: ``(start, end)`` byte offsets into the log file.
        path: Log file to scan.  Defaults to the path the script has always
            used, so ``run((start, end))`` behaves as before.

    Returns:
        A set of ``"a.b.c"`` strings, one per distinct prefix of an address
        that follows ``_ip:`` on a line.  Lines without such an address
        contribute nothing.
    """
    start, end = int(arg[0]), int(arg[1])
    pattern = re.compile(br'_ip:\s*(\d+\.\d+\.\d+)\.\d+')
    sets = set()
    with open(path, 'rb') as f:
        if start > 0:
            # Step back one byte and consume through the next newline: if
            # ``start`` is already a line start this reads only that newline,
            # otherwise it discards the tail of a line owned by the previous
            # chunk.
            f.seek(start - 1)
            f.readline()
        # readline() (rather than iterating the file) keeps tell() accurate.
        while f.tell() < end:
            line = f.readline()
            if not line:
                break
            match = pattern.search(line)
            if match:
                sets.add(match.group(1).decode('ascii'))
    return sets
def split_ranges(size, parts):
    """Split ``[0, size)`` into ``parts`` contiguous ``(start, end)`` ranges.

    Floor division keeps every bound an integer (usable as a file offset);
    the last range always ends exactly at ``size``.
    """
    return [(size * i // parts, size * (i + 1) // parts)
            for i in range(parts)]


def rank_shares(counts, total):
    """Turn per-region hit counts into percentages, largest first.

    Args:
        counts: Mapping of region name to number of matching prefixes.
        total: Number of distinct prefixes seen; the percentage base.

    Returns:
        A list of ``{'n': name, 'v': percent}`` dicts sorted by descending
        ``v``.  When ``total`` is 0 every share is reported as ``0.0``
        rather than dividing by zero.
    """
    base = float(total)
    shares = [{'n': name, 'v': (counts[name] / base * 100) if base else 0.0}
              for name in counts]
    return sorted(shares, key=lambda s: s['v'], reverse=True)


# The guard keeps worker processes (which re-import this module on platforms
# that spawn instead of fork) from starting pools of their own.
if __name__ == '__main__':
    p = Pool(8)
    runl = split_ranges(total, 8)
    result = p.map_async(run, runl)
    p.close()
    p.join()

    # Merge the per-chunk prefix sets into one set of distinct prefixes.
    results = result.get()
    filset = set()
    for i in results:
        filset.update(i)
    sumfil = len(filset)

    # The region table must be fully loaded before it is consulted.
    t.join()
    for k in dic:
        # Number of observed prefixes that belong to region k.
        fdic[k] += len(dic[k] & filset)

    sortl = rank_shares(fdic, sumfil)
    for i in sortl:
        print(i['n'] + ' ' + str(round(i['v'], 2)) + '%')