第一句子网 - 唯美句子、句子迷、好句子大全
第一句子网 > python网站访问日志分析_python分析apache网站日志web日志的代码

python网站访问日志分析_python分析apache网站日志web日志的代码

时间:2022-09-08 06:03:47

相关推荐

python网站访问日志分析_python分析apache网站日志web日志的代码

python分析apache网站日志的代码

1、分析日志的python框架awk.py 代码示例:

#

# Custom awk.py module

#

class controller:
    """Minimal awk-style driver that feeds a file to subscribed handlers.

    A handler is any object providing the five methods this class calls:
    ``begin()``, ``process_line(s)``, ``end()``, ``description()`` and
    ``result()``.
    """

    def __init__(self, f):
        """Remember the input file and start with no handlers.

        f -- any object with a ``readline()`` method that returns ``""``
             at end of input (for example ``sys.stdin`` or an open file).
        """
        self.m_file = f
        self.m_handlers = []

    def subscribe(self, o):
        """Register handler ``o``; handlers are invoked in subscription order."""
        self.m_handlers.append(o)

    def run(self):
        """Call begin() on every handler, feed them each line, then call end().

        Lines are passed to ``process_line`` exactly as ``readline()``
        returned them, i.e. including the trailing newline when present.
        """
        for o in self.m_handlers:
            o.begin()

        # iter(callable, sentinel) keeps calling readline() until it returns
        # "", so only the readline() interface is required of the file.
        for line in iter(self.m_file.readline, ""):
            for o in self.m_handlers:
                o.process_line(line)

        for o in self.m_handlers:
            o.end()

    def print_results(self):
        """Print each handler's description and result to standard output."""
        # Single-argument print(...) calls behave the same under Python 2
        # (parenthesised expression) and Python 3 (function call).
        print("")
        print("Results:")
        print("")
        for o in self.m_handlers:
            print("------------------------------------------------------")
            print(o.description())
            print("------------------------------------------------------")
            print(o.result())

2、统计日志的点击量(日志行数)count_lines.py 代码示例:

# Standard sys module

import sys

# Custom awk.py module

import awk

class count_lines:
    """Handler that counts how many lines it is given."""

    def __init__(self):
        # Start at zero so result() is defined even if begin() never ran.
        self.m_count = 0

    def begin(self):
        """Reset the counter at the start of a run."""
        self.m_count = 0

    def process_line(self, s):
        """Count one line; the content of ``s`` is not inspected."""
        self.m_count += 1

    def end(self):
        """Nothing to finalise."""
        pass

    def description(self):
        """Return the heading printed above the result."""
        return "# of lines in the file"

    def result(self):
        """Return the number of lines seen since the last begin()."""
        return self.m_count

# Drive the line counter: build an awk-style controller over standard input,
# attach the handler, consume the whole stream, then print the report.
handler = count_lines()
ac = awk.controller(sys.stdin)
ac.subscribe(handler)
ac.run()
ac.print_results()

3、使用方法是shell中执行 代码示例:

# cat apachelog.log | python count_lines.py

4、统计访问天数超过n天的访问者(按IP)数量 visitors.py 代码示例:

import re;

import sys

import awk

class return_visitors:
    """Handler that counts IP addresses seen on more than ``n`` distinct days.

    Each line is split on whitespace; field 0 is used as the IP address and
    field 3 as the timestamp (``[dd/Mon/yyyy:hh:mm:ss`` in Apache access
    logs). Lines with fewer than four fields are ignored.
    """

    def __init__(self, n):
        """n -- threshold; an IP is counted when its distinct days exceed n."""
        self.m_n = n
        # Maps IP address -> set of distinct day strings it was seen on.
        self.m_ip_days = {}
        # Defined up front so result() works even before end() has run.
        self.m_count = 0

    def begin(self):
        """No per-run setup is performed."""
        pass

    def process_line(self, s):
        """Record the day on which the IP of log line ``s`` was seen."""
        fields = s.split()
        try:
            ip = fields[0]
            timestamp = fields[3]
        except IndexError:
            # Too few fields to be a usable log line; skip it.
            return

        # Keep everything between the opening "[" and the first ":", i.e.
        # "dd/Mon/yyyy". The year is included so that the same calendar day
        # in different years counts as two days.
        day = timestamp.lstrip("[").partition(":")[0]
        self.m_ip_days.setdefault(ip, set()).add(day)

    def end(self):
        """Compute how many IPs were seen on more than ``n`` distinct days."""
        self.m_count = sum(
            1 for days in self.m_ip_days.values() if len(days) > self.m_n
        )

    def description(self):
        """Return the heading printed above the result."""
        return "# of IP addresses that visited more than %s days" % self.m_n

    def result(self):
        """Return the count computed by end() (0 if end() has not run)."""
        return self.m_count

# Report how many IP addresses in the log on standard input were seen on
# more than two distinct days.
handler = return_visitors(2)
ac = awk.controller(sys.stdin)
ac.subscribe(handler)
ac.run()
ac.print_results()

# cat apachelog.log | python visitors.py

5、按照来源(referrer)域名统计访问量 domain.py 代码示例:

import re;

import sys

import awk

class referring_domains:
    """Handler that tallies hits per referring domain.

    Each line is split on whitespace and field 10 is treated as the
    referrer. Lines with too few fields, or whose referrer contains no
    ``//host.tld/`` part, are ignored.
    """

    # Host between "//" and the next "/": any run of letters, digits, "-"
    # and ".", ending in a dot plus an alphabetic TLD of two or more
    # letters. A raw string keeps the backslashes intact, and "A-Z"
    # (not "A-z") keeps punctuation such as "_" or "^" out of the TLD.
    _DOMAIN_RE = re.compile(r"//([a-zA-Z0-9\-.]*\.[a-zA-Z]{2,})/")

    def __init__(self):
        # Maps domain name -> number of lines referring from it.
        self.m_domains = {}

    def begin(self):
        """No per-run setup is performed."""
        pass

    def process_line(self, line):
        """Extract the referrer's domain from ``line`` and count it."""
        fields = line.split()
        if len(fields) <= 10:
            return  # no referrer field on this line

        match = self._DOMAIN_RE.search(fields[10])
        if match is None:
            return  # referrer is "-" or not a recognisable URL

        domain = match.group(1)
        self.m_domains[domain] = self.m_domains.get(domain, 0) + 1

    def end(self):
        """Nothing to finalise."""
        pass

    def description(self):
        """Return the heading printed above the result."""
        return "Referring domains"

    def sort(self, key1, key2):
        """cmp-style comparator ordering domains by descending hit count.

        Returns -1 if ``key1`` has more hits than ``key2``, 0 if equal and
        1 otherwise. Kept for existing callers; ``result()`` uses a
        key-based sort instead, which works on both Python 2 and 3.
        """
        if self.m_domains[key1] > self.m_domains[key2]:
            return -1
        elif self.m_domains[key1] == self.m_domains[key2]:
            return 0
        else:
            return 1

    def result(self):
        """Return one "domain count" line per domain, busiest first.

        Domains with equal counts are listed alphabetically so the output
        is deterministic. The text ends with two extra newlines.
        """
        ranked = sorted(
            self.m_domains.items(),
            key=lambda item: (-item[1], item[0]),
        )
        rows = ["%s %s\n" % (domain, count) for domain, count in ranked]
        return "".join(rows) + "\n\n"

# Tally referring domains for the access log supplied on standard input
# and print them, busiest first.
handler = referring_domains()
ac = awk.controller(sys.stdin)
ac.subscribe(handler)
ac.run()
ac.print_results()

# cat apachelog.log | python domain.py

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。