生成激活码
#!/usr/bin/env python
#encoding:utf-8
#Author:sean
import string
import random
# Alphabet the activation codes are drawn from: ASCII letters plus digits.
# ascii_letters (rather than the Python-2-only, locale-dependent
# string.letters) keeps the alphabet fixed on every interpreter and locale.
field = string.ascii_letters + string.digits
def getRandom():
    """Return one four-character group of random letters and digits.

    The characters are taken from the module-level ``field`` with
    ``random.sample``, i.e. without replacement, so no position of
    ``field`` is used twice within a group.
    """
    # NOTE: the ``random`` module is not a cryptographically secure source.
    picked = random.sample(field, 4)
    return ''.join(picked)
def concatenate(group):
    """Build one activation code made of ``group`` random groups.

    Each group comes from ``getRandom()``; the groups are joined with '-'.
    """
    groups = (getRandom() for _ in range(group))
    return '-'.join(groups)
def generate(n):
    """Return a list of ``n`` activation codes, each built from four groups."""
    codes = []
    for _ in range(n):
        codes.append(concatenate(4))
    return codes
if __name__ == '__main__':
    # Parenthesised single-argument print runs on both Python 2 and Python 3.
    print(generate(10))
统计单词
#!/usr/bin/env python
#encoding:utf-8
import re
from collections import Counter
# Path of the text file passed to getMostCommonWord() when run as a script.
FileSource = './media/abc.txt'
def getMostCommonWord(articlefilesource):
    """Count how often each word occurs in an English plain-text file.

    Args:
        articlefilesource: Path of the text file to read.

    Returns:
        A list of ``(token, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common()``. Matching is
        case-sensitive, so 'The' and 'the' are counted separately.
    """
    # A run of ASCII letters is a word; the second alternative picks up a
    # number with an optional leading '$' and an optional trailing '%'.
    # NOTE(review): the trailing '$' anchors the numeric alternative to the
    # end of the text, so numbers elsewhere are skipped -- confirm intent.
    pattern = r'[A-Za-z]+|\$?\d+%?$'
    with open(articlefilesource) as f:
        tokens = re.findall(pattern, f.read())
    return Counter(tokens).most_common()
if __name__ == '__main__':
    # Parenthesised single-argument print runs on both Python 2 and Python 3.
    print(getMostCommonWord(FileSource))
提取网页正文
#!/usr/bin/env python
#encoding:utf-8
from goose import Goose
from goose.text import StopWordsChinese
import sys
# URL of the web page to analyse.
# NOTE(review): the original URL literal was lost, leaving an unterminated
# string; fill in the real address before running the script.
url = ''
def extract(url):
    """Extract the main body text of the web page at ``url``.

    Runs Goose configured with the Chinese stop-word class and returns the
    ``cleaned_text`` of the extracted article.
    """
    goose_config = {'stopwords_class': StopWordsChinese}
    extractor = Goose(goose_config)
    article = extractor.extract(url=url)
    return article.cleaned_text
if __name__ == '__main__':
    # Parenthesised single-argument print runs on both Python 2 and Python 3.
    print(extract(url))