一、清理本地文件
import datetime
from utils import confUtils, hadoop
import os
import shutil
import time
import sys
def cleandir(path, duration):
    """Delete stale files and stale date-named directories under *path*.

    Walks the tree rooted at ``path``. A file is removed when its
    modification time is more than ``duration`` days old. A directory is
    removed, with everything in it, when its name parses as ``%Y%m%d`` and
    that date is more than ``duration`` whole days in the past. The name of
    every removed file or directory is printed.

    Args:
        path: Root directory to clean.
        duration: Retention period in days.
    """
    time_now = time.time()
    max_age_seconds = 3600 * 24 * duration
    today = datetime.datetime.now()
    for root, dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            if time_now - os.stat(file_path).st_mtime > max_age_seconds:
                print(name)
                os.remove(file_path)

        surviving = []
        for dirname in dirs:
            try:
                dir_date = datetime.datetime.strptime(dirname, '%Y%m%d')
            except ValueError:
                # Not a date-named directory: leave it alone instead of
                # aborting the whole cleanup, but still walk into it.
                surviving.append(dirname)
                continue
            if (today - dir_date).days > duration:
                print(dirname)
                shutil.rmtree(os.path.join(root, dirname))
            else:
                surviving.append(dirname)
        # Prune in place so os.walk does not try to descend into the
        # directories that were just removed.
        dirs[:] = surviving
二、清除HDFS文件
def clean_hdfs(hdfs_path, duration):
    """Remove the dated entry under *hdfs_path* that is `duration` days old.

    Builds ``<hdfs_path>/<YYYYMMDD>`` for the day exactly ``duration`` days
    before now and passes it to ``hadoop.hdfs_rm`` with ``recursive=True``.
    Only that single dated path is targeted per call.

    Args:
        hdfs_path: Parent HDFS path that holds the date-named entries.
        duration: Age in days of the entry to remove.
    """
    # NOTE(review): hadoop.hdfs_rm is a project helper; its behaviour when
    # the target path does not exist is not visible here - confirm.
    cutoff_day = datetime.datetime.now() - datetime.timedelta(days=duration)
    day_stamp = cutoff_day.strftime('%Y%m%d')
    target = '%s/%s' % (hdfs_path, day_stamp)
    hadoop.hdfs_rm(target, recursive=True)