Python 删除HDFS过期文件

2022-11-26 10:59:21 浏览数 (2)

一、清理本地文件

代码语言:python
import datetime
from utils import confUtils, hadoop
import os
import shutil
import time
import sys


def cleandir(path, duration):
    """Delete expired files and date-named directories below ``path``.

    The tree rooted at ``path`` is walked top-down.  At every level:

    * a file is removed when its modification time is more than
      ``duration`` days (``duration * 24 * 3600`` seconds) in the past;
    * a directory whose name parses as ``%Y%m%d`` is removed recursively
      when that date is more than ``duration`` whole days in the past;
    * any other directory is left in place and walked, so expired files
      inside it are still cleaned up.

    The name of every removed file or directory is printed.

    :param path: root directory to clean.
    :param duration: retention period in days.
    """
    now_ts = time.time()
    now_dt = datetime.datetime.now()
    max_age_seconds = 3600 * 24 * duration

    for root, dirs, files in os.walk(path):
        for name in files:
            file_path = os.path.join(root, name)
            if now_ts - os.stat(file_path).st_mtime > max_age_seconds:
                print(name)
                os.remove(file_path)

        surviving_dirs = []
        for dirname in dirs:
            try:
                dir_date = datetime.datetime.strptime(dirname, '%Y%m%d')
            except ValueError:
                # Not a date-named directory: keep it instead of aborting
                # the whole cleanup, and let the walk descend into it.
                surviving_dirs.append(dirname)
                continue
            if (now_dt - dir_date).days > duration:
                print(dirname)
                shutil.rmtree(os.path.join(root, dirname))
            else:
                surviving_dirs.append(dirname)

        # Prune in place so os.walk does not try to enter directories
        # that were just removed.
        dirs[:] = surviving_dirs

二、清除HDFS文件

代码语言:python
def clean_hdfs(hdfs_path, duration):
    """Remove the dated HDFS sub-path that is exactly ``duration`` days old.

    The target is ``<hdfs_path>/<YYYYMMDD>``, where the date is the current
    local date minus ``duration`` days.  Only that single day's path is
    passed to ``hadoop.hdfs_rm``; older dated paths are not touched by this
    call.

    :param hdfs_path: parent HDFS path containing the dated sub-paths.
    :param duration: retention period in days.
    """
    # NOTE(review): hadoop.hdfs_rm is a project helper; its behaviour when
    # the target does not exist is not visible here -- confirm.
    expired_day = datetime.datetime.now() - datetime.timedelta(days=duration)
    day_stamp = expired_day.strftime('%Y%m%d')
    target = '%s/%s' % (hdfs_path, day_stamp)
    hadoop.hdfs_rm(target, recursive=True)

0 人点赞