CMORPH降水数据下载

结合官方网站和下面代码，可自由定制符合自己需求的下载任务。
https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/
代码语言：Python
#-*- coding: utf-8 -*-
import calendar
import os
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def getLegalUrl(year, mon, day, time):
    """Build the download URL of one CMORPH 8km/30min NetCDF file.

    Args:
        year: Year; converted with ``str()`` and used as-is.
        mon: Month; zero-padded to two digits.
        day: Day of month; zero-padded to two digits.
        time: Two-digit stamp appended after the date (the script passes
            the hour of day, 0-23); zero-padded to two digits.

    Returns:
        ``<base_url>YYYY/MM/DD/CMORPH_V1.0_ADJ_8km-30min_YYYYMMDDHH.nc``.
    """
    base_url = "https://www.ncei.noaa.gov/data/cmorph-high-resolution-global-precipitation-estimates/access/30min/8km/"
    url_preletter = 'CMORPH_V1.0_ADJ_8km-30min_'
    # str().zfill() rather than a numeric format spec so that string
    # arguments such as "6" are padded the same way as integers.
    yyyy = str(year)
    mm = str(mon).zfill(2)
    dd = str(day).zfill(2)
    hh = str(time).zfill(2)
    return f"{base_url}{yyyy}/{mm}/{dd}/{url_preletter}{yyyy}{mm}{dd}{hh}.nc"

def getfilename(year, mon, day, time):
    """Return the local file name for one CMORPH 8km/30min NetCDF file.

    Args:
        year: Year; converted with ``str()`` and used as-is.
        mon: Month; zero-padded to two digits.
        day: Day of month; zero-padded to two digits.
        time: Two-digit stamp appended after the date (the script passes
            the hour of day, 0-23); zero-padded to two digits.

    Returns:
        ``CMORPH_V1.0_ADJ_8km-30min_YYYYMMDDHH.nc``.
    """
    filename_preletter = 'CMORPH_V1.0_ADJ_8km-30min_'
    stamp = str(year) + str(mon).zfill(2) + str(day).zfill(2) + str(time).zfill(2)
    return f"{filename_preletter}{stamp}.nc"

def download_file(url, save_path):
    """Download ``url`` to ``save_path``.

    The body is streamed into ``<save_path>.part`` and renamed to
    ``save_path`` only after the transfer has finished, so an interrupted
    download never leaves a truncated file under the final name.

    Args:
        url: HTTPS URL to fetch.
        save_path: Destination file path; its directory must already exist.

    Returns:
        True if the server answered 200 and the file was written,
        False on any other status code or on a ``requests`` error.

    Raises:
        OSError: If the destination cannot be written (not caught here).
    """
    # Retry gateway errors (502/503/504) up to 5 times with backoff.
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
    part_path = f"{save_path}.part"

    # The context manager closes the session and its pooled connections.
    with requests.Session() as session:
        session.mount('https://', HTTPAdapter(max_retries=retries))
        try:
            with session.get(url, timeout=60, stream=True) as response:
                if response.status_code != 200:
                    print(f"下载失败，HTTP状态码: {response.status_code}")
                    return False
                with open(part_path, 'wb') as file:
                    # Stream in 1 MiB chunks instead of buffering the whole body.
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)
            # Atomic rename: the final name only ever refers to a complete file.
            os.replace(part_path, save_path)
            return True
        except requests.exceptions.RequestException as e:
            print(f"网络错误: {e}")
            return False
        finally:
            # Drop a leftover partial file after any failure.
            if os.path.exists(part_path):
                os.remove(part_path)

def main():
    """Download the June-August 2021/2022 CMORPH files into ``D:\\CMORPH_data``.

    Steps:
      1. Append every generated URL to ``all_file_url.txt``.
      2. Download each file that is not already on disk into
         ``<base>/<year>/<month>/``.
      3. Retry the failed downloads for up to 10 rounds.
      4. Write the paths that still failed to ``failed_files.txt`` and
         print a summary.
    """
    # Raw string: in a normal literal "\a" and "\f" are control characters.
    base_dir = r'D:\CMORPH_data'
    # The URL log lives in base_dir, so it must exist before the log is opened.
    os.makedirs(base_dir, exist_ok=True)

    # Each entry keeps the URL together with its target path so a retry
    # requests the right file.
    failed_files = []
    success_count = 0

    with open(os.path.join(base_dir, 'all_file_url.txt'), 'a', encoding='utf-8') as f:
        for year in [2021, 2022]:
            for mon in [6, 7, 8]:
                days = calendar.monthrange(year, mon)[1]  # number of days in this month
                save_path = os.path.join(base_dir, str(year), str(mon).zfill(2))  # target folder

                # Create the year/month folder when it is missing.
                if not os.path.isdir(save_path):
                    print(f"目录 {save_path} 不存在，尝试创建...")
                    try:
                        os.makedirs(save_path, exist_ok=True)
                        print(f"成功创建目录：{save_path}")
                    except OSError as e:
                        print(f"创建目录 {save_path} 时发生错误：{e}")
                        # Nothing can be saved for this month; skip it.
                        continue

                for day in range(1, days + 1):  # first download pass
                    for hour in range(24):
                        print(year, mon, day, hour)
                        url = getLegalUrl(year, mon, day, hour)
                        filename = getfilename(year, mon, day, hour)
                        file_path = os.path.join(save_path, filename)
                        f.write(url + '\n')

                        if os.path.exists(file_path):
                            print(f"文件 {filename} 已存在，跳过下载")
                        elif download_file(url, file_path):
                            print(f"文件 {filename} 已成功下载")
                            success_count += 1
                        else:
                            print(f"文件 {filename} 下载失败")
                            failed_files.append((url, file_path))

    # Retry the failed downloads.
    retry_count = 0
    while failed_files and retry_count < 10:
        retry_count += 1
        print(f"开始第 {retry_count} 次重试...")
        # Build a new list instead of removing from the one being iterated.
        still_failed = []
        for url, file_path in failed_files:
            filename = os.path.basename(file_path)
            if download_file(url, file_path):
                print(f"文件 {filename} 已成功下载")
                success_count += 1
            else:
                print(f"文件 {filename} 下载失败")
                still_failed.append((url, file_path))
        failed_files = still_failed

    # Record the files that could not be downloaded after all retries.
    if failed_files:
        with open(os.path.join(base_dir, 'failed_files.txt'), 'w', encoding='utf-8') as fail_file:
            fail_file.writelines(file_path + '\n' for _, file_path in failed_files)

    fail_count = len(failed_files)
    print(f'下载完成，成功下载 {success_count} 个文件，失败 {fail_count} 个文件.')


if __name__ == '__main__':
    main()
count file path url 数据
0 人点赞