# Source code for dnppy.download.fetch_GPM_IMERG

__author__ = ['jwely']
__all__ = ["fetch_GPM_IMERG"]

from list_ftp import list_ftp
from download_url import download_url
from datetime import datetime, timedelta
import os


def fetch_GPM_IMERG(start_dto, end_dto, outdir, product = "gis", time_res = "1day"):
    """
    Fetches 30 minute resolution GPM IMERG data from an ftp server. Several
    restrictions exist for this relatively new dataset, please read the
    input section carefully.

    http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

    :param start_dto:   datetime object for starting time of study boundary
    :param end_dto:     datetime object for ending time of study boundary
    :param outdir:      output directory to save the data
    :param product:     either "early", "late" or "final" for full HDF5 data stacks
                        of the respective runs, which are all at 30minute resolutions.
                        OR product can be set equal to "gis" (default) to find only
                        tif averages of the precipitation estimates. This gis tif
                        data is ONLY provided for data less than one year old.
    :param time_res:    if "product" is set to "gis", specify what time average
                        period you want. options are "30min", "3hr", "1day",
                        "3day", "7day". Defaults to "1day"

    :return: Returns a list of filepaths to freshly downloaded files

    learn more at [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "jeff.ely.08@gmail.com"

    # special filtering for gis type tif data to minimize data representation overlap.
    # minute-of-day stamps are zero padded to 4 characters to match filename fields.
    # BUGFIX: the fallback was a bare string "0000", which made the later
    # "minutes in ok_minutes" a substring test instead of list membership.
    ok_minutes = ["0000"]
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]

    # assemble address information
    pps_server = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site = pps_server,
                                    dir = prod_server,
                                    username = login,
                                    password = login)

    # perform a simple quick filtering of folders that definitely don't have
    # data we want (month folders have purely numeric names).
    # BUGFIX: the original called foldnames.remove() while iterating foldnames,
    # which mutates the list mid-iteration and silently skips elements.
    foldnames = [foldname for foldname in foldnames if foldname.isdigit()]

    for foldname in foldnames:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site = pps_server,
                                        dir = subdir,
                                        username = login,
                                        password = login)

        for filepath in filepaths:
            # parse the dot-delimited IMERG filename fields.
            # NOTE(review): field positions assume the standard IMERG naming
            # convention (prod at index 3, date-coords at 4, minute-of-day at 5,
            # averaging period at 7) — verify against current server listings.
            filename = os.path.basename(filepath)
            finfo = filename.split(".")
            prod = finfo[3]
            date_cords = finfo[4]
            minutes = finfo[5]
            time = finfo[7]

            # timestamp = start-of-day date plus minute-of-day offset
            date_str = date_cords.split("-")[0]
            date = datetime.strptime(date_str, "%Y%m%d") + timedelta(minutes = int(minutes))

            # see if this file meets criteria for download
            good_date = start_dto <= date <= end_dto
            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time = time_res == time
            else:
                good_minutes = True
                good_time = True

            # download the files that pass every filter
            if good_date and good_time and good_minutes:
                # NOTE(review): assumes download_url creates the intermediate
                # date-named directory if it does not exist — verify.
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"), filename)
                download_url(filepath, outname, username = login, password = login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
if __name__ == "__main__":
    # quick manual smoke test: pull 3-day averages of the "late" run
    # for the second half of 2015 into a local troubleshooting folder.
    window_start = datetime(2015, 8, 1)
    window_end = datetime(2015, 12, 31)
    target_dir = r"C:\Users\jwely\Desktop\troubleshooting\GPM"

    downloaded = fetch_GPM_IMERG(window_start,
                                 window_end,
                                 target_dir,
                                 product = "late",
                                 time_res = "3day")