# Source code for dnppy.download.fetch_Landsat8

__author__ = ['djjensen', 'jwely']

from dnppy import textio
from dnppy import core

from download_url import download_url

import datetime
import urllib
import site
import os
import gzip

# Public API of this module: only fetch_Landsat8 is exported by a star-import.
__all__ = ["fetch_Landsat8"]


def fetch_Landsat8(path_row_pairs, start_dto, end_dto, outdir,
                   max_cloud_cover = 100, bands = None):
    """
    Downloads every landsat 8 tile matching the input path_row_pairs that was
    acquired between start_dto and end_dto (both inclusive), and saves them
    to the output directory. It uses the amazon web service at
    [https://aws.amazon.com/public-data-sets/landsat/]

    :param path_row_pairs:  tupled integer values of path,row coordinates of tile.
                            may be a list of several tuples. example: [(1,1),(1,2)]
    :param start_dto:       python datetime object of start date of range
    :param end_dto:         python datetime object of end date of range
    :param outdir:          the folder to save the output landsat files in
    :param max_cloud_cover: maximum percent cloud cover that is acceptable to
                            download the file. The limit is inclusive, so the
                            default of 100 never rejects a scene.
    :param bands:           optional list of bands to download for each tile,
                            passed through unchanged to fetch_Landsat8_tile.
                            None downloads all bands.

    :return output_tilenames: A list of tile folder paths (outdir joined with
                              the tile name), one per tile fetched by this function.
    """

    # fetch an updated scene list with custom function.
    scene_list = fetch_Landsat8_scene_list()

    path_row_pairs = core.enf_list(path_row_pairs)
    output_tilenames = []

    for path, row in path_row_pairs:
        # the tile-name prefix only depends on the path/row pair, so build it
        # once per pair instead of once per scene.
        pathrow_id = "LC8{0}{1}".format(str(path).zfill(3), str(row).zfill(3))

        # loop through the scene list, cheapest checks first. "scene" (rather
        # than "row") keeps the landsat row number above from being shadowed.
        for scene in scene_list:
            tilename = scene[0]
            if pathrow_id not in tilename:
                continue

            if float(scene[2]) > max_cloud_cover:
                continue

            # only parse the date of scenes that already matched the tile
            datestring = scene[1].split(".")[0] # removes fractional seconds from datestring
            date       = datetime.datetime.strptime(datestring, "%Y-%m-%d %H:%M:%S")
            if not (start_dto <= date <= end_dto):
                continue

            # the last column of the scene list holds the tile's amazon url
            amazon_url = scene[-1]
            fetch_Landsat8_tile(amazon_url, tilename, outdir, bands)
            output_tilenames.append(os.path.join(outdir, tilename))

    print("Finished retrieving landsat 8 data!")
    return output_tilenames



def fetch_Landsat8_tile(amazon_url, tilename, outdir, bands = None):
    """
    This function makes use of the amazon web service hosted Landsat 8 OLI data.
    It receives an amazon web url for a single landsat tile, and downloads the desired files

    Files that already exist at the destination are not downloaded again.
    A failed download is retried once; a second failure is raised to the caller.

    :param amazon_url:  url to amazons page hosting these landsat tiles
    :param tilename:    landsat tile name
    :param outdir:      output directory to place landsat data
    :param bands:       list of bands to download when not all are desired, options include
                        any of [1,2,3,4,5,6,7,8,9,10,11,"QA"]. The MTL file is ALWAYS downloaded.

    :return tilepath:   returns a filepath to the new landsat tile folder with .TIFs in it
    """

    if bands is None:
        bands = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, "QA"]
    else:
        bands = core.enf_list(bands)

    # a concrete set of strings: membership can be tested once per file without
    # exhausting a one-shot map() iterator, and each lookup is O(1).
    wanted_bands = set(str(band) for band in bands)

    # read the tile's index page, making sure the connection is always released
    connection = urllib.urlopen(amazon_url)
    try:
        page = connection.read().split("\n")
    finally:
        connection.close()

    print("Downloading landsat tile {0}".format(tilename))

    tile_prefix = tilename + "_"

    for line in page:
        if "<li><a href=" not in line:
            continue

        # pull filename from html code
        filename = line.split('"')[1]

        # pull out band information, e.g. "<tilename>_B4.TIF" -> "4"
        band_id = filename.replace(tile_prefix, "").split(".")[0].replace("B", "")

        # download desired files; the MTL metadata file is always wanted.
        if band_id not in wanted_bands and "MTL" not in band_id:
            continue

        link     = amazon_url.replace("index.html", filename)
        savename = os.path.join(outdir, tilename, filename)

        if os.path.isfile(savename):
            print("\t Found {0}".format(filename))
            continue

        # try twice. Only ordinary errors trigger the retry, so that
        # KeyboardInterrupt / SystemExit still stop the download immediately.
        try:
            download_url(link, savename)
        except Exception:
            download_url(link, savename)
        print("\tDownloaded {0}".format(filename))

    return os.path.join(outdir, tilename)


def fetch_Landsat8_scene_list():
    """
    Simply downloads and extracts the most recent version of the scene_list
    text file for reference

        http://landsat-pds.s3.amazonaws.com/scene_list.gz

    Both the compressed download and the extracted text file are written under
    "dnppy/landsat/metadata" inside the second directory reported by
    site.getsitepackages().

    :return scene_list_text_data:   returns a text data object with all
                                    the data on scene inventory on amazon WS.
    """

    # local import keeps this block self-contained
    import shutil

    print("Updating scene list")
    # define save path for new scene list
    # NOTE(review): index [1] assumes at least two site-packages entries and
    # that dnppy lives in the second one -- confirm on non-Windows installs.
    directory  = site.getsitepackages()[1]
    gz_path    = "{0}/dnppy/landsat/metadata/scene_list.gz".format(directory)
    txt_path   = "{0}/dnppy/landsat/metadata/scene_list.txt".format(directory)

    # download then extract the gz file to a txt file. The decompressed data is
    # streamed in chunks so the whole scene list is never held in memory at
    # once (and never written one character at a time).
    download_url("http://landsat-pds.s3.amazonaws.com/scene_list.gz", gz_path)
    with gzip.open(gz_path, 'rb') as gz, open(txt_path, 'wb') as f:
        shutil.copyfileobj(gz, f)

    # build a new text data object from the fresh scene list
    scene_list_text_data = textio.text_data()
    scene_list_text_data.read_csv(txt_path, delim = ",", has_headers = True)

    return scene_list_text_data


if __name__ == "__main__":

    # Developer smoke run: fetch a subset of bands for path 44 / row 27
    # over a fixed date window into a local test folder.
    tile_pair     = (44, 27)
    window_start  = datetime.datetime(2015, 5, 1)
    window_end    = datetime.datetime(2015, 7, 18)
    target_folder = r"D:\dh_dev\WA_test_data\44_27"
    wanted_bands  = [2, 3, 4, 5, 6, 7, 10, 11]

    fetch_Landsat8(tile_pair, window_start, window_end, target_folder,
                   bands = wanted_bands)