Source code for dnppy.download.fetch_Landsat8

__author__ = ['djjensen', 'jwely']

from dnppy import textio
from dnppy import core

from download_url import download_url

import datetime
import urllib
import site
import os
import gzip

__all__ = ["fetch_Landsat8"]


def fetch_Landsat8(path_row_pairs, start_dto, end_dto, outdir, max_cloud_cover = 100, bands = None):
    """
    This function downloads all landsat 8 tiles for the input path_row_pairs that fall within
    the bounds of start_dto and end_dto, and saves them to the output directory. It uses the
    amazon web service at [https://aws.amazon.com/public-data-sets/landsat/]

    :param path_row_pairs:  tupled integer values of the path,row coordinates of a tile. May be
                            a list of several tuples, for example [(1,1),(1,2)]
    :param start_dto:       python datetime object of the start date of the range
    :param end_dto:         python datetime object of the end date of the range
    :param outdir:          the folder to save the output landsat files in
    :param max_cloud_cover: maximum percent cloud cover that is acceptable to download the file
    :param bands:           list of bands to download for each tile, passed straight through to
                            fetch_Landsat8_tile. Defaults to all bands.

    :return output_tilenames: A list of paths to the tiles downloaded by this function.
    """

    # fetch an updated scene list with custom function.
    scene_list = fetch_Landsat8_scene_list()

    path_row_pairs = core.enf_list(path_row_pairs)
    output_tilenames = []

    for path_row_pair in path_row_pairs:

        # format input path/row strings
        path, row = path_row_pair
        path_str = str(path).zfill(3)
        row_str = str(row).zfill(3)
        pathrow_id = "LC8{0}{1}".format(path_str, row_str)

        # loop through the scene list; if a scene for the given path/row falls within the
        # date range and under the cloud cover limit, download it with fetch_Landsat8_tile
        for scene in scene_list:
            tilename = scene[0]
            datestring = scene[1].split(".")[0]    # removes fractional seconds from datestring
            date = datetime.datetime.strptime(datestring, "%Y-%m-%d %H:%M:%S")
            cloud_cover = float(scene[2])

            if cloud_cover < max_cloud_cover:
                if pathrow_id in scene[0]:
                    if start_dto <= date <= end_dto:
                        amazon_url = scene[-1]
                        fetch_Landsat8_tile(amazon_url, tilename, outdir, bands)
                        output_tilenames.append(os.path.join(outdir, tilename))

    print("Finished retrieving landsat 8 data!")
    return output_tilenames
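
# The scene indexing used in fetch_Landsat8 above assumes the columns of the amazon hosted
# scene_list CSV are laid out roughly as follows (an assumption about the hosted file, not
# something this module enforces; check a freshly downloaded scene_list.txt if downloads
# start failing):
#
#     entityId, acquisitionDate, cloudCover, processingLevel, path, row, ..., download_url
#
# so a hypothetical parsed record would read as:
#
#     scene[0]    tile name,              e.g. "LC80440272015123LGN00"
#     scene[1]    acquisition timestamp,  e.g. "2015-05-03 18:45:00.000"
#     scene[2]    percent cloud cover,    e.g. "12.5"
#     scene[-1]   amazon tile url ending in "index.html"
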
def fetch_Landsat8_tile(amazon_url, tilename, outdir, bands = None):
    """
    This function makes use of the amazon web service hosted Landsat 8 OLI data. It receives
    an amazon web url for a single landsat tile and downloads the desired files.

    :param amazon_url:  url to the amazon page hosting this landsat tile
    :param tilename:    landsat tile name
    :param outdir:      output directory to place landsat data
    :param bands:       list of bands to download when not all are desired, options include any
                        of [1,2,3,4,5,6,7,8,9,10,11,"QA"]. The MTL file is ALWAYS downloaded.

    :return tilepath: returns a filepath to the new landsat tile folder with .TIFs in it
    """

    if bands is None:
        bands = map(str, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, "QA"])
    else:
        bands = map(str, core.enf_list(bands))

    # read the amazon index page that lists every file available for this tile
    connection = urllib.urlopen(amazon_url)
    page = connection.read().split("\n")

    print("Downloading landsat tile {0}".format(tilename))

    for line in page:
        if "<li><a href=" in line:

            # pull filename from html code
            filename = line.split('"')[1]

            # pull out band information
            band_id = filename.replace(tilename + "_", "").split(".")[0].replace("B", "")
            good_band = band_id in bands
            mtl_file = "MTL" in band_id

            # download desired files if they are not already on disk, retrying once on failure
            if good_band or mtl_file:
                link = amazon_url.replace("index.html", filename)
                savename = os.path.join(outdir, tilename, filename)

                if not os.path.isfile(savename):
                    try:
                        download_url(link, savename)
                    except Exception:
                        download_url(link, savename)
                    print("\tDownloaded {0}".format(filename))
                else:
                    print("\tFound {0}".format(filename))

    return os.path.join(outdir, tilename)


def fetch_Landsat8_scene_list():
    """
    Simply downloads and extracts the most recent version of the scene_list text file
    for reference, from http://landsat-pds.s3.amazonaws.com/scene_list.gz

    :return scene_list_text_data: returns a text data object with all the data on the
                                  scene inventory hosted on amazon web services.
    """

    print("Updating scene list")

    # define save paths for the new scene list
    directory = site.getsitepackages()[1]
    gz_path = "{0}/dnppy/landsat/metadata/scene_list.gz".format(directory)
    txt_path = "{0}/dnppy/landsat/metadata/scene_list.txt".format(directory)

    # download then extract the gz file to a txt file.
    download_url("http://landsat-pds.s3.amazonaws.com/scene_list.gz", gz_path)

    with gzip.open(gz_path, 'rb') as gz:
        content = gz.read()

    with open(txt_path, 'wb+') as f:
        f.write(content)

    # build a new text data object from the fresh scene list
    scene_list_text_data = textio.text_data()
    scene_list_text_data.read_csv(txt_path, delim = ",", has_headers = True)

    return scene_list_text_data


if __name__ == "__main__":

    aoutdir = r"D:\dh_dev\WA_test_data\44_27"
    start = datetime.datetime(2015, 5, 1)
    end = datetime.datetime(2015, 7, 18)
    path_row_pairs = (44, 27)

    fetch_Landsat8(path_row_pairs, start, end, aoutdir, bands = [2, 3, 4, 5, 6, 7, 10, 11])
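
# A minimal usage sketch: the hypothetical helper below is not called anywhere in this module,
# but shows how several path/row pairs can be requested at once and how max_cloud_cover can be
# used to skip heavily clouded scenes. The output folder is an assumed example path.

def _example_fetch(outdir = r"C:\landsat_downloads"):
    """ Illustrative only: fetch bands 4 and 5 for two adjacent tiles with low cloud cover """

    start = datetime.datetime(2015, 5, 1)
    end = datetime.datetime(2015, 7, 18)

    return fetch_Landsat8(path_row_pairs = [(44, 27), (45, 27)],
                          start_dto = start,
                          end_dto = end,
                          outdir = outdir,
                          max_cloud_cover = 20,
                          bands = [4, 5])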