__author__ = ["Jwely"]
__all__ = ["rast_series"]
# local imports
from dnppy import core
from dnppy import raster
import time_series
# standard imports
import os
from datetime import datetime, timedelta
[docs]class rast_series(time_series.time_series):
"""
This is an extension of the time_series class
It is built to handle just like a time_series object, with
the simplification that the "row_data" attribute, is comprised
of nothing more than filepaths and filenames. All attributes
for tracking the time domain are the same.
some time_series methods for manipulating and viewing text data
will not apply to raster_series. It is unknown how they will behave.
"""
[docs] def from_directory(self, directory, fmt, fmt_unmask = None):
"""
creates a list of all rasters in a directory, then passes this
list to self.from_rastlist
see ``from_rastlist``
"""
filepaths = core.list_files(False, directory, ".tif")
filepaths = raster.enf_rastlist(filepaths)
self.from_rastlist(filepaths, fmt, fmt_unmask)
return
[docs] def from_rastlist(self, filepaths, fmt, fmt_unmask = None):
"""
Loads up a list of filepaths as a time series. If filenames contain
variant characters that are not related to date and time, a
"fmt_unmask" may be used to isolate the datestrings from the rest
of the filenames by character index.
For an example filename ``"MYD11A1.A2013001_day_clip_W05_C2014001_Avg_K_C_p_GSC.tif"``
We only want the part with the year and julian day in it ``"2013001"``
So we can use
.. code-block:: python
fmt = "%Y%j"
fmt_unmask = [10,11,12,13,14,15,16]
To indicate that we only want the 10th through 16th characters of the filenames
to be used for anchoring each raster in a physical time.
"""
self.headers = ['filepaths','filenames','fmt_names']
self.row_data = []
for filepath in filepaths:
head, filename = os.path.split(filepath)
if fmt_unmask is not None:
fmt_name = ""
for char_index in fmt_unmask:
fmt_name = fmt_name + filename[char_index]
else:
fmt_name = filename
self.row_data.append([filepath, filename, fmt_name])
self.build_col_data()
self.define_time('fmt_names', fmt)
print("Imported and interpreted {0} raster filepath datetimes!".format(len(self.row_data)))
return
[docs] def null_set_range(self, high_thresh = None, low_thresh = None, NoData_Value = None):
"""
Applies the dnppy.raster.null_set_range() function to every raster in rast_series
"""
raster.null_set_range(self.col_data['filepaths'],
high_thresh, low_thresh, NoData_Value)
return
[docs] def series_stats(self, outdir, saves = ['AVG','NUM','STD','SUM'],
low_thresh = None, high_thresh = None):
"""
Applies the dnppy.raster.many_stats() function to each
of the lowest level subsets of this rast_series.
"""
self.outdir = outdir
self.saves = saves
# only at the lowest discretezation level should stats be taken.
if self.subsetted:
for subset in self.subsets:
subset.series_stats(outdir, saves)
else:
raster.many_stats(self.col_data['filepaths'],
outdir, self.name, saves, low_thresh, high_thresh)
return
[docs] def make_subsets(self, subset_units, overlap_width = 0,
cust_center_time = False, discard_old = False):
"""
splits the time series into individual time chunks
used for taking advanced statistics, usually periodic
in nature. Also useful for exploiting temporal relationships
in a dataset for a variety of purposes including data
sanitation, periodic curve fitting, etc.
:param subset_units:
subset_units follows convention of fmt. For example:
%Y groups files by year
%m groups files by month
%j groups file by julian day of year
:param overlap_width:
this variable can be set to greater than 0
to allow "window" type statistics, so each subset may contain
data points from adjacent subsets.
overlap_width = 1 is like a window size of 3
overlap_width = 2 is like a window size of 5
WARNING: this function is imperfect for making subsets by months.
the possible lengths of a month are extremely variant, so sometimes
data points at the ends of a month will get placed in the adjacent
month. If you absolutely require accurate month subsetting,
you should use this function to subset by year, then use the
"group_bins" function to bin each year by month. so,
.. code-block:: python
ts.subset("%b") # subset by month
ts.subset("%Y") # subset by year
ts.group_bins("%b") # bin by month
:param cust_center_time:
Allows a custom center time to be used! This was added so that
days could be centered around a specific daily acquisition time.
for example, its often useful to define a day as
satellite data acquisition time +/- 12 hours.
if used, "cust_center_time" must be a datetime object!
:param discard_old:
By default, performing a subsetting on a time series that already
has subsets does not subset the master time series, but instead
the lowest level subsets. Setting "discard_old" to "True" will
discard all previous subsets of the time series and start subsetting
from scratch.
"""
if discard_old:
self.subsetted = False
self.subsets = []
if self.subsetted:
for subset in self.subsets:
subset.make_subsets(subset_units, overlap_width)
else:
self.subsetted = True
# sanitize overlap
if overlap_width < 0:
print("overlap_width must be 0 or more, setting it to 0!")
overlap_width = 0
# convert units into subset units and into terms of seconds
# timedelta objecs can only have units of days or seconds, so we use seconds
subset_units = self._fmt_to_units(subset_units)
# initial step width
step_width = self._units_to_seconds(subset_units)
if subset_units not in ['year','month','day','minute']:
raise Exception("Data is too high resolution to subset by {0}".format(subset_units))
print("Subsetting data by {0}".format(subset_units))
if self.time_dom == False:
raise Exception("must call 'define_time' method before taking subsets!")
# determine subset lists starting, end points and incriment
time_s = self.time_dom[0]
time_f = self.time_dom[-1]
# set up starttime with custom center times.
if cust_center_time:
print("using custom center time")
if subset_units == "month":
ustart = datetime(time_s.year, time_s.month,
cust_center_time.day, cust_center_time.hour,
cust_center_time.minute, cust_center_time.second,
cust_center_time.microsecond)
elif subset_units == "hour":
ustart = datetime(time_s.year, time_s.month, time_s.day, time_s.hour,
cust_center_time.minute, cust_center_time.second,
cust_center_time.microsecond)
elif subset_units == "minute":
ustart = datetime(time_s.year, time_s.month, time_s.day, time_s.hour, time_s.minute,
cust_center_time.second, cust_center_time.microsecond)
else: #subset_units == "day":
ustart = datetime(time_s.year, time_s.month, time_s.day,
cust_center_time.hour, cust_center_time.minute,
cust_center_time.second, cust_center_time.microsecond)
td = time_f - time_s
uend = cust_center_time + timedelta(seconds = td.total_seconds())
# otherwise, set the centers with no offest.
else:
ustart = self._center_datetime(time_s, subset_units)
uend = self._center_datetime(time_f, subset_units) + timedelta(seconds = step_width)
# Iterate through entire dataset one time step unit at a time.
delta = uend - ustart
center_time = ustart
while center_time < uend:
step_width = self._units_to_seconds(subset_units, center_time)
wind_seconds = step_width * (overlap_width + 0.5)
temp_data = []
for j, current_time in enumerate(self.time_dom):
dt = abs(center_time - current_time)
if dt.total_seconds() < wind_seconds:
temp_data.append(self.row_data[j])
# create the subset only if some data was found to populate it
if len(temp_data) > 0:
new_subset = rast_series(units = subset_units, parent = self)
new_subset.center_time = center_time
new_subset.from_list(temp_data, self.headers, self.time_header, self.fmt)
new_subset.define_time(self.time_header, self.fmt)
new_subset._name_as_subset()
self.subsets.append(new_subset)
center_time += timedelta(seconds = step_width)
return
[docs] def group_bins(self, fmt_units, overlap_width = 0, cyclical = True):
"""
Sorts the time series into time chunks by common bin_unit
used for grouping data rows together. For example, if one used
this function on a 5 year dataset with a bin_unit of month,
then the time_series would be subseted into 12 sets (1 for each
month), which each set containing all entries for that month,
regardless of what year they occurred in.
:param fmt_units:
%Y groups files by year
%m groups files by month
%j groups file by julian day of year
:param overlap_width:
similarly to "make_subsets" the "overlap_width" variable can be
set to greater than 1 to allow "window" type statistics, so
each subset may contain data points from adjacent subsets.
However, for group_bins, overlap_width must be an integer.
:param cyclical:
"cyclical" of "True" will allow end points to be considered adjacent.
So, for example, January will be considered adjacent to December,
day 1 will be considered adjacent to day 365.
"""
ow = int(overlap_width)
if self.subsetted:
for subset in self.subsets:
subset.group_bins(fmt_units, overlap_width, cyclical)
else:
self.subsetted = True
# ensure proper unit format is present
fmt = self._units_to_fmt(fmt_units)
units = self._fmt_to_units(fmt_units)
# set up cyclical parameters
if fmt == "%j": cylen = 365
if fmt == "%d": cylen = 365
if fmt == "%m": cylen = 12
if fmt == "%b": cylen = 12
# initialize a grouping array to idenfity row indices for each subset
grouping = [int(obj.strftime(fmt)) for obj in self.time_dom]
for i in xrange(min(grouping),max(grouping) + 1):
subset_units = self._fmt_to_units(fmt)
new_subset = rast_series( units = subset_units, parent = self)
# only take rows whos grouping is within ow of i
subset_rows = [j for j,g in enumerate(grouping) if g <= i+ow and g >=i-ow]
# fix endpoints
if cyclical:
if i <= ow:
subset_rows = subset_rows + [j for j,g in enumerate(grouping) if
i+ow >= cylen - g - cylen >= i-ow]
elif i >= cylen - ow:
subset_rows = subset_rows + [j for j,g in enumerate(grouping) if
i+ow >= cylen + g + cylen >= i-ow]
# grab row indeces from parent matrix to put in the subset
subset_data = [self.row_data[row] for row in subset_rows]
# run naming methods and definitions on the new subset
if not len(subset_data) == 0:
new_subset.from_list(subset_data, self.headers, self.time_header, self.fmt)
new_subset.define_time(self.time_header, self.fmt)
new_subset.center_time = self.time_dom[grouping.index(i)]
new_subset._name_as_subset(binned = True)
self.subsets.append(new_subset)
return
if __name__ == "__main__":
rs = rast_series()
rastdir = r"C:\Users\jwely\Desktop\troubleshooting\test\MOD10A1\frac_snow\FracSnowCover\Mosaic"
fmt = "%Y%j"
fmtmask = range(9,16)
rs.from_directory(rastdir, fmt, fmtmask)
statsdir = r"C:\Users\jwely\Desktop\troubleshooting\test\MOD10A1\statistics"
rs.null_set_range(high_thresh = 100, NoData_Value = 101)
rs.make_subsets("%d", overlap_width = 3)
rs.interrogate()
rs.series_stats(statsdir, low_thresh = 0.0, high_thresh = 100.0)