Source code for dnppy.textio.text_data

__author__ = ["Jwely"]

import json


class text_data():
[docs] """ A text data object is a very simple template structure for passing text type data (usually lists of weather or climate data entries) around between functions. """ def __init__(self, headers = None, row_data = None): self.headers = headers # headers (1d list) self.row_data = row_data # data (2d list) self.col_data = {} # column wise data (dict) if row_data is not None: self._build_col_data() def __getitem__(self, index): """ used to return row data when using __getitem__ on this object type """ return self.row_data[index] @staticmethod def _enf_unique_headers(headers):
[docs] """ Appends digits to duplicate items in a list. Used to ensure each header is a unique string so a column-wise dictionary can be built. For example, a text file with headers ``["name", "tag", "tag", "tag"]`` will be changed to have headers ``["name", "tag1", "tag2", "tag3"]``. :param headers: list of string elements intended for use as headers. """ # build list of duplicate values duplicates = [] for i,header in enumerate(headers): if i > 0 and header in headers[:(i-1)] and not header in duplicates: duplicates.append(header) # for each duplicate name, number each occurrence for dup in duplicates: count = 0 for i,header in enumerate(headers): if header == dup: headers[i] = header + (str(count)) count += 1 return headers def _build_col_data(self):
[docs] """ Builds column wise data dictionary structure out of row_data. (row_data is a list of rows, where each row is a list, so row_data is a list of lists). Col data is a single dictionary, where the keys are the "headers" and the value is the list of all values in that column from the top down. """ temp_col = zip(*self.row_data) self.col_data = {} for i, col in enumerate(temp_col): self.col_data[self.headers[i]] = list(col) return self.col_data def _build_row_data(self):
[docs] """ Builds row wise data from existing column data. The opposite of _build_col_data. """ num_rows = len(self.col_data[self.headers[0]]) temp_rows = [] for i in range(num_rows): temp_rows.append([self.col_data[header][i] for header in self.headers]) self.row_data = temp_rows def write_csv(self, text_filepath, delim = ','):
[docs] """ writes the contents of this text file object as a CSV :param text_filepath: output filepath to write csv .txt :param delim: delimiter to use. defaults to comma """ # write the file as a csv with open(text_filepath, 'w+') as f: if self.headers: f.write(delim.join(self.headers) + '\n') for row in self.row_data: row = map(str,row) entry = delim.join(row) + '\n' f.write(entry) f.close() return def read_csv(self, text_filepath, delim = ',', has_headers = True):
[docs] """ simple default reader of a delimited file. Does not read fixed-width :param text_filepath: csv filepath to read from :param delim: delimiter to use, defaults to comma :param has_headers: Set "False" if csv file has no headers (this is bad, you should give your file headers) """ with open(text_filepath, 'r+') as f: self.row_data = [] if has_headers: headers = next(f).replace('\n','').split(delim) headers = [x for x in headers if x != ""] self.headers = self._enf_unique_headers(headers) else: self.headers = None for line in f: # check for a delimiter in the line is used to prevent # blank rows from getting into the row_data. if delim in line: entry = line.replace('\n','').split(delim) self.row_data.append(entry) f.close() return def write_json(self, json_filepath, row_wise = None, col_wise = None):
[docs] """ Writes the contents of this text data object to a json file. Note that json format does `not` support complex numbers with imaginary components. Also note that json formats are dictionary like, in that they preserve relationships, but do not display the list of relationships in any particular order. :param json_filepath: output filepath to write json :param row_wise: set to TRUE to save each row as its own structure :param col_wise: set to True to save each col as its own structure """ # structures json data if row_wise: json_dict = [self.headers] + self.row_data elif col_wise: json_dict = self._build_col_data() else: raise ValueError("Either 'row_wise' or 'col_wise' args must be set to True!") # write the file as a json with open(json_filepath, 'w+') as f: f.write(json.dumps(json_dict, json_filepath, indent = 4)) def read_json(self, json_filepath, row_wise = None, col_wise = None):
[docs] """ Reads the contents of this tdo from a json file created by the ``text_data.write_json()`` function. Please note that this text_data class is designed for use with tabular type data, so this should function will not read ALL json files in a satisfactory manner. Users wishing to read json files in a general sense should simply use the ``json`` module and invoke ``json.loads`` and ``json.dumps`` directly on their data. :param json_filepath: json filepath to read from :param row_wise: read json file objects in as rows :param col_wise: read json file objects in as columns """ json_data = open(json_filepath).read() data = json.loads(json_data) if row_wise: self.row_data = data[1:] self.headers = self._enf_unique_headers(data[0]) self._build_col_data() if col_wise: self.col_data = data self.headers = self._enf_unique_headers([key for key in self.col_data]) self._build_row_data() return # testing if __name__ == "__main__":

# testing
if __name__ == "__main__":

    wd = text_data()
    wd.read_csv("test_data/weather_example.csv")
    wd.write_csv("test_data/weather_test_out.csv")
    wd.write_json("test_data/weather_test_cols.json", col_wise=True)
    wd.write_json("test_data/weather_test_rows.json", row_wise=True)
    del wd

    wd = text_data()
    wd.read_json("test_data/weather_test_rows.json", row_wise=True)
    wd.write_csv("test_data/weather_test_rows.csv")
    del wd

    wd = text_data()
    wd.read_json("test_data/weather_test_cols.json", col_wise=True)
    wd.write_csv("test_data/weather_test_cols.csv")
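
    # illustrative addition (not part of the original test script; station names
    # and output path are made up): a text_data object can also be built directly
    # from in-memory lists instead of a file, exercising __getitem__, column
    # access, and json output without any input files.
    wd = text_data(headers=["station", "temp"],
                   row_data=[["KRIC", "20"], ["KDCA", "22"]])
    print(wd[0])                  # first row via __getitem__
    print(wd.col_data["temp"])    # column access keyed by header
    wd.write_json("test_data/in_memory_example.json", row_wise=True)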