"""Helpers for reading, sorting, marking and writing small CSV tables.

A table is a list of rows, every row is a list of cells, and the first row
is treated as the header by the sorting and marking helpers. Running this
file as a script executes the demo in ``main``; importing it only defines
the helpers.
"""

import copy
import csv
import operator
import os


def csv_to_list(csv_file, delimiter=','):
    """
    Reads in a CSV file and returns the contents as list,
    where every row is stored as a sublist, and each element
    in the sublist represents 1 cell in the table.

    Args:
        csv_file: Path of the CSV file to read.
        delimiter: Single character that separates the cells of a row.

    Returns:
        A list of rows; every row is a list of strings.
    """
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded in quoted cells are returned unchanged.
    with open(csv_file, 'r', newline='') as csv_con:
        return list(csv.reader(csv_con, delimiter=delimiter))


def print_csv(csv_content):
    """
    Prints CSV file to standard output.

    Cells are converted with str() and joined by tabs; a ruler of 50 dashes
    is printed before and after the table.
    """
    ruler = 50 * '-'
    print(ruler)
    for row in csv_content:
        print('\t'.join(str(cell) for cell in row))
    print(ruler)


def convert_cells_to_floats(csv_cont):
    """
    Converts cells to floats if possible
    (modifies input CSV content list).

    Cells for which float() raises ValueError are left untouched.
    Returns None.
    """
    for row in csv_cont:
        for idx, cell in enumerate(row):
            try:
                row[idx] = float(cell)
            except ValueError:
                # Deliberate best effort: non-numeric text (headers,
                # names, already marked values) stays as it is.
                pass


def _cell_sort_key(cell):
    """Return a sort key that ranks non-text cells before text cells."""
    # Comparing a float with a str raises TypeError on Python 3. Putting
    # the "is text" flag first keeps both kinds apart while leaving the
    # order inside each kind exactly as a plain comparison would give.
    return (isinstance(cell, str), cell)


def sort_by_column(csv_cont, col, reverse=False):
    """
    Sorts CSV contents by column name (if col argument is type str)
    or column index (if col argument is type int).

    The first row is taken as the header and always stays on top. In a
    column that mixes numbers and text, numbers sort before text.

    Args:
        csv_cont: Table as a list of rows, header first.
        col: Header name (str) or zero-based index (int) of the column.
        reverse: Sort in descending order if True.

    Returns:
        A new list holding the header followed by the sorted rows. The row
        lists themselves are shared with csv_cont, not copied. An empty
        table yields an empty list.

    Raises:
        ValueError: If col is a string that is not in the header.
        IndexError: If a data row has no cell at the requested index.
    """
    if not csv_cont:
        return []
    header, body = csv_cont[0], csv_cont[1:]
    col_index = header.index(col) if isinstance(col, str) else col
    cell_of = operator.itemgetter(col_index)
    ordered = sorted(
        body,
        key=lambda row: _cell_sort_key(cell_of(row)),
        reverse=reverse,
    )
    return [header] + ordered


def mark_minmax(csv_cont, col, mark_max=True, marker='*'):
    """
    Sorts a list of CSV contents by a particular column
    (see sort_by_column function).
    Puts a marker on the maximum value if mark_max=True,
    or puts a marker on the minimum value if mark_max=False
    (modifies input CSV content list).

    Only one cell is marked; among equal extreme values it is the one that
    comes first in csv_cont. The marked cell becomes a string (its str()
    form followed by marker). Tables without data rows are left unchanged.

    Returns None.
    """
    sorted_csv = sort_by_column(csv_cont, col, reverse=mark_max)
    if len(sorted_csv) < 2:
        # Empty table or header only: nothing to mark.
        return None
    col_index = sorted_csv[0].index(col) if isinstance(col, str) else col
    # The sorted view shares its row lists with csv_cont, so writing to the
    # first data row here updates the caller's table.
    extreme_row = sorted_csv[1]
    extreme_row[col_index] = str(extreme_row[col_index]) + marker
    return None


def mark_all_col(csv_cont, mark_max=True, marker='*'):
    """
    Marks the maximum (if mark_max=True) or minimum (if mark_max=False)
    value in every column of a CSV contents list - except the first column.
    Returns a new list that is sorted by the names in the first column
    (modifies input CSV content list).

    An empty table yields an empty list.
    """
    if not csv_cont:
        return []
    for c in range(1, len(csv_cont[0])):
        mark_minmax(csv_cont, c, mark_max, marker)
    return sort_by_column(csv_cont, 0, False)


def write_csv(dest, csv_cont):
    """
    Writes a comma-delimited CSV file.

    Args:
        dest: Path of the file to create or overwrite.
        csv_cont: Table as a list of rows.
    """
    # newline='' stops the text layer from translating the writer's own
    # '\r\n' row terminator (which would add blank rows on Windows).
    with open(dest, 'w', newline='') as out_file:
        csv.writer(out_file, delimiter=',').writerows(csv_cont)


def process_csv(csv_in, csv_out):
    """
    Takes an input- and output-filename of a CSV file
    and marks minimum values for every column.

    The marked table is written to csv_out in the row order of the input.
    """
    csv_cont = csv_to_list(csv_in)
    csv_marked = copy.deepcopy(csv_cont)
    convert_cells_to_floats(csv_marked)
    mark_all_col(csv_marked, mark_max=False, marker='*')
    write_csv(csv_out, csv_marked)


def main():
    """Run the demo: read, print, sort, mark, write and batch-process."""
    csv_cont = csv_to_list('../Data/test.csv')

    print('first 3 rows:')
    # Slicing keeps this from failing on files with fewer than three rows.
    for row in csv_cont[:3]:
        print(row)

    csv_cont = csv_to_list('../Data/test.csv')
    print('\n\nOriginal CSV file:')
    print_csv(csv_cont)

    print('first 3 rows:')
    for row in csv_cont[:3]:
        print(row)

    csv_cont = csv_to_list('../Data/test.csv')
    print('\n\nOriginal CSV file:')
    print_csv(csv_cont)

    print('\n\nCSV sorted by column "column3":')
    convert_cells_to_floats(csv_cont)
    csv_sorted = sort_by_column(csv_cont, 'column3')
    print_csv(csv_sorted)

    csv_cont = csv_to_list('../Data/test.csv')
    csv_marked = copy.deepcopy(csv_cont)
    convert_cells_to_floats(csv_marked)
    mark_all_col(csv_marked, mark_max=False, marker='*')
    print_csv(csv_marked)
    print('*: min-value')

    write_csv('../Data/test_marked.csv', csv_marked)
    csv_cont = csv_to_list('../Data/test_marked.csv')
    print('\n\nWritten CSV file:')
    print_csv(csv_cont)

    in_dir = '../Data'
    out_dir = '../Data/processed'
    # The loop variable is not called "csv" so that it cannot be confused
    # with the csv module.
    csvs = [
        (os.path.join(in_dir, fname), os.path.join(out_dir, fname))
        for fname in os.listdir(in_dir)
        if fname.endswith('.csv')
    ]

    for pair in csvs:
        print(pair)

    # The output directory may not exist yet; writing into it would fail.
    os.makedirs(out_dir, exist_ok=True)
    for csv_in, csv_out in csvs:
        process_csv(csv_in, csv_out)


if __name__ == '__main__':
    main()