mirror of
https://github.com/scoobybejesus/cryptools.git
synced 2025-01-18 03:10:15 +00:00
Add helper script for sanitizing CSV input file.
This commit is contained in:
parent
beeee221f3
commit
1c20ff1329
161
clean_input_csv.py
Executable file
161
clean_input_csv.py
Executable file
@ -0,0 +1,161 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
## Purpose: Allows user to keep additional data in their CSV Input File to increase its usefulness and
|
||||
## enhance readability, yet be able to properly format it prior to importing into `cryptools`.
|
||||
## e.g.:
|
||||
## -Keep an additional first column for flagging/noting important transactions
|
||||
## -Keep additional columns for tracking a running balance
|
||||
## -Rows beneath transactions for life-to-date totals and other calculations and notes
|
||||
##   -Ability to use number formatting with parentheses for negative numbers and commas as thousands separators
|
||||
##       -This script will change (1,000.00) to -1000.00
|
||||
##
|
||||
## If a column doesn't have a header, this script will exclude it from the sanitized output.
|
||||
## Similarly, this script will exclude transaction rows missing data in either of the first
|
||||
## two fields of the row.
|
||||
|
||||
## Usage:
|
||||
|
||||
# 1. Export/Save crypto activity as csv
|
||||
# 2. Move the csv file to your desired directory
|
||||
# 3. Rename file to <unedited>.csv (see variable below)
|
||||
# 4. Build/run this file in an editor or on command line (from same directory), creating the input file
|
||||
# 5. Import the input file into cryptools
|
||||
|
||||
import csv
|
||||
import re
|
||||
import os
|
||||
|
||||
unedited = "DigiTrnx.csv"  # Default input file; override with the first command-line argument

stage3 = "InputFile-pycleaned.csv"  # Default output file; override with the second argument

HEADER_ROW_COUNT = 4  # Leading rows that are copied through without row filtering or number cleanup


def drop_incomplete_rows(rows):
    """Return only the rows whose first two fields are both non-empty.

    Notes, sums and other calculations kept beneath the transaction rows leave
    at least one of those two fields blank, which is how they are discarded.
    Rows with fewer than two fields (including blank lines, which `csv.reader`
    yields as an empty list) are discarded too rather than raising IndexError.
    """
    return [row for row in rows if len(row) >= 2 and row[0] != "" and row[1] != ""]


def headed_column_indices(header):
    """Return the positions of every non-blank cell in the first header row.

    Positions come from `enumerate`, not `list.index`, so two columns that share
    the same header text are both kept.
    """
    return [position for position, name in enumerate(header) if name != ""]


def select_columns(row, indices):
    """Return the fields of `row` at `indices`, skipping positions past the row's end."""
    return [row[position] for position in indices if position < len(row)]


def normalize_field(field):
    """Return `field` with accounting-style number formatting removed.

    i.e.,  (1.01) -> -1.01     (1,000.00) -> -1000.00     1,000.00 -> 1000.00

    A field is rewritten only when the rewritten text parses as a float.
    Anything else (blank cells, dates, memos, including memos that start with
    a parenthesis or contain commas) is returned unchanged.
    """
    text = str(field)
    if text.startswith("("):
        # Parentheses denote a negative number: (1,000.00) means -1000.00
        candidate = text.replace("(", "-").replace(")", "").replace(",", "")
    else:
        candidate = text.replace(",", "")
    try:
        float(candidate)
    except ValueError:
        # Not a number, so leave the original text (and its commas) alone
        return text
    return candidate


def clean_rows(rows):
    """Return the sanitized version of `rows` (an iterable of lists of strings).

    Steps, in order:
      1. The first four rows are headers and are always kept.
      2. Remaining rows missing either of their first two fields are dropped.
      3. Columns whose first-header cell is blank are removed from every row.
      4. Number formatting is cleaned up in the remaining (non-header) rows.

    Raises ValueError if there are fewer than four rows.

    NOTE(review): step 2 looks at the first two fields BEFORE unheaded columns
    are removed, so an unheaded leading "flag" column must be filled in on every
    transaction row for that row to survive. Confirm this ordering is intended.
    """
    rows = list(rows)
    if len(rows) < HEADER_ROW_COUNT:
        raise ValueError(
            "expected at least %d header rows, found %d" % (HEADER_ROW_COUNT, len(rows))
        )

    headers = rows[:HEADER_ROW_COUNT]
    transactions = drop_incomplete_rows(rows[HEADER_ROW_COUNT:])
    kept = headed_column_indices(headers[0])

    cleaned = [select_columns(header, kept) for header in headers]
    for row in transactions:
        cleaned.append([normalize_field(value) for value in select_columns(row, kept)])
    return cleaned


def clean_csv_file(source_path, target_path):
    """Read the CSV at `source_path`, sanitize it, and write it to `target_path`.

    The input is read completely and cleaned before the output is opened, so a
    malformed input never leaves a half-written output file, and no intermediate
    files are created or removed in the working directory.
    """
    # newline='' lets the csv module handle line endings itself
    with open(source_path, newline="") as fin:
        rows = list(csv.reader(fin))

    cleaned = clean_rows(rows)

    with open(target_path, "w", newline="") as fout:
        csv.writer(fout).writerows(cleaned)


def main(argv=None):
    """Command-line entry point: clean_input_csv.py [input.csv [output.csv]]

    With no arguments, reads `unedited` and writes `stage3`.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Sanitize a CSV input file prior to importing it into cryptools."
    )
    parser.add_argument("input", nargs="?", default=unedited,
                        help="CSV file to clean (default: %(default)s)")
    parser.add_argument("output", nargs="?", default=stage3,
                        help="cleaned CSV file to write (default: %(default)s)")
    args = parser.parse_args(argv)

    clean_csv_file(args.input, args.output)

    print("Input file ready")


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user