Compare commits

...

5 Commits

Author SHA1 Message Date
scoobybejesus a0f062cdf5 Script: newline caused append behavior (undesirable). 2020-11-14 20:35:25 -05:00
scoobybejesus 8ec8c2f302 Script: changed to /usr/bin/env python3. 2020-11-14 19:21:29 -05:00
scoobybejesus 3d2ab6ee34 README: mention Python helper script. 2020-11-14 17:35:28 -05:00
scoobybejesus 1c20ff1329 Add helper script for sanitizing CSV input file. 2020-11-14 17:33:29 -05:00
scoobybejesus beeee221f3 README: modify Excel warning; include workaround. 2020-11-14 17:11:00 -05:00
2 changed files with 170 additions and 5 deletions

View File

@ -23,6 +23,9 @@ containing the user's entire cryptocurrency transaction history, the software wi
*The tracking isn't pooled by `ticker`. Rather, it's tracked at the account/wallet level.
There is a helper Python script at the root of the repo that will assist you in sanitizing your CSV file
so it can be successfully imported into `cryptools`.
---
### Features
@ -52,18 +55,20 @@ when appreciated cryptocurrency was used to make a tax-deductible charitable con
* Precision is limited to eight decimal places. Additional digits will be stripped during
import and may cause unintended rounding issues.
* Microsoft Excel. Don't let this cause you to bang your head against a wall.
* Microsoft Excel. Don't let Excel cause you to bang your head against a wall.
Picture this scenario. You keep your transactions for your input file in a Google Sheet,
and you're meticulous about making sure it's perfect.
You then download it as a CSV file and import it into `cryptools`.
You then download it as a CSV file and import it into `cryptools`.
It works perfectly, and you have all your reports.
Then you realize you'd like to quickly change a memo and re-run the reports, so you open the CSV file in Excel and edit it.
Then you import it into `cryptools` again and the program panics!
What happened is most likely that Excel changed the rounding of your precise decimals underneath you!
As a result, it appears your input file has been clearly incorrectly prepared
because it appears that you're spending more coins than you actually owned at that time.
Depending on the rounding, `cryptools` may think your input file has been incorrectly prepared
because you've supposedly spent more coins than you actually owned at that time.
`Cryptools` does not let you spend coins you don't own, and it will exit upon finding such a condition.
The program is right, and your data is right, but Excel modified your data, and it can be infuriating when the program crashes for "no reason."
The program is right, and your data is right, but Excel modified your data, so the program crashed for "no reason."
The solution is to open Excel first and then, from the ribbon's Data tab, import your CSV file using "From Text."
Choose Delimited, then Comma, and then highlight every column and choose Text as the data type.
## Installation

160
clean_input_csv.py Executable file
View File

@ -0,0 +1,160 @@
#!/usr/bin/env python3
## Purpose: Allows user to keep additional data in their CSV Input File to increase its usefulness and
## enhance readability, yet be able to properly format it prior to importing into `cryptools`.
## e.g.:
## -Keep an additional first column for flagging/noting important transactions
## -Keep additional columns for tracking a running balance
## -Rows beneath transactions for life-to-date totals and other calculations and notes
## -Ability to use number formatting with parentheses for negative numbers and commas
## -This script will change (1,000.00) to -1000.00
##
## If a column doesn't have a header, this script will exclude it from the sanitized output.
## Similarly, this script will exclude transaction rows missing data in either of the first
## two fields of the row.
## Usage:
# 1. Export/Save crypto activity as csv
# 2. Move the csv file to your desired directory
# 3. Rename file to <unedited>.csv (see variable below)
# 4. Build/run this file in an editor or on command line (from same directory), creating the input file
# 5. Import the input file into cryptools
import csv
import re
import os
unedited = "DigiTrnx.csv"  # Default source file name; pass another path to main() to override
stage3 = "InputFile-pycleaned.csv"  # Default name of the sanitized output file
HEADER_ROW_COUNT = 4  # Leading rows that are copied through as headers, never cleaned or discarded


def has_leading_data(row):
    """Return True when the first two fields of *row* both hold data.

    Rows with fewer than two fields (including the empty list that csv.reader
    yields for a blank line) count as missing data rather than raising
    IndexError, so notes/sums/blank lines under the transactions are dropped.
    """
    return len(row) >= 2 and row[0] != "" and row[1] != ""


def labelled_columns(header):
    """Return the indices of the columns whose cell in *header* is not blank.

    Positions come from enumerate() rather than header.index(), so a label
    that appears more than once keeps every one of its columns.
    """
    return [index for index, label in enumerate(header) if label != ""]


def select_columns(row, kept):
    """Return the fields of *row* found at the *kept* indices, in order.

    Indices past the end of a short row are skipped.
    """
    return [row[index] for index in kept if index < len(row)]


def is_number(text):
    """Return True when float() can parse *text*."""
    try:
        float(text)
    except ValueError:
        return False
    return True


def clean_field(field):
    """Return *field* reformatted so a numeric value can be parsed.

    i.e., (1.01) -> -1.01    (1,000.00) -> -1000.00    1,000.00 -> 1000.00

    The reformatted text is used only when it parses as a number. Anything
    else (a memo, a date, an empty cell) is returned untouched, so a memo
    that contains commas or happens to start with "(" is not altered.
    """
    text = str(field)
    if text.startswith("("):
        # Accounting-style negative: parentheses become a leading minus sign.
        candidate = text.replace("(", "-").replace(")", "").replace(",", "")
    else:
        candidate = text.replace(",", "")
    # Uncomment the below and modify as necessary if you want to change date formatting
    # if re.search(r'\d\d-\d\d-\d\d', text):
    #     return text.replace('-', '/')
    return candidate if is_number(candidate) else text


def sanitize_rows(rows):
    """Yield the sanitized form of *rows*, an iterable of CSV rows (lists of strings).

    The first HEADER_ROW_COUNT rows are headers and are always emitted.
    A transaction row is emitted only when both of its first two fields hold
    data. Every emitted row keeps only the columns whose cell in the first
    header row is not blank, and transaction fields pass through clean_field().

    Raises:
        ValueError: if *rows* has fewer than HEADER_ROW_COUNT rows.
    """
    rows = iter(rows)
    # range() comes first in zip() so no transaction row is consumed once
    # the header rows have been collected.
    headers = [row for _, row in zip(range(HEADER_ROW_COUNT), rows)]
    if len(headers) < HEADER_ROW_COUNT:
        raise ValueError(
            "expected at least %d header rows, found %d"
            % (HEADER_ROW_COUNT, len(headers))
        )
    kept = labelled_columns(headers[0])
    for header in headers:
        yield select_columns(header, kept)
    for row in rows:
        # NOTE(review): the blank check looks at the first two fields of the
        # row as found in the source file, i.e. before header-less columns
        # are dropped. Confirm that is intended when the source file has a
        # header-less first column.
        if has_leading_data(row):
            yield [clean_field(field) for field in select_columns(row, kept)]


def main(source=unedited, destination=stage3):
    """Read the CSV file *source* and write its sanitized form to *destination*.

    The rows are sanitized in memory before *destination* is opened, so no
    intermediate files are created and a failure while reading or sanitizing
    leaves any existing *destination* untouched. Both files are opened with
    newline='' as the csv module requires.
    """
    with open(source, newline="") as fin:
        cleaned = list(sanitize_rows(csv.reader(fin)))
    with open(destination, "w", newline="") as fout:
        csv.writer(fout).writerows(cleaned)
    print("Input file ready")


if __name__ == "__main__":
    main()