"""DCaaS Services XLS to CSV converter |
"""DCaaS Services XLS to CSV converter |
Converts the DCaaS XLS file to CSV while removing line breaks and other special characters |
Converts the DCaaS XLS file to CSV while removing line breaks and other special characters |
Alex Sadleir <maxious@lambdacomplex.org> |
Alex Sadleir <maxious@lambdacomplex.org> |
|
|
""" |
""" |
|
|
from xlrd import * |
from xlrd import * |
import unicodecsv |
import unicodecsv |
import glob |
import glob |
import codecs |
import codecs |
|
|
def cleanseValue(column,value): |
def cleanseValue(column,value): |
if isinstance(value, basestring): |
if isinstance(value, basestring): |
value = "".join([x if ord(x) < 128 else '' for x in value]) |
value = "".join([x if ord(x) < 128 else '' for x in value]) |
value = value.replace('<','>').replace('>','<').replace('\n','<br/>').replace('\r','<br/>') |
value = value.replace('<','>').replace('>','<').replace('\n','<br/>').replace('\r','<br/>') |
else: |
else: |
value = str(value) |
value = str(value) |
if column == 'ACNABN' and value != None: |
if column == 'ACNABN' and value != None: |
value = value.replace(' ','').replace(".0","").replace('"','').replace(",","") |
value = value.replace(' ','').replace(".0","").replace('"','').replace(",","") |
value = value.replace("ABN"," ").replace("ACN"," ").replace("/"," ").replace(":"," ") |
value = value.replace("ABN"," ").replace("ACN"," ").replace("/"," ").replace(":"," ") |
value = value.replace(" "," ").strip() |
value = value.replace(" "," ").strip() |
valueParts = value.split(' ') |
valueParts = value.split(' ') |
if len(valueParts) == 2: |
if len(valueParts) == 2: |
if len(valueParts[0]) > len(valueParts[1]): |
if len(valueParts[0]) > len(valueParts[1]): |
value = valueParts[0] |
value = valueParts[0] |
if len(valueParts[0]) < len(valueParts[1]): |
if len(valueParts[0]) < len(valueParts[1]): |
value = valueParts[1] |
value = valueParts[1] |
if value == "351222885465": |
if value == "351222885465": |
value = "35122885465" |
value = "35122885465" |
if value == "067060937171": |
if value == "067060937171": |
value = "67060937171" |
value = "67060937171" |
if value == "148179846": |
if value == "148179846": |
value = "49148179846" |
value = "49148179846" |
if value == "": |
if value == "": |
value = "62081001194" #verizon |
value = "62081001194" #verizon |
return value |
return value |
|
|
for filename in glob.glob("*.xls"): |
for filename in glob.glob("*.xls"): |
print filename |
print filename |
b = open_workbook(filename) |
b = open_workbook(filename) |
for sheet in b.sheet_names()[2:]: |
for sheet in b.sheet_names()[2:]: |
outputfile = filename.replace(".xls","") + sheet.replace(".","-").replace(" ","_").replace("&","and") + ".csv" |
outputfile = filename.replace(".xls","") + sheet.replace(".","-").replace(" ","_").replace("&","and") + ".csv" |
print outputfile |
print outputfile |
s = b.sheet_by_name(sheet) |
s = b.sheet_by_name(sheet) |
rbc = open(outputfile,'w') |
rbc = open(outputfile,'w') |
bcw = unicodecsv.writer(rbc,unicodecsv.excel) |
bcw = unicodecsv.writer(rbc,unicodecsv.excel) |
rowNum = 0 |
rowNum = 0 |
headerRow = [] |
headerRow = [] |
for row in range(s.nrows): |
for row in range(s.nrows): |
this_row = [] |
this_row = [] |
for col in range(s.ncols): |
for col in range(s.ncols): |
this_row.append(cleanseValue(headerRow[col] if col < len(headerRow) else None , s.cell_value(row,col))) |
this_row.append(cleanseValue(headerRow[col] if col < len(headerRow) else None , s.cell_value(row,col))) |
bcw.writerow(this_row) |
bcw.writerow(this_row) |
if rowNum == 0: |
if rowNum == 0: |
headerRow = this_row |
headerRow = this_row |
rowNum = rowNum + 1 |
rowNum = rowNum + 1 |
|
|