gitphp 0.2.9.1 :: dcaas.git/blobdiff

blob:a/data/xls2csv.py -> blob:b/data/xls2csv.py

"""DCaaS Services XLS to CSV converter	"""DCaaS Services XLS to CSV converter
Converts the DCaaS XLS file to CSV while removing line breaks and other special characters	Converts the DCaaS XLS file to CSV while removing line breaks and other special characters
Alex Sadleir <maxious@lambdacomplex.org>	Alex Sadleir <maxious@lambdacomplex.org>

"""	"""

from xlrd import *	from xlrd import *
import unicodecsv	import unicodecsv
import glob	import glob
import codecs	import codecs

def cleanseValue(column,value):	def cleanseValue(column,value):
if isinstance(value, basestring):	if isinstance(value, basestring):
value = "".join([x if ord(x) < 128 else '' for x in value])	value = "".join([x if ord(x) < 128 else '' for x in value.replace(u"\u2022", u"\n•")])
value = value.replace('<','>').replace('>','<').replace('\n','<br/>').replace('\r','<br/>').strip()	value = value.replace('<','>').replace('>','<').replace('\n','<br/>').replace('\r','<br/>').strip()
else:	else:
value = str(value).strip()	value = str(value).strip()
if column == 'ACNABN' and value != None:	if column == 'ACNABN' and value != None:
value = value.replace(' ','').replace(".0","").replace('"','').replace(",","")	value = value.replace(' ','').replace(".0","").replace('"','').replace(",","")
value = value.replace("ABN"," ").replace("ACN"," ").replace("/"," ").replace(":"," ")	value = value.replace("ABN"," ").replace("ACN"," ").replace("/"," ").replace(":"," ")
value = value.replace(" "," ").strip()	value = value.replace(" "," ").strip()
valueParts = value.split(' ')	valueParts = value.split(' ')
if len(valueParts) == 2:	if len(valueParts) == 2:
if len(valueParts[0]) > len(valueParts[1]):	if len(valueParts[0]) > len(valueParts[1]):
value = valueParts[0]	value = valueParts[0]
if len(valueParts[0]) < len(valueParts[1]):	if len(valueParts[0]) < len(valueParts[1]):
value = valueParts[1]	value = valueParts[1]
if value == "351222885465":	if value == "351222885465":
value = "35122885465"	value = "35122885465"
if value == "067060937171":	if value == "067060937171":
value = "67060937171"	value = "67060937171"
if value == "148179846":	if value == "148179846":
value = "49148179846"	value = "49148179846"
if value == "":	if value == "":
value = "62081001194" #verizon	value = "62081001194" #verizon
return value	return value

for filename in glob.glob("*.xls"):	for filename in glob.glob("*.xls"):
print filename	print filename
b = open_workbook(filename)	b = open_workbook(filename)
for sheet in b.sheet_names()[2:]:	for sheet in b.sheet_names()[2:]:
outputfile = filename.replace(".xls","") + sheet.replace(".","-").replace(" ","_").replace("&","and") + ".csv"	outputfile = filename.replace(".xls","") + sheet.replace(".","-").replace(" ","_").replace("&","and") + ".csv"
print outputfile	print outputfile
s = b.sheet_by_name(sheet)	s = b.sheet_by_name(sheet)
rbc = open(outputfile,'w')	rbc = open(outputfile,'w')
bcw = unicodecsv.writer(rbc,unicodecsv.excel)	bcw = unicodecsv.writer(rbc,unicodecsv.excel)
rowNum = 0	rowNum = 0
headerRow = []	headerRow = []
for row in range(s.nrows):	for row in range(s.nrows):
this_row = []	this_row = []
for col in range(s.ncols):	for col in range(s.ncols):
this_row.append(cleanseValue(headerRow[col] if col < len(headerRow) else None , s.cell_value(row,col)))	this_row.append(cleanseValue(headerRow[col] if col < len(headerRow) else None , s.cell_value(row,col)))
bcw.writerow(this_row)	bcw.writerow(this_row)
if rowNum == 0:	if rowNum == 0:
headerRow = this_row	headerRow = this_row
rowNum = rowNum + 1	rowNum = rowNum + 1