gitphp 0.2.9.1 :: dcaas.git/blob

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57	"""DCaaS Services XLS to CSV converter Converts the DCaaS XLS file to CSV while removing line breaks and other special characters Alex Sadleir <maxious@lambdacomplex.org> """ from xlrd import * import unicodecsv import glob import codecs def cleanseValue(column,value): if isinstance(value, basestring): value = "".join([x if ord(x) < 128 else '' for x in value.replace(u"\u2022", u"\n•")]) value = value.replace('<','>').replace('>','<').replace('\n','<br/>').replace('\r','<br/>').strip() else: value = str(value).strip() if column == 'ACNABN' and value != None: value = value.replace(' ','').replace(".0","").replace('"','').replace(",","") value = value.replace("ABN"," ").replace("ACN"," ").replace("/"," ").replace(":"," ") value = value.replace(" "," ").strip() valueParts = value.split(' ') if len(valueParts) == 2: if len(valueParts[0]) > len(valueParts[1]): value = valueParts[0] if len(valueParts[0]) < len(valueParts[1]): value = valueParts[1] if value == "351222885465": value = "35122885465" if value == "067060937171": value = "67060937171" if value == "148179846": value = "49148179846" if value == "": value = "62081001194" #verizon return value for filename in glob.glob("*.xls"): print filename b = open_workbook(filename) for sheet in b.sheet_names()[2:]: outputfile = filename.replace(".xls","") + sheet.replace(".","-").replace(" ","_").replace("&","and") + ".csv" print outputfile s = b.sheet_by_name(sheet) rbc = open(outputfile,'w') bcw = unicodecsv.writer(rbc,unicodecsv.excel) rowNum = 0 headerRow = [] for row in range(s.nrows): this_row = [] for col in range(s.ncols): this_row.append(cleanseValue(headerRow[col] if col < len(headerRow) else None , s.cell_value(row,col))) bcw.writerow(this_row) if rowNum == 0: headerRow = this_row rowNum = rowNum + 1

"""DCaaS Services XLS to CSV converter
   Converts the DCaaS XLS file to CSV while removing line breaks and other special characters
   Alex Sadleir <maxious@lambdacomplex.org>
 
"""
 
from xlrd import *
import unicodecsv
import glob
import codecs
 
def cleanseValue(column,value):
        if isinstance(value, basestring):
                value = "".join([x if ord(x) < 128 else '' for x in value.replace(u"\u2022", u"\n&bull;")])
                value = value.replace('<','&gt;').replace('>','&lt;').replace('\n','<br/>').replace('\r','<br/>').strip()
        else:
                value = str(value).strip()
        if column == 'ACNABN' and value != None:
                value = value.replace(' ','').replace(".0","").replace('"','').replace(",","")
                value = value.replace("ABN"," ").replace("ACN"," ").replace("/"," ").replace(":"," ")
                value = value.replace("  "," ").strip()
                valueParts = value.split(' ')
                if len(valueParts) == 2:
                        if len(valueParts[0]) > len(valueParts[1]): 
                                value = valueParts[0]
                        if len(valueParts[0]) < len(valueParts[1]):
                                value = valueParts[1]
                if value == "351222885465":
                        value = "35122885465"                   
                if value == "067060937171":
                        value = "67060937171"                   
                if value == "148179846":
                        value = "49148179846"                   
                if value == "":
                        value = "62081001194" #verizon
        return value
 
for filename in glob.glob("*.xls"):
        print filename
        b = open_workbook(filename)
        for sheet in b.sheet_names()[2:]:
                outputfile = filename.replace(".xls","") + sheet.replace(".","-").replace(" ","_").replace("&","and") + ".csv"
                print outputfile
                s = b.sheet_by_name(sheet)
                rbc = open(outputfile,'w')
                bcw = unicodecsv.writer(rbc,unicodecsv.excel)
                rowNum = 0
                headerRow = []
                for row in range(s.nrows):
                    this_row = []
                    for col in range(s.ncols):
                        this_row.append(cleanseValue(headerRow[col] if col < len(headerRow) else None , s.cell_value(row,col)))
                    bcw.writerow(this_row)
                    if rowNum == 0:
                        headerRow = this_row
                    rowNum = rowNum + 1