import xlrd
import re
import os.path
import SurveyObject
from collections import OrderedDict
def run():
# Open the workbook
xl_workbook = xlrd.open_workbook("quota.xls")
xl_sheet,sheetName = getSheet(xl_workbook)
filename = raw_input("What filename should I output to? Default = transform.py: ")
if not filename:
filename = 'transform.py'
if os.path.isfile(filename):
yes = raw_input('This file already exists do you want to overwrite this file?')
if yes != 'y':
return
tables = separateTables(xl_sheet)
newTables,tablenames = getDefines(tables)
alllists = makeDicts(newTables,xl_workbook)
alllists = convertConds(alllists)
f = open(filename, 'w')
output = '''
def setMark(marker):
if not hasMarker(marker):
setMarker(marker)
'''
for lists,tablename in zip(alllists,tablenames):
output = output +writeCode(lists,sheetName,tablename)+'\n'
f.write(output)
#replaces .r1 with == 1
#looks at survey to set cond correctly
def convertConds(alllists):
surveyObject = SurveyObject.surveyObject()
for lists in alllists:
for list in lists:
for item in list:
subitems = list[item].strip().split(' ')
for subitem in subitems:
if '.' not in subitem:
continue
subitem = subitem.strip(' ()')
qlabel = subitem.split('.')[0].strip()
rlabel = subitem
#print qlabel
if qlabel in surveyObject.allQuestions:
if surveyObject.allQuestions[qlabel].checkbox:
list[item] = re.sub(qlabel+r'\.([a-zA-Z_]\w*)($| |\))', qlabel+r'\1 == 1\2',list[item])
elif surveyObject.allQuestions[qlabel].radio:
#print rlabel
if rlabel in surveyObject.allitemsdict:
element = surveyObject.allitemsdict[rlabel]
if element.value:
list[item] = re.sub(rlabel+r'($| |\))', qlabel+r' == '+element.value+r'\1',list[item])
else:
index = element.parent.objects[element.type].index(element) + 1
list[item] = re.sub(rlabel+r'($| |\))', qlabel+r' == '+str(index)+r'\1',list[item])
#list[item] = list[item].replace(rlabel,newlabel)
else:
newlabel = re.sub(r'\.[a-zA-Z_]+(\d*)($| |\))', r' == \1\2',rlabel)
list[item] = list[item].replace(rlabel,newlabel)
else:
pass
#just let it be
#newlabel = re.sub(r'\.[a-zA-Z_]+(\d*)($| |\))', r' == \1\2',rlabel)
#list[item] = list[item].replace(rlabel,newlabel)
return alllists
#Convert excel tables to a list of lists [[table1],[table2],[table3]]
def separateTables(xl_sheet):
tables = []
newTable = []
# Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
for rx in range(xl_sheet.nrows):
#print xl_sheet.row(rx)
allempty = True
for ry,cell in enumerate(xl_sheet.row(rx)):
cell_type = xl_sheet.cell_type(rx, ry)
cell_value = xl_sheet.cell_value(rx, ry)
#print ' ', cell_type, ':', cell_value
if cell_type != 0:
allempty = False
#append row
newTable.append(getValue(xl_sheet,rx))
break
if allempty:
tables.append(newTable)
newTable = []
tables.append(newTable)
return tables
# ask what sheet to transform
def getSheet(xl_workbook):
sheet_names = xl_workbook.sheet_names()
for index,sheet in enumerate(sheet_names):
print index,' Sheet', sheet
sheetIndex = raw_input("What is the index of the sheet you want to transform? ")
if not sheetIndex.isdigit() or not (int(sheetIndex) in range(0,len(sheet_names))):
print 'Bad Index'
return
sheetIndex = int(sheetIndex)
xl_sheet = xl_workbook.sheet_by_name(sheet_names[sheetIndex])
return xl_sheet,sheet_names[sheetIndex]
#makes a dictionary of defines and conds. dict['Male'] = ['Q2.r1']
def makeDicts(newTables,xl_workbook):
alllists = []
for table in newTables:
lists = []
for names in table:
dict1 = OrderedDict()
for name in names:
dict1[str(name)] = str(getCond(xl_workbook,name))
lists.append(dict1)
alllists.append(lists)
return alllists
def writeCode(lists,sheetname,tablename):
output = '\n#='+tablename
#converting the dict object to a usable string
for index,d in enumerate(lists):
output = output + "\nd" + str(index) + " = {"
for index2,item in enumerate(d):
if index2!=0:
output = output + ', '
output = output + "'"+item+"': " + d[item] #.replace("',",",").replace(": '",": ").replace("'}",'}')
output = output + "}"
#Begin setup of loop code
output += "\n\nif hasMarker('qualified'):\n"
spaces = 4
marker = 'removeMarker("/'+sheetname
for index,d in enumerate(lists):
i = str(index)
marker = marker+'/"+item'+i+'+"'
output = output + ' '*spaces+'for item' + i + ' in d'+ i +':\n'
spaces = spaces+4
output = output + ' ' *spaces + marker.rstrip('+"')+')\n'
spaces = 4
marker = 'setMark("/'+sheetname
for index,d in enumerate(lists):
i = str(index)
marker = marker+'/"+item'+i+'+"'
output = output + ' '*spaces+'for item' + i + ' in d'+ i +':\n'
spaces = spaces+4
output = output + ' '*spaces+'if d'+i+'[item'+i+']:\n'
spaces = spaces+4
#add set marker to the inside of the loop
output = output + ' ' *spaces + marker.rstrip('+"')+')\n'
return output
#This changes the tables from their current formatting to only give me the defines sorted by each columnm
# each column is a list [[male,female],[age1,age2,age3]]
def getDefines(tables):
newtables = []
tablenames = []
for table in tables:
rows = []
tablenames.append(table[0][0].split('=')[1])
topcell = table[0][0].split('=')[0]
topcell = topcell.replace('#','')
topcells = topcell.split(' ')
topcondition = None
for cell in topcells:
if cell and not cell.isdigit() and 'cells:' not in cell:
topcondition = cell
break
t = []
#if we have #US=US we need to add US as part of the table. Order Matters
if topcondition:
newrow = set()
newrow.add(topcondition)
t.append(newrow)
isgrid = False
for x in table[0]:
#if it's a grid the top row will be missing a #
if x and '#' not in x:
isgrid=True
break
newtable = []
if isgrid:
#sets up the grid to be flat like we expect
newtable = getDefines2(table)
else:
#transpose table and remove top row
newtable = zip(*table[1:])
#remove empty cols
newtable = removeEmpty(newtable)
#Removes empty columns
#Creates a set of defines
#Removes the limits which are at the end of the row
for row in newtable[:-1]:
newrow = []
for col in row:
if col and col not in newrow:
newrow.append(col)
#add only if it has something
if len(newrow):
t.append(newrow)
newtables.append(t)
return (newtables,tablenames)
def getDefines2(table):
#find end so we can remove all the limits
end = 1
for col in table[0][1:]:
if col =='#':
end = end + 1
else:
break
#remove top column with defines to put at very end
lastcolumn = table[0][end:]
#create a new table without limits
table = table[1:]
newtable = []
for row in table:
newtable.append(row[:end])
#transpose table and append top row to end
newtable = zip(*newtable)
newtable.append(tuple(lastcolumn))
newtable.append(('inf','inf'))
#remove empty cols
newtable = removeEmpty(newtable)
return newtable
#Returns an array of the rows values. i.e. ['male','female']
def getValue(xl_sheet,rx):
newrow = []
row = xl_sheet.row(rx)
for ry,cell in enumerate(row):
#cell_type = xl_sheet.cell_type(rx, ry)
cell_value = xl_sheet.cell_value(rx, ry)
newrow.append(cell_value)
return newrow
#remove empty cols
def removeEmpty(table):
table2 = []
for row in table:
good = False
for col in row:
if col:
good = True
break
if good:
table2.append(row)
return table2
#gets the cond from the defines sheet
def getCond(xl_workbook,name):
xl_sheet = xl_workbook.sheet_by_index(0)
for rx in range(xl_sheet.nrows):
cell_value = xl_sheet.cell_value(rx, 0)
if cell_value == name:
return xl_sheet.cell_value(rx, 1)
run()
#xl_sheet = xl_workbook.sheet_by_index(0)
#print ('Sheet name: %s' % xl_sheet.name)
# Pull the first row by index
# (rows/columns are also zero-indexed)
#
#row = xl_sheet.row(0) # 1st row
# Print 1st row values and types
#
#from xlrd.sheet import ctype_text
#print('(Column #) type:value')
#for idx, cell_obj in enumerate(row):
# cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
# print('(%s) %s %s' % (idx, cell_type_str, cell_obj.value))
# Print all values, iterating through rows and columns
#
#num_cols = xl_sheet.ncols # Number of columns
#for row_idx in range(0, xl_sheet.nrows): # Iterate through rows
# print ('-'*40)
# print ('Row: %s' % row_idx) # Print row number
# for col_idx in range(0, num_cols): # Iterate through columns
# cell_obj = xl_sheet.cell(row_idx, col_idx) # Get cell object by row, col
# print ('Column: [%s] cell_obj: [%s]' % (col_idx, cell_obj))