import pandas as pd
import numpy as np
def tiles(no_tiles=8, data=None, sum_cols=None, group_by_col=None, verbose=True, maxrecord=30):
if group_by_col==None:
dma_sales = pd.DataFrame({'sales': [data[sum_cols].sum()]})
else:
dma_sales = pd.DataFrame({'sales': data.groupby(group_by_col)[sum_cols].sum()})
# dma_sales_ind = dma_sales.reset_index(inplace=False).copy()
df = pd.DataFrame(data=None, columns=data.columns)
df['zip_score'] = np.nan
df['zip_intra_rank'] = np.nan
#formatted printing....
if group_by_col==None:
# setting the first line of summary print...
str_len = 0 + 49 - len(' Tiles Creation Summary ')
star_str = '='*int(str_len/2)
str_to_print = ''.join((star_str,' Tiles Creation Summary ',star_str))
if verbose: print('')
if verbose: print(str_to_print)
sl_no = 0
else:
# setting the first line of summary print...
str_gap = max(len(str(group_by_col))+12, max([len(str(c)) for c in dma_sales.index.tolist()])+2)
str_len = str_gap + 49 - len(' Tiles Creation Summary ')
star_str = '='*int(str_len/2)
str_to_print = ''.join((star_str,' Tiles Creation Summary ',star_str))
if verbose: print('')
if verbose: print(str_to_print)
sl_no = 0
for dma in dma_sales.index:
sales = dma_sales.loc[dma, 'sales'].sum()
if group_by_col==None:
df_dma_tmp = data.reset_index(drop=True).copy()
else:
cond_dic = {}
for i, col in enumerate(group_by_col):
cond_dic[i] = data[col].unique().tolist()
condition_dic = {}
for i in range(len(group_by_col)):
str_i = 'abc'+str(i)
if len(group_by_col)>1: condition_dic[str_i] = data[group_by_col[i]]==dma[i]
if len(group_by_col)==1: condition_dic[str_i] = data[group_by_col[i]]==dma
c_list = ' & '.join(["condition_dic['"+x+"']" for x in condition_dic.keys()])
df_dma_tmp = data[eval(c_list)].reset_index(drop=True).copy()
total_row = len(df_dma_tmp)-1
row = 0
zip_sales = 0
for zip_score in range(1, no_tiles + 1):
left_over_sales = sales - zip_sales
bucket_sales = left_over_sales/(no_tiles - (zip_score - 1))
row_sales = df_dma_tmp.loc[row, sum_cols].sum().copy()
zip_intra_rank = 0
while ((row_sales <= bucket_sales) & (row <= total_row)) | ((zip_score == no_tiles) & (row <= total_row)):
if row < total_row:
row_sales += df_dma_tmp.loc[row+1, sum_cols].sum().copy()
zip_sales += df_dma_tmp.loc[row+1, sum_cols].sum().copy()
zip_intra_rank += 1
df_tmp = df_dma_tmp[df_dma_tmp.index == row].copy()
df_tmp['zip_score'] = zip_score
df_tmp['zip_intra_rank'] = zip_intra_rank
df_tmp = pd.DataFrame(df_tmp)
df_tmp.reset_index(drop=True, inplace=True)
df = df.append(df_tmp, ignore_index=True)
row += 1
#formatted printing....
if verbose: # variable add print
if group_by_col==None:
if sl_no==0:
print('{:<6 span="">.format('score', 'left over value', 'bucket sales', width=0))
print('-'*int(0+48))
sl_no += 1
if sl_no <= int(maxrecord/1):
print('{:<6 span="">.format(zip_score, round(left_over_sales,2), round(row_sales,2), width=0))
elif (sl_no > int(maxrecord/1)) and (sl_no <= int(maxrecord/1)+3):
print('{:<6 span="">.format('...', '...', '...', width=0))
elif (sl_no == int(maxrecord/1)+4):
print('printing has been stopped... max reached.')
else:
if sl_no==0:
print('variable=={:<{width}} {:<6 span="">.format(str(group_by_col), 'score', 'left over value', 'bucket sales', width=str_gap-10))
print('-'*int(str_gap+48))
sl_no += 1
if sl_no <= int(maxrecord/1):
print('{:<{width}} {:<6 span="">.format(str(dma), zip_score, round(left_over_sales,2), round(row_sales,2), width=str_gap))
elif (sl_no > int(maxrecord/1)) and (sl_no <= int(maxrecord/1)+3):
print('{:<{width}} {:<6 span="">.format('...', '...', '...', '...', width=str_gap))
elif (sl_no == int(maxrecord/1)+4):
print('printing has been stopped... max iteration reached.')
if group_by_col==None:
# setting the last line of summary print...
str_len = 0 + 49 - len(' Tiles Creation Summary End ')
star_str = '='*int(str_len/2)
str_to_print = ''.join((star_str,' Tiles Creation Summary End ',star_str))
else:
# setting the last line of summary print...
str_len = str_gap + 49 - len(' Tiles Creation Summary End ')
star_str = '='*int(str_len/2)
str_to_print = ''.join((star_str,' Tiles Creation Summary End ',star_str))
if verbose: print(str_to_print)
return df
if __name__=='__main__':
# if data in in excel file...
from pandas import ExcelWriter
data_path = r'F:\Python_Modules\TileData.xlsx'
df_tile = pd.read_excel(open(data_path,'rb'), sheetname='Data')
df=tiles(no_tiles=8, data=df_tile, sum_cols='Total_ZIP_Sales', group_by_col=['STORE_DMA'], verbose=True, maxrecord=60)
# WRITE IN DATA FRAME TO 'ab analysis' SHEET...............................................................................
excel_file_path = r'F:\Python_Modules\TileData_Output.xlsx'
#..........................................................................................................................
writer = ExcelWriter(excel_file_path)
df.to_excel(writer, sheet_name='tiles', startrow=1, header=False, index=False)
workbook = writer.book
worksheet = writer.sheets['tiles']
# Add a header format.
header_format = workbook.add_format({'bold': True,'text_wrap': False,'valign': 'top','fg_color': '#D3D3D3','border': 0})
# Write the column headers with the defined format.
for col_num, value in enumerate(df.columns.values):
worksheet.write(0, col_num + 0, value, header_format)
writer.save()
#..........................................................................................................................6>6>6>6>6>6>