import pandas as pd import numpy as np def tiles(no_tiles=8, data=None, sum_cols=None, group_by_col=None, verbose=True, maxrecord=30): if group_by_col==None: dma_sales = pd.DataFrame({'sales': [data[sum_cols].sum()]}) else: dma_sales = pd.DataFrame({'sales': data.groupby(group_by_col)[sum_cols].sum()}) # dma_sales_ind = dma_sales.reset_index(inplace=False).copy() df = pd.DataFrame(data=None, columns=data.columns) df['zip_score'] = np.nan df['zip_intra_rank'] = np.nan #formatted printing.... if group_by_col==None: # setting the first line of summary print... str_len = 0 + 49 - len(' Tiles Creation Summary ') star_str = '='*int(str_len/2) str_to_print = ''.join((star_str,' Tiles Creation Summary ',star_str)) if verbose: print('') if verbose: print(str_to_print) sl_no = 0 else: # setting the first line of summary print... str_gap = max(len(str(group_by_col))+12, max([len(str(c)) for c in dma_sales.index.tolist()])+2) str_len = str_gap + 49 - len(' Tiles Creation Summary ') star_str = '='*int(str_len/2) str_to_print = ''.join((star_str,' Tiles Creation Summary ',star_str)) if verbose: print('') if verbose: print(str_to_print) sl_no = 0 for dma in dma_sales.index: sales = dma_sales.loc[dma, 'sales'].sum() if group_by_col==None: df_dma_tmp = data.reset_index(drop=True).copy() else: cond_dic = {} for i, col in enumerate(group_by_col): cond_dic[i] = data[col].unique().tolist() condition_dic = {} for i in range(len(group_by_col)): str_i = 'abc'+str(i) if len(group_by_col)>1: condition_dic[str_i] = data[group_by_col[i]]==dma[i] if len(group_by_col)==1: condition_dic[str_i] = data[group_by_col[i]]==dma c_list = ' & '.join(["condition_dic['"+x+"']" for x in condition_dic.keys()]) df_dma_tmp = data[eval(c_list)].reset_index(drop=True).copy() total_row = len(df_dma_tmp)-1 row = 0 zip_sales = 0 for zip_score in range(1, no_tiles + 1): left_over_sales = sales - zip_sales bucket_sales = left_over_sales/(no_tiles - (zip_score - 1)) row_sales = df_dma_tmp.loc[row, sum_cols].sum().copy() zip_intra_rank = 0 while ((row_sales <= bucket_sales) & (row <= total_row)) | ((zip_score == no_tiles) & (row <= total_row)): if row < total_row: row_sales += df_dma_tmp.loc[row+1, sum_cols].sum().copy() zip_sales += df_dma_tmp.loc[row+1, sum_cols].sum().copy() zip_intra_rank += 1 df_tmp = df_dma_tmp[df_dma_tmp.index == row].copy() df_tmp['zip_score'] = zip_score df_tmp['zip_intra_rank'] = zip_intra_rank df_tmp = pd.DataFrame(df_tmp) df_tmp.reset_index(drop=True, inplace=True) df = df.append(df_tmp, ignore_index=True) row += 1 #formatted printing.... if verbose: # variable add print if group_by_col==None: if sl_no==0: print('{:<6 span="">.format('score', 'left over value', 'bucket sales', width=0)) print('-'*int(0+48)) sl_no += 1 if sl_no <= int(maxrecord/1): print('{:<6 span="">.format(zip_score, round(left_over_sales,2), round(row_sales,2), width=0)) elif (sl_no > int(maxrecord/1)) and (sl_no <= int(maxrecord/1)+3): print('{:<6 span="">.format('...', '...', '...', width=0)) elif (sl_no == int(maxrecord/1)+4): print('printing has been stopped... max reached.') else: if sl_no==0: print('variable=={:<{width}} {:<6 span="">.format(str(group_by_col), 'score', 'left over value', 'bucket sales', width=str_gap-10)) print('-'*int(str_gap+48)) sl_no += 1 if sl_no <= int(maxrecord/1): print('{:<{width}} {:<6 span="">.format(str(dma), zip_score, round(left_over_sales,2), round(row_sales,2), width=str_gap)) elif (sl_no > int(maxrecord/1)) and (sl_no <= int(maxrecord/1)+3): print('{:<{width}} {:<6 span="">.format('...', '...', '...', '...', width=str_gap)) elif (sl_no == int(maxrecord/1)+4): print('printing has been stopped... max iteration reached.') if group_by_col==None: # setting the last line of summary print... str_len = 0 + 49 - len(' Tiles Creation Summary End ') star_str = '='*int(str_len/2) str_to_print = ''.join((star_str,' Tiles Creation Summary End ',star_str)) else: # setting the last line of summary print... str_len = str_gap + 49 - len(' Tiles Creation Summary End ') star_str = '='*int(str_len/2) str_to_print = ''.join((star_str,' Tiles Creation Summary End ',star_str)) if verbose: print(str_to_print) return df if __name__=='__main__': # if data in in excel file... from pandas import ExcelWriter data_path = r'F:\Python_Modules\TileData.xlsx' df_tile = pd.read_excel(open(data_path,'rb'), sheetname='Data') df=tiles(no_tiles=8, data=df_tile, sum_cols='Total_ZIP_Sales', group_by_col=['STORE_DMA'], verbose=True, maxrecord=60) # WRITE IN DATA FRAME TO 'ab analysis' SHEET............................................................................... excel_file_path = r'F:\Python_Modules\TileData_Output.xlsx' #.......................................................................................................................... writer = ExcelWriter(excel_file_path) df.to_excel(writer, sheet_name='tiles', startrow=1, header=False, index=False) workbook = writer.book worksheet = writer.sheets['tiles'] # Add a header format. header_format = workbook.add_format({'bold': True,'text_wrap': False,'valign': 'top','fg_color': '#D3D3D3','border': 0}) # Write the column headers with the defined format. for col_num, value in enumerate(df.columns.values): worksheet.write(0, col_num + 0, value, header_format) writer.save() #..........................................................................................................................6>6>6>6>6>6>
Wednesday, January 31, 2018
tiles logic and python code
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment