Wednesday, January 31, 2018

tiles logic and python code

import pandas as pd
import numpy as np

def tiles(no_tiles=8, data=None, sum_cols=None, group_by_col=None, verbose=True, maxrecord=30):

    if group_by_col==None:
        dma_sales = pd.DataFrame({'sales': [data[sum_cols].sum()]})
    else:
        dma_sales = pd.DataFrame({'sales': data.groupby(group_by_col)[sum_cols].sum()})
    # dma_sales_ind = dma_sales.reset_index(inplace=False).copy()

    df = pd.DataFrame(data=None, columns=data.columns)
    df['zip_score'] = np.nan
    df['zip_intra_rank'] = np.nan

    #formatted printing....

    if group_by_col==None:
        # setting the first line of summary print...
        str_len = 0 + 49 - len(' Tiles Creation Summary ')
        star_str = '='*int(str_len/2)
        str_to_print = ''.join((star_str,' Tiles Creation Summary ',star_str))
        if verbose: print('')
        if verbose: print(str_to_print)
        sl_no = 0
    else:
        # setting the first line of summary print...
        str_gap = max(len(str(group_by_col))+12, max([len(str(c)) for c in dma_sales.index.tolist()])+2)
        str_len = str_gap + 49 - len(' Tiles Creation Summary ')
        star_str = '='*int(str_len/2)
        str_to_print = ''.join((star_str,' Tiles Creation Summary ',star_str))
        if verbose: print('')
        if verbose: print(str_to_print)
        sl_no = 0

    for dma in dma_sales.index:
        sales = dma_sales.loc[dma, 'sales'].sum()
        if group_by_col==None: 
            df_dma_tmp = data.reset_index(drop=True).copy()
        else: 
            cond_dic = {}
            for i, col in enumerate(group_by_col):
                cond_dic[i] = data[col].unique().tolist()

            condition_dic = {}
            for i in range(len(group_by_col)):
                str_i = 'abc'+str(i)
                if len(group_by_col)>1: condition_dic[str_i] = data[group_by_col[i]]==dma[i]
                if len(group_by_col)==1: condition_dic[str_i] = data[group_by_col[i]]==dma

                c_list = ' & '.join(["condition_dic['"+x+"']" for x in condition_dic.keys()])

            df_dma_tmp = data[eval(c_list)].reset_index(drop=True).copy()

        total_row = len(df_dma_tmp)-1
        row = 0

        zip_sales = 0

        for zip_score in range(1, no_tiles + 1):
            left_over_sales = sales - zip_sales
            bucket_sales = left_over_sales/(no_tiles - (zip_score - 1))
            row_sales = df_dma_tmp.loc[row, sum_cols].sum().copy()
            zip_intra_rank = 0

            while ((row_sales <= bucket_sales) & (row <= total_row)) | ((zip_score == no_tiles) & (row <= total_row)):
                if row < total_row:
                    row_sales += df_dma_tmp.loc[row+1, sum_cols].sum().copy()
                    zip_sales += df_dma_tmp.loc[row+1, sum_cols].sum().copy()

                zip_intra_rank += 1

                df_tmp = df_dma_tmp[df_dma_tmp.index == row].copy()
                df_tmp['zip_score'] = zip_score
                df_tmp['zip_intra_rank'] = zip_intra_rank
                df_tmp = pd.DataFrame(df_tmp)

                df_tmp.reset_index(drop=True, inplace=True)
                df = df.append(df_tmp, ignore_index=True)

                row += 1

            #formatted printing....
            if verbose: # variable add print 
                if group_by_col==None:
                    if sl_no==0:
                        print('{:<6 span="">.format('score', 'left over value', 'bucket sales', width=0))
                        print('-'*int(0+48))

                    sl_no += 1
                    if sl_no <= int(maxrecord/1):
                        print('{:<6 span="">.format(zip_score, round(left_over_sales,2), round(row_sales,2), width=0))
                    elif (sl_no > int(maxrecord/1)) and (sl_no <= int(maxrecord/1)+3):
                        print('{:<6 span="">.format('...', '...', '...', width=0))
                    elif (sl_no == int(maxrecord/1)+4):
                        print('printing has been stopped... max reached.')
                else: 
                    if sl_no==0:
                        print('variable=={:<{width}} {:<6 span="">.format(str(group_by_col), 'score', 'left over value', 'bucket sales', width=str_gap-10))
                        print('-'*int(str_gap+48))

                    sl_no += 1
                    if sl_no <= int(maxrecord/1):
                        print('{:<{width}} {:<6 span="">.format(str(dma), zip_score, round(left_over_sales,2), round(row_sales,2), width=str_gap))
                    elif (sl_no > int(maxrecord/1)) and (sl_no <= int(maxrecord/1)+3):
                        print('{:<{width}} {:<6 span="">.format('...', '...', '...', '...', width=str_gap))
                    elif (sl_no == int(maxrecord/1)+4):
                        print('printing has been stopped... max iteration reached.')

    if group_by_col==None:
        # setting the last line of summary print...
        str_len = 0 + 49 - len(' Tiles Creation Summary End ')
        star_str = '='*int(str_len/2)
        str_to_print = ''.join((star_str,' Tiles Creation Summary End ',star_str))
    else:
        # setting the last line of summary print...
        str_len = str_gap + 49 - len(' Tiles Creation Summary End ')
        star_str = '='*int(str_len/2)
        str_to_print = ''.join((star_str,' Tiles Creation Summary End ',star_str))

    if verbose: print(str_to_print)
    return df

if __name__=='__main__':
    # if data in in excel file...
    from pandas import ExcelWriter

    data_path = r'F:\Python_Modules\TileData.xlsx'
    df_tile = pd.read_excel(open(data_path,'rb'), sheetname='Data')
    df=tiles(no_tiles=8, data=df_tile, sum_cols='Total_ZIP_Sales', group_by_col=['STORE_DMA'], verbose=True, maxrecord=60)

    # WRITE IN DATA FRAME TO 'ab analysis' SHEET...............................................................................
    excel_file_path = r'F:\Python_Modules\TileData_Output.xlsx'
    #..........................................................................................................................
    writer = ExcelWriter(excel_file_path)
    df.to_excel(writer, sheet_name='tiles', startrow=1, header=False, index=False)
    workbook  = writer.book
    worksheet = writer.sheets['tiles']

    # Add a header format.
    header_format = workbook.add_format({'bold': True,'text_wrap': False,'valign': 'top','fg_color': '#D3D3D3','border': 0})
    # Write the column headers with the defined format.
    for col_num, value in enumerate(df.columns.values):
      worksheet.write(0, col_num + 0, value, header_format)
    writer.save()
    #..........................................................................................................................

No comments:

Post a Comment