import statsmodels.formula.api as smf def forward_selected(data, y, Intercept=True, verbose=True): """ Linear model designed by forward selection. Parameters: ----------- data : pandas DataFrame with all possible predictors and y y : string, name of y column in data Intercept : bool parameter. Whether we want Intercept in regression model or not verbose : bool parameter. if True the it will print out the steps of adding variables Returns: -------- model: an "optimal" fitted statsmodels linear model with an intercept selected by forward selection evaluated by adjusted R-squared """ step = 1 result = [] reg_df_tmp = data.copy() remaining = set(reg_df_tmp.columns) str_gap = max([len(c) for c in list(remaining)])+2 # setting the first line of summary print... str_len = str_gap + 22 - len(' Forward Selection Summary ') star_str = '='*int(str_len/2) str_toverbose_print = ''.join((star_str,' Forward Selection Summary ',star_str)) if verbose: print('') if verbose: print(str_to_print) remaining.remove(y) selected = [] current_score, best_new_score = 0.0, 0.0 sl_no = 0 while remaining and current_score == best_new_score: scores_with_candidates = [] for candidate in remaining: if Intercept==True: formula = "{} ~ {} + 1".format(y, ' + '.join(selected + [candidate])) else: formula = "{} ~ {} + 0".format(y, ' + '.join(selected + [candidate])) score = smf.ols(formula, reg_df_tmp).fit().rsquared_adj scores_with_candidates.append((score, candidate)) scores_with_candidates.sort() best_new_score, best_candidate = scores_with_candidates.pop() if current_score <= best_new_score: remaining.remove(best_candidate) selected.append(best_candidate) current_score = best_new_score if verbose: # variable add print if sl_no==0: print('Sl.No. {:<{width}} Adj. R-squared'.format('Variable Added', width=str_gap)) print('-'*int(str_gap+22)) sl_no += 1 print('{:<6 span="" width="">.format(sl_no, best_candidate, best_new_score, width=str_gap)) # setting the last line of summary print... str_len = str_gap + 22 - len(' Forward Selection End ') star_str = '='*int(str_len/2) str_to_print = ''.join((star_str,' Forward Selection End ',star_str)) if verbose: print(str_to_print) if Intercept==True: formula = "{} ~ {} + 1".format(y, ' + '.join(selected)) else: formula = "{} ~ {} + 0".format(y, ' + '.join(selected)) model = smf.ols(formula, reg_df_tmp).fit() result.append([selected, model.rsquared_adj, model]) step += 1 if verbose: print('') if verbose: print(model.summary()) del reg_df_tmp return [selected, model]6>
Wednesday, January 24, 2018
Linear Regression / forward selection
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment