Sunday, August 27, 2017

Python: Split a large file into multiple files

def file_split(filehandler, delimiter='\t',
               row_limit=25,
               output_name_template='output_%s.txt',
               output_path='F:\\Novus\\Decision Tree\\Data\\', keep_headers=True):
    """Split a delimited text file into numbered pieces of at most row_limit rows.

    Rows are read from ``filehandler`` with ``csv.reader`` and written with
    ``csv.writer`` to files named ``output_path + output_name_template % n``
    for n = 1, 2, 3, ...  The first piece is always created, even when the
    input holds no data rows.

    Args:
        filehandler: Open text file (or any iterable of lines) to split.
        delimiter: Field separator used for both reading and writing.
        row_limit: Maximum number of data rows per piece (headers excluded).
        output_name_template: ``%``-format template that receives the piece
            number.
        output_path: Prefix prepended verbatim to each file name, so a
            directory must end with its path separator.
        keep_headers: When true, the first input row is treated as a header
            and repeated at the top of every piece.

    Raises:
        ValueError: If ``row_limit`` is smaller than 1.
    """
    import csv

    if row_limit < 1:
        raise ValueError('row_limit must be at least 1')

    def open_piece(piece_number):
        # newline='' keeps the text layer from translating the line endings
        # the csv writer emits itself; without it Windows output ends up with
        # a blank line between rows.
        piece_path = ''.join([output_path, output_name_template % piece_number])
        return open(piece_path, 'w', newline='')

    reader = csv.reader(filehandler, delimiter=delimiter)

    # An empty input has no header row; fall back to None instead of letting
    # StopIteration escape from next().
    headers = next(reader, None) if keep_headers else None

    current_piece = 1
    of = open_piece(current_piece)
    try:
        current_out_writer = csv.writer(of, delimiter=delimiter)
        if headers is not None:
            current_out_writer.writerow(headers)

        for i, row in enumerate(reader):
            # Piece k holds data rows (k - 1) * row_limit .. k * row_limit - 1,
            # so roll over to a new file once the running index passes that.
            if i >= row_limit * current_piece:
                of.close()
                current_piece += 1
                of = open_piece(current_piece)
                current_out_writer = csv.writer(of, delimiter=delimiter)
                if headers is not None:
                    current_out_writer.writerow(headers)
            current_out_writer.writerow(row)
    finally:
        # Runs on errors too, so the piece being written is never left open.
        of.close()
        
               
# Split iris_data.txt into pieces of at most 100 data rows each. The `with`
# block closes the source file once the split finishes, and newline='' is the
# mode the csv module expects for files it reads.
with open(r'F:\Novus\Decision Tree\Data\iris_data.txt', 'r', newline='') as source_file:
    file_split(source_file, row_limit=100,
               output_name_template='iris_split_%s.txt',
               output_path='F:\\Novus\\Decision Tree\\Data\\')

No comments:

Post a Comment