1. Problem
The requirement: given a CSV file of WeChat chat records, remove the duplicate records in which the same person sent the same message on the same day.
2. Implementation code
# -*- coding: utf-8 -*-
import os

import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; iterate plainly when tqdm is absent.
    def tqdm(iterable, *args, **kwargs):
        return iterable


class DeleteRepetition:
    """Remove repeated chat records from CSV exports.

    A record counts as a repeat when another record with the same ``'ID'``,
    the same day (the part of ``' time '`` before the first space) and the
    same ``' news '`` value appears earlier in the file.
    """

    def __init__(self, finished_filedir='newdata'):
        """Remember where cleaned CSV files are written.

        :param finished_filedir: root directory for the cleaned output files
        """
        self.finished_filedir = finished_filedir

    def deleteCsvRepetition(self, filename):
        """Deduplicate one CSV file and write the result under the output root.

        The first data row is discarded (the original author notes it carries
        no data). Of each group of rows sharing ID, day and message, only the
        first is kept; the full timestamp of every kept row is preserved.

        The output path is ``finished_filedir + '/' + filename``, so any
        directory component in ``filename`` is reproduced below the output
        root. Missing parent directories are created.

        :param filename: path of the CSV file to clean
        :return: None
        :raises FileNotFoundError: if ``filename`` does not exist
        """
        try:
            csvfile = pd.read_csv(filename, encoding="utf-8")
        except FileNotFoundError as err:
            raise FileNotFoundError('file is not found!') from err

        # Drop the first data row, which holds no real record.
        records = csvfile.iloc[1:]

        # Build the comparison keys separately so the real timestamps never
        # have to be overwritten and restored afterwards.
        day = records[' time '].str.split(' ', n=1).str[0]
        keys = pd.DataFrame({
            'ID': records['ID'],
            ' time ': day,
            ' news ': records[' news '],
        })
        deduped = records[~keys.duplicated(keep='first')]

        out_path = self.finished_filedir + '/' + filename
        parent = os.path.dirname(out_path)
        if parent:
            # to_csv does not create directories on its own.
            os.makedirs(parent, exist_ok=True)
        deduped.to_csv(out_path, index=False)

    def deleteCsvsRepetition(self, filedir):
        """Deduplicate every CSV file directly inside ``filedir``.

        Entries that are not regular files ending in ``.csv`` (case-insensitive)
        are skipped. Files are processed in sorted name order.

        :param filedir: directory containing the CSV files
        :return: None
        """
        filenames_csv = sorted(
            name for name in os.listdir(filedir)
            if name.lower().endswith('.csv')
            and os.path.isfile(os.path.join(filedir, name))
        )
        for name in tqdm(filenames_csv):
            self.deleteCsvRepetition(filedir + '/' + name)


if __name__ == '__main__':
    test = DeleteRepetition()
    test.deleteCsvsRepetition('20190311_0408')
Technology