Fix option 4 to remove all duplicates, and related cleanups

This commit is contained in:
root 2025-05-04 23:46:22 +02:00
parent 6bc49c6786
commit 8048a52770

View file

@ -623,7 +623,9 @@ Maintenance:
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
row=csvdf.loc[i,:].values.tolist()
#print_colors(f"{row}")
# check the number of columns in said row,
# print('rowcolnum:',len(row),' colnum:',len(csvdf.columns))
# print_colors(f"{row}")
@ -669,6 +671,9 @@ Maintenance:
filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
#print('2)',filter_vdf)
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# TODO DELETE ALL DUPLICATES OF UVDF !
uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
newrow=row
@ -676,6 +681,7 @@ Maintenance:
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
uvdf.to_csv(unverifiedcsvfile, index=False)
print("[+] NEW ROW =",newrow)
print_colors("[+] New row added to your own unverified.csv file!")
else: