mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-05-16 04:06:59 +00:00
fix option 4 to remove all duplicates and more
This commit is contained in:
parent
6bc49c6786
commit
8048a52770
1 changed file with 7 additions and 1 deletion
|
@@ -623,7 +623,9 @@ Maintenance:
|
|||
rows2delete= [] # it is an empty list at first
|
||||
for i,j in csvdf.iterrows():
|
||||
row=csvdf.loc[i,:].values.tolist()
|
||||
#print_colors(f"{row}")
|
||||
# check the number of columns in said row,
|
||||
# print('rowcolnum:',len(row),' colnum:',len(csvdf.columns))
|
||||
# print_colors(f"{row}")
|
||||
|
||||
|
||||
|
||||
|
@@ -669,6 +671,9 @@ Maintenance:
|
|||
filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
|
||||
#print('2)',filter_vdf)
|
||||
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
|
||||
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
|
||||
# TODO DELETE ALL DUPLICATES OF UVDF !
|
||||
uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
|
||||
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
|
||||
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
|
||||
newrow=row
|
||||
|
@@ -676,6 +681,7 @@ Maintenance:
|
|||
uvdf.index = uvdf.index + 1 # shifting index
|
||||
uvdf = uvdf.sort_index() # sorting by index
|
||||
uvdf.to_csv(unverifiedcsvfile, index=False)
|
||||
|
||||
print("[+] NEW ROW =",newrow)
|
||||
print_colors("[+] New row added to your own unverified.csv file!")
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue