mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 12:56:40 +00:00
fix option 4 to remove all duplicates and more
This commit is contained in:
parent
6bc49c6786
commit
8048a52770
1 changed file with 7 additions and 1 deletion
|
@@ -623,7 +623,9 @@ Maintenance:
|
||||||
rows2delete= [] # it is an empty list at first
|
rows2delete= [] # it is an empty list at first
|
||||||
for i,j in csvdf.iterrows():
|
for i,j in csvdf.iterrows():
|
||||||
row=csvdf.loc[i,:].values.tolist()
|
row=csvdf.loc[i,:].values.tolist()
|
||||||
#print_colors(f"{row}")
|
# check the number of columns in said row,
|
||||||
|
# print('rowcolnum:',len(row),' colnum:',len(csvdf.columns))
|
||||||
|
# print_colors(f"{row}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -669,6 +671,9 @@ Maintenance:
|
||||||
filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
|
filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
|
||||||
#print('2)',filter_vdf)
|
#print('2)',filter_vdf)
|
||||||
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
|
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
|
||||||
|
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
|
||||||
|
# TODO DELETE ALL DUPLICATES OF UVDF !
|
||||||
|
uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
|
||||||
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
|
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
|
||||||
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
|
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
|
||||||
newrow=row
|
newrow=row
|
||||||
|
@@ -676,6 +681,7 @@ Maintenance:
|
||||||
uvdf.index = uvdf.index + 1 # shifting index
|
uvdf.index = uvdf.index + 1 # shifting index
|
||||||
uvdf = uvdf.sort_index() # sorting by index
|
uvdf = uvdf.sort_index() # sorting by index
|
||||||
uvdf.to_csv(unverifiedcsvfile, index=False)
|
uvdf.to_csv(unverifiedcsvfile, index=False)
|
||||||
|
|
||||||
print("[+] NEW ROW =",newrow)
|
print("[+] NEW ROW =",newrow)
|
||||||
print_colors("[+] New row added to your own unverified.csv file!")
|
print_colors("[+] New row added to your own unverified.csv file!")
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue