fix lantern: remove duplicates upon adding new link

This commit is contained in:
root 2025-05-09 15:56:04 +02:00
parent a418270b4a
commit a39cf7fdb0
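
Before a new link is appended, any rows sharing its URL are now removed from both verified.csv and unverified.csv, and unverified.csv is additionally deduplicated on its URL column when it is reloaded and when rows are pruned.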


@@ -229,8 +229,28 @@ Maintenance:
newrow = [instance, category, name, url, sensi, desc, 'YES', '100']
print_colors(f"[+] NEWROW= {newrow}")
# (the rest is automatic: status and score; instance is '' because it is your own instance)
# TODO: check that the entry doesn't already exist in verified.csv and in unverified.csv;
# if it doesn't exist, add it into unverified.csv
# delete existing entries in verified.csv
vdf_same_url_filter = vdf["URL"] == url  # boolean mask: rows with the same url
vdf_same_url_filter_count = vdf_same_url_filter.sum()  # total url matches
if vdf_same_url_filter_count > 0:
    print(f"Found {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
    for index, row in vdf[vdf_same_url_filter].iterrows():
        print_colors(f"[+] ROW[{index}]= {list(row)}")
    vdf = vdf[~vdf_same_url_filter].reset_index(drop=True)  # keep only the rows that do not match the filter
    print(f"Deleted {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
    if desc == '':  # an empty description means the entry goes into unverified.csv, so save the modified verified.csv now
        vdf.to_csv(verifiedcsvfile, index=False)
# delete existing entries in unverified.csv
uvdf_same_url_filter = uvdf["URL"] == url  # boolean mask: rows with the same url
uvdf_same_url_filter_count = uvdf_same_url_filter.sum()  # total url matches
if uvdf_same_url_filter_count > 0:
    print(f"Found {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
    for index, row in uvdf[uvdf_same_url_filter].iterrows():
        print_colors(f"[+] ROW[{index}]= {list(row)}")
    uvdf = uvdf[~uvdf_same_url_filter].reset_index(drop=True)  # keep only the rows that do not match the filter
    print(f"Deleted {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
    if desc != '':  # a non-empty description means the entry goes into verified.csv, so save the modified unverified.csv now
        uvdf.to_csv(unverifiedcsvfile, index=False)
if desc == '':  # an empty description means the new entry goes into unverified.csv
    print("Adding new row in unverified.csv since description is empty")
    uvdf.loc[-1] = newrow  # append the new row at a temporary index
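
For reference, a minimal, self-contained sketch of the same-URL removal pattern used above. The "URL" column name mirrors the diff; the sample data is hypothetical:

import pandas as pd

df = pd.DataFrame({
    "Name": ["a", "b", "c"],
    "URL": ["http://x.onion", "http://y.onion", "http://x.onion"],
})
url = "http://x.onion"

same_url = df["URL"] == url  # boolean mask of rows matching the new link's url
if same_url.sum() > 0:
    df = df[~same_url].reset_index(drop=True)  # drop the matches, renumber rows
# df now holds only the http://y.onion row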
@@ -674,7 +694,6 @@ Maintenance:
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# drop duplicate URLs in uvdf, keeping the first occurrence
uvdf = uvdf.drop_duplicates(subset=['URL'], keep='first')
uvdf.to_csv(unverifiedcsvfile, index=False)
filter_uvdf = uvdf[uvdf.URL.str.contains(filterterm, na=False)]
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
    newrow = row
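
A minimal sketch of why na=False matters in the str.contains filter above (sample data is hypothetical): without it, missing URLs produce NaN in the result, which cannot be used as a boolean mask:

import pandas as pd

df = pd.DataFrame({"URL": ["http://x.onion", None, "http://y.onion"]})

# na=False maps missing URLs to False, so the result is a valid boolean mask
mask = df.URL.str.contains("x.onion", na=False)
print(df[mask])  # only the http://x.onion row
# df[df.URL.str.contains("x.onion")] would raise on the NaN entry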
@@ -710,13 +729,6 @@ Maintenance:
csvdf.drop(i, inplace=True)
csvdf.to_csv(csvfilepath, index=False)
rows2delete = []  # start with an empty list
# drop duplicate URLs in uvdf, keeping the first occurrence in the sorted order
uvdf = uvdf.sort_index()  # sort by index
uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sort by category, then by descending score
uvdf = uvdf.drop_duplicates(subset=['URL'], keep='first')
uvdf.to_csv(unverifiedcsvfile, index=False)
break
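
Because drop_duplicates(keep='first') retains the first row in the current order, sorting by descending Score beforehand means the highest-scoring duplicate of each URL survives. A minimal sketch with hypothetical data:

import pandas as pd

uvdf = pd.DataFrame({
    "Category": ["Search", "Search"],
    "URL": ["http://x.onion", "http://x.onion"],
    "Score": [40, 90],
})

uvdf = uvdf.sort_values(by=["Category", "Score"], ascending=[True, False])
uvdf = uvdf.drop_duplicates(subset=["URL"], keep="first")
print(uvdf)  # keeps the Score=90 row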
@@ -1446,3 +1458,4 @@ Maintenance:
if __name__ == '__main__':
main()