diff --git a/scripts/lantern.py b/scripts/lantern.py index 746a1da..a4d973d 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -229,8 +229,28 @@ Maintenance: newrow=[instance,category,name,url,sensi,desc,'YES','100'] print_colors(f"[+] NEWROW= {newrow}") # (rest is automatic: status, score, instance is = '' because it is your own instance) - # TODO check if the entry doesnt already exist in verified.csv and in unverified.csv - # if it doesnt exist, add it into unverified.csv + # delete existing entries in verified.csv + vdf_same_url_filter = vdf["URL"] == url # check for same url + vdf_same_url_filter_count = vdf_same_url_filter.sum() # total url matches + if vdf_same_url_filter_count > 0: + print(f"Found {vdf_same_url_filter_count} row(s) with the same url in verified.csv") + for index, row in vdf[vdf_same_url_filter].iterrows(): + print_colors(f"[+] ROW[{index}]= {list(row)}") + vdf = vdf[~vdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter + print(f"Deleted {vdf_same_url_filter_count} row(s) with the same url in verified.csv") + if desc == '': # if the description is empty = it means that it goes in unverified.csv, so save modified verified.csv file now + vdf.to_csv(verifiedcsvfile, index=False) + # delete existing entries in unverified.csv + uvdf_same_url_filter = uvdf["URL"] == url # check for same url + uvdf_same_url_filter_count = uvdf_same_url_filter.sum() # total url matches + if uvdf_same_url_filter_count > 0: + print(f"Found {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv") + for index, row in uvdf[uvdf_same_url_filter].iterrows(): + print_colors(f"[+] ROW[{index}]= {list(row)}") + uvdf = uvdf[~uvdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter + print(f"Deleted {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv") + if desc != '': # if the description isnt empty = it means that it goes in verified.csv, so save modified unverified.csv file now + uvdf.to_csv(unverifiedcsvfile, index=False) if desc == '': # if the description is empty = it means that it goes in unverified.csv print("Adding new row in unverified.csv since description is empty") uvdf.loc[-1] = newrow # adding a row @@ -674,7 +694,6 @@ Maintenance: uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip') # TODO DELETE ALL DUPLICATES OF UVDF ! uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False) - uvdf.to_csv(unverifiedcsvfile, index=False) filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)] if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0: newrow=row @@ -710,13 +729,6 @@ Maintenance: csvdf.drop(i, inplace= True) csvdf.to_csv(csvfilepath, index=False) rows2delete= [] # it is an empty list at first - # TODO DELETE ALL DUPLICATES OF UVDF ! - uvdf = uvdf.sort_index() # sorting by index - uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories - - uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False) - uvdf.to_csv(unverifiedcsvfile, index=False) - break @@ -1446,3 +1458,4 @@ Maintenance: if __name__ == '__main__': main() +