Cleaned up unnecessary code + changed name and URL to be unique, with priority given to verified.csv

2025-07-01 22:16:41 +00:00 · 2025-05-25 10:56:49 +00:00 · 2025-05-25 10:56:49 +00:00 · 358c8594a6
commit 358c8594a6
parent 0adba9be1f
2 changed files with 56 additions and 38 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,5 @@ scripts/__pycache__/**
 .env
 env/
 submissions/submission.csv
+venv/

--- a/scripts/lantern.py
+++ b/scripts/lantern.py
@ -229,43 +229,60 @@ Maintenance:
                    newrow=[instance,category,name,url,sensi,desc,'YES','100']
                    print_colors(f"[+] NEWROW= {newrow}")
                    # (rest is automatic: status, score, instance is = '' because it is your own instance)                    
-                    # delete existing entries in verified.csv
-                    vdf_same_url_filter = vdf["URL"] == url # check for same url
-                    vdf_same_url_filter_count = vdf_same_url_filter.sum() # total url matches
-                    if vdf_same_url_filter_count > 0:
-                        print(f"Found {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
-                        for index, row in vdf[vdf_same_url_filter].iterrows():
-                            print_colors(f"[+] ROW[{index}]= {list(row)}")
-                        vdf = vdf[~vdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter
-                        print(f"Deleted {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
-                        if desc == '': # if the description is empty = it means that it goes in unverified.csv, so save modified verified.csv file now
-                            vdf.to_csv(verifiedcsvfile, index=False)
-                    # delete existing entries in unverified.csv
-                    uvdf_same_url_filter = uvdf["URL"] == url # check for same url
-                    uvdf_same_url_filter_count = uvdf_same_url_filter.sum() # total url matches
-                    if uvdf_same_url_filter_count > 0:
-                        print(f"Found {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
-                        for index, row in uvdf[uvdf_same_url_filter].iterrows():
-                            print_colors(f"[+] ROW[{index}]= {list(row)}")
-                        uvdf = uvdf[~uvdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter
-                        print(f"Deleted {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
-                        if desc != '': # if the description isnt empty = it means that it goes in verified.csv, so save modified unverified.csv file now
-                            uvdf.to_csv(unverifiedcsvfile, index=False)
-                    if desc == '': # if the description is empty = it means that it goes in unverified.csv 
-                        print("Adding new row in unverified.csv since description is empty")
-                        uvdf.loc[-1] = newrow  # adding a row
-                        uvdf.index = uvdf.index + 1  # shifting index
-                        uvdf = uvdf.sort_index()  # sorting by index
-                        uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
-                        print_colors("[+] New row added! now writing the csv file")
+
+                    ###### REMOVED THE FOLLOWING CODE - replaced it with logic that cleans both the verified and unverified csvs
+                    ###### of any duplicates (will only clean the unverified csv if there is no desc)
+                    #
+                    #
+                    # vdf_same_url_filter = vdf["URL"] == url # check for same url
+                    # vdf_same_url_filter_count = vdf_same_url_filter.sum() # total url matches
+                    # if vdf_same_url_filter_count > 0:
+                    #     print(f"Found {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
+                    #     for index, row in vdf[vdf_same_url_filter].iterrows():
+                    #         print_colors(f"[+] ROW[{index}]= {list(row)}")
+                    #     vdf = vdf[~vdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter
+                    #     print(f"Deleted {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
+                    #     if desc == '': # if the description is empty = it means that it goes in unverified.csv, so save modified verified.csv file now
+                    #         vdf.to_csv(verifiedcsvfile, index=False)
+                    # # delete existing entries in unverified.csv
+                    # uvdf_same_url_filter = uvdf["URL"] == url # check for same url
+                    # uvdf_same_url_filter_count = uvdf_same_url_filter.sum() # total url matches
+                    # if uvdf_same_url_filter_count > 0:
+                    #     print(f"Found {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
+                    #     for index, row in uvdf[uvdf_same_url_filter].iterrows():
+                    #         print_colors(f"[+] ROW[{index}]= {list(row)}")
+                    #     uvdf = uvdf[~uvdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter
+                    #     print(f"Deleted {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
+                    #     if desc != '': # if the description isnt empty = it means that it goes in verified.csv, so save modified unverified.csv file now
+                    #         uvdf.to_csv(unverifiedcsvfile, index=False)
+
+                    ##### THIS NEW CODE WILL NOT WORK ON RETROACTIVE (EXISTING) DATA, ONLY ON NEW ENTRIES AND THEIR DUPLICATES ADDED AFTER THE CHANGE; please advise whether retroactive cleanup should be added here or handled as a separate issue
+
+                    # Checks if new input is in verified.csv and prevents if exist
+                    if (vdf['URL'] == url).any() or (vdf['Name'] == name).any():
+                        print("Name or url already exist in verified.csv")
+
+                    else:
+                        # Delete existing entries from unverified.csv
+                        uvdf = uvdf[(uvdf['URL'] != url) & (uvdf['Name'] != name)]
+                        
+                        # if the description is empty = it means that it goes in unverified.csv else it goes in verified.csv
+                        if desc == '':
+                            print("Adding new row in unverified.csv since description is empty")
+                            uvdf.loc[-1] = newrow  # adding a row
+                            ###DEAD CODE uvdf.index = uvdf.index + 1  # shifting index
+                            ###DEAD CODE uvdf = uvdf.sort_index()  # sorting by index
+                            uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
+                            print_colors("[+] New row added! now writing the csv file")
+                        else:
+                            print("Adding new row in verified.csv since descriptioln is not empty")
+                            vdf.loc[-1] = newrow  # adding a row
+                            ###DEAD CODE vdf.index = vdf.index + 1  # shifting index
+                            ###DEAD CODE vdf = vdf.sort_index()  # sorting by index
+                            vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
+                            print_colors("[+] New row added! now writing the csv file")
+
                        uvdf.to_csv(unverifiedcsvfile, index=False)
-                    else: # if the description isnt empty = it means that it goes in verified.csv 
-                        print("Adding new row in verified.csv since description is not empty")
-                        vdf.loc[-1] = newrow  # adding a row
-                        vdf.index = vdf.index + 1  # shifting index
-                        vdf = vdf.sort_index()  # sorting by index
-                        vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
-                        print_colors("[+] New row added! now writing the csv file")
                        vdf.to_csv(verifiedcsvfile, index=False)

                    choice=input("\n[+] Want to add another website ? (y/n) ")