Merge pull request 'Cleaned unnecessary code + changed name and url to be unique with priority to verified.csv' (#70) from doctor_dev/darknet-lantern:main into main

Reviewed-on: http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern/pulls/70
2025-07-01 22:36:40 +00:00 · 2025-05-26 18:12:04 +02:00 · 2025-05-26 18:12:04 +02:00 · b8b4a770ce
commit b8b4a770ce
parent 0adba9be1f 4b33e51d11
2 changed files with 27 additions and 38 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,5 @@ scripts/__pycache__/**
 .env
 env/
 submissions/submission.csv
 venv/
--- a/scripts/lantern.py
+++ b/scripts/lantern.py
@ -229,43 +229,31 @@ Maintenance:
                    newrow=[instance,category,name,url,sensi,desc,'YES','100']
                    print_colors(f"[+] NEWROW= {newrow}")
                    # (rest is automatic: status, score, instance is = '' because it is your own instance)                    
-                    # delete existing entries in verified.csv
+
-                    vdf_same_url_filter = vdf["URL"] == url # check for same url
+                    ##### NOTE: this new check does not apply retroactively to existing data; it only covers new entries (and their duplicates) added after this change. Please advise whether retroactive handling should be added here or tracked as a separate issue.
-                    vdf_same_url_filter_count = vdf_same_url_filter.sum() # total url matches
+
-                    if vdf_same_url_filter_count > 0:
+                    # Check whether the new entry's URL or name already exists in verified.csv; if so, do not add it
-                        print(f"Found {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
+                    if (vdf['URL'] == url).any() or (vdf['Name'] == name).any():
-                        for index, row in vdf[vdf_same_url_filter].iterrows():
+                        print_colors("Name or url already exist in verified.csv")
-                            print_colors(f"[+] ROW[{index}]= {list(row)}")
+
-                        vdf = vdf[~vdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter
+                    else:
-                        print(f"Deleted {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
+                        # Delete existing entries from unverified.csv
-                        if desc == '': # if the description is empty = it means that it goes in unverified.csv, so save modified verified.csv file now
+                        uvdf = uvdf[(uvdf['URL'] != url) & (uvdf['Name'] != name)]
-                            vdf.to_csv(verifiedcsvfile, index=False)
+                        
-                    # delete existing entries in unverified.csv
+                        # if the description is empty = it means that it goes in unverified.csv else it goes in verified.csv
-                    uvdf_same_url_filter = uvdf["URL"] == url # check for same url
+                        if desc == '':
-                    uvdf_same_url_filter_count = uvdf_same_url_filter.sum() # total url matches
+                            print("Adding new row in unverified.csv since description is empty")
-                    if uvdf_same_url_filter_count > 0:
+                            uvdf.loc[-1] = newrow  # adding a row
-                        print(f"Found {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
+                            uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
-                        for index, row in uvdf[uvdf_same_url_filter].iterrows():
+                            print_colors("[+] New row added! now writing the csv file")
-                            print_colors(f"[+] ROW[{index}]= {list(row)}")
+                        else:
-                        uvdf = uvdf[~uvdf_same_url_filter].reset_index(drop=True) # keep only entries that do not match filter
+                            print("Adding new row in verified.csv since descriptioln is not empty")
-                        print(f"Deleted {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
+                            vdf.loc[-1] = newrow  # adding a row
-                        if desc != '': # if the description isnt empty = it means that it goes in verified.csv, so save modified unverified.csv file now
+                            vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
-                            uvdf.to_csv(unverifiedcsvfile, index=False)
+                            print_colors("[+] New row added! now writing the csv file")
-                    if desc == '': # if the description is empty = it means that it goes in unverified.csv 
+
-                        print("Adding new row in unverified.csv since description is empty")
+                        # saving both csv's data
                        uvdf.loc[-1] = newrow  # adding a row
                        uvdf.index = uvdf.index + 1  # shifting index
                        uvdf = uvdf.sort_index()  # sorting by index
                        uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
                        print_colors("[+] New row added! now writing the csv file")
                        uvdf.to_csv(unverifiedcsvfile, index=False)
                    else: # if the description isnt empty = it means that it goes in verified.csv 
                        print("Adding new row in verified.csv since description is not empty")
                        vdf.loc[-1] = newrow  # adding a row
                        vdf.index = vdf.index + 1  # shifting index
                        vdf = vdf.sort_index()  # sorting by index
                        vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
                        print_colors("[+] New row added! now writing the csv file")
                        vdf.to_csv(verifiedcsvfile, index=False)
                    choice=input("\n[+] Want to add another website ? (y/n) ")