option 7 and 8 done

2025-07-01 16:06:40 +00:00 · 2025-01-18 11:31:14 +01:00 · 2025-01-18 11:31:14 +01:00 · 95474141f4
commit 95474141f4
parent c863f71951
4 changed files with 114 additions and 45 deletions
--- a/README.md
+++ b/README.md
@ -9,11 +9,15 @@ DONE:
 -php : if valid make it filter your own verified.csv and unverified.csv files
 -py : option 9)  cleanup all duplicates in your own unverified.csv and verified.csv
 -py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted)
 -py : option 7) Add/Remove words in the sensitive list			
 -py : option 8) Add/Remove words in the blacklist				
 TODO:
-py : option 7) Add/Remove words in the sensitive list			(assigned to anon)
+-manual work: fit all the existing links into the current format one by one
-py : option 8) Add/Remove words in the blacklist				(assigned to anon)
+-php/css: make the search page pretty
-
+-doc: redo the documentation for the project
 -doc: finish the blogpost about it
 -release it
 ``` 
--- a/scripts/darknet_exploration.py
+++ b/scripts/darknet_exploration.py
@ -103,12 +103,12 @@ Managing Webring Participants:
 6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
 Managing Wordlists:
- 7) Add/Remove words or links in the sensitive list (ex: drug)
+ 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
- 8) Add/Remove words or links in the blacklist (ex: porn)
+ 8) Add/Remove Words/URLs or links in the blacklist (ex: porn)
 Maintenance:
-9) remove the duplicate URLs for your own instance
+ 9) Remove the duplicate URLs for your own instance
-10) perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted) 
+ 10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted) 
 0) Exit
 		""")
@ -712,73 +712,133 @@ Maintenance:
 ################### MANAGING WORDLISTS #################
 #Managing Wordlists:
-# 7) Add/Remove words or links in the sensitive list (ex: drug)
+# 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
 # 8) Add/Remove words or links in the blacklist (ex: porn)
 #Maintenance:
-#9) remove the duplicate URLs for your own instance
+# 9) remove the duplicate URLs for your own instance
-#10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted) 
+# 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted) 
 #########################################################
 			case "7":
-				print("[+] Add/Remove words in the sensitive list (ex: drug)")
+				print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
-				print("do you want to 1) add words  or 2) remove words ?")
+				#secsvfile=instancepath+'/sensitive.csv' #fyi
 				#sedf = pd.read_csv(secsvfile) #fyi
 				option="0"
 				done = False
 				while(done == False):
-					while option != "1" and option != "2" and option != "exit":
+					while option != "1" and option != "2" and option != "-1":
-						option=input("do you want to 1) add words  or 2) remove words ? (type exit to exit)")
+						option=input("[+] do you want to 1) add  or 2) remove Words/URLs? (type exit to exit) ")
-					#  TODO display the contents of sensitive.csv file
+					if option == "1": 
-					if option == 1: 
+						word=input("[+] which Sensitive word do you want to add? (write -1 to exit) ")
-						word=input("which word do you want to add? (write 0 to exit")
+						if word == "-1":
 						if word == "0":
 							done = True
 							#True to get out of the while loop
 						else:
-							print("checking if word is valid")
+							print("[+] checking if the Word/URL is valid: ")
-							# TODO check if word is valid in a while loop (dont check if 
+							if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
-							#  TODO if invalid! remove word at index 
+								print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
-					else:
+								print('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
-						index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
+								# add it to the sensitive wordlist
-						if index == "exit": 
+								newrow=[word]
 								print("[+] NEWROW=",newrow)
 								# (rest is automatic: status, score, instance is = '' because it is your own instance)
 								# check if the entry doesn't already exist in verified.csv and in unverified.csv
 								# if it doesnt exist, add it into unverified.csv
 								sedf.loc[-1] = newrow  # adding a row
 								sedf.index = sedf.index + 1  # shifting index
 								sedf = sedf.sort_index()  # sorting by index
 								print("[+] New row added! now writing the csv file: ")
 								sedf.to_csv(secsvfile, index=False)
 					if option == "2": 
 						print(sedf)
 						index=""
 						while (index not in sedf.index) and index != -1:
 							index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
 						if index == -1: 
 							done = True
 							#True to get out of the while loop
 						else:
-							print("checking if index is valid")
+							if (index in sedf.index):
-							# TODO check if index is valid or not
+								#if index exists, remove it
-								# TODO  if valid! remove word at index 
+								print("[+] removing selected index: ")
-								# TODO  if invalid! just pass to ask for another word
+								sedf.drop(index, inplace= True)
 								sedf.to_csv(secsvfile, index=False)
 							else:
 								print('[-] Error, invalid index')
 					else: 
 						pass
 			case "8":
 				print("[+] Add/Remove words in the blacklist list (ex: porn)")
 				#blcsvfile=instancepath+'/blacklist.csv' #fyi
 				#bldf = pd.read_csv(blcsvfile) #fyi
 				option="0"
 				done = False
 				while(done == False):
-					while option != "1" and option != "2" and option != "exit":
+					while option != "1" and option != "2" and option != "-1":
-						option=input("do you want to 1) add words  or 2) remove words ? (type exit to exit)")
+						option=input("[+] Do you want to 1) add  or 2) remove Words/URLs? (type exit to exit) ")
-					#  TODO display the contents of blacklist.csv file
+					if option == "1": 
-					if option == 1: 
+						word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
-						word=input("which word do you want to add? (write 0 to exit")
+						if word == "-1":
 						if word == "0":
 							done = True
 							#True to get out of the while loop
 						else:
-							print("checking if word is valid")
+							print("[+] Checking if the Word/URL is valid: ")
-							# TODO check if word is valid in a while loop (dont check if 
+							if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
-							#  TODO if invalid! remove word at index 
+								print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
-					else:
+								print('[+] Word/URL is valid, adding the word into the blacklist ')
-						index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
+								# add it to the blacklist
-						if index == "exit": 
+								newrow=[word]
 								print("[+] NEWROW=",newrow)
 								# (rest is automatic: status, score, instance is = '' because it is your own instance)
 								# check if the entry doesn't already exist in verified.csv and in unverified.csv
 								# if it doesnt exist, add it into unverified.csv
 								bldf.loc[-1] = newrow  # adding a row
 								bldf.index = bldf.index + 1  # shifting index
 								bldf = bldf.sort_index()  # sorting by index
 								print("[+] New row added! now writing the csv file: ")
 								bldf.to_csv(blcsvfile, index=False)
 					if option == "2": 
 						print(bldf)
 						index=""
 						while (index not in bldf.index) and index != -1:
 							index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
 						if index == -1: 
 							done = True
 							#True to get out of the while loop
 						else:
-							print("checking if index is valid")
+							if (index in bldf.index):
-							# TODO check if index is valid or not
+								#if index exists, remove it
-								# TODO  if valid! remove word at index 
+								print("[+] removing selected index: ")
-								# TODO  if invalid! just pass to ask for another word
+								bldf.drop(index, inplace= True)
 								bldf.to_csv(blcsvfile, index=False)
 							else:
 								print('[-] Error, invalid index')
 					else: 
 						pass
 			# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
 			case "9":
 				print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
@ -977,6 +1037,9 @@ def IsUrlValid(url:str)->bool:
        #if not : return False
        pattern  = re.compile("^[A-Za-z0-9:/.]+$")
        url = str(url)
        if len(url) < 4:
                #print("Status: Got more than one character or nothing.")
                return False
        if url.endswith('.onion'):
                return IsOnionValid(url)
        else:
--- a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/blacklist.csv
+++ b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/blacklist.csv
@ -1,2 +1,5 @@
 blacklisted-words
 pr0n
 pron
 teen
 porn
--- a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/sensitive.csv
+++ b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/sensitive.csv
@ -2,4 +2,3 @@ sensitive-words
 Market
 market
 drug