option 7 and 8 done

This commit is contained in:
root 2025-01-18 11:31:14 +01:00
parent c863f71951
commit 95474141f4
4 changed files with 114 additions and 45 deletions

View file

@ -9,11 +9,15 @@ DONE:
-php : if valid make it filter your own verified.csv and unverified.csv files -php : if valid make it filter your own verified.csv and unverified.csv files
-py : option 9) cleanup all duplicates in your own unverified.csv and verified.csv -py : option 9) cleanup all duplicates in your own unverified.csv and verified.csv
-py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted) -py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted)
-py : option 7) Add/Remove words in the sensitive list
-py : option 8) Add/Remove words in the blacklist
TODO: TODO:
-py : option 7) Add/Remove words in the sensitive list (assigned to anon) -manual work: fit all the existing links into the current format one by one
-py : option 8) Add/Remove words in the blacklist (assigned to anon) -php/css: make the search page pretty
-doc: redo the documentation for the project
-doc: finish the blogpost about it
-release it
``` ```

View file

@ -103,12 +103,12 @@ Managing Webring Participants:
6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous) 6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
Managing Wordlists: Managing Wordlists:
7) Add/Remove words or links in the sensitive list (ex: drug) 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
8) Add/Remove words or links in the blacklist (ex: porn) 8) Add/Remove Words/URLs or links in the blacklist (ex: porn)
Maintenance: Maintenance:
9) remove the duplicate URLs for your own instance 9) Remove the duplicate URLs for your own instance
10) perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted) 10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
0) Exit 0) Exit
""") """)
@ -712,73 +712,133 @@ Maintenance:
################### MANAGING WORDLISTS ################# ################### MANAGING WORDLISTS #################
#Managing Wordlists: #Managing Wordlists:
# 7) Add/Remove words or links in the sensitive list (ex: drug) # 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
# 8) Add/Remove words or links in the blacklist (ex: porn) # 8) Add/Remove words or links in the blacklist (ex: porn)
#Maintenance: #Maintenance:
#9) remove the duplicate URLs for your own instance # 9) remove the duplicate URLs for your own instance
#10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted) # 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
######################################################### #########################################################
case "7": case "7":
print("[+] Add/Remove words in the sensitive list (ex: drug)") print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
print("do you want to 1) add words or 2) remove words ?") #secsvfile=instancepath+'/sensitive.csv' #fyi
#sedf = pd.read_csv(secsvfile) #fyi
option="0" option="0"
done = False done = False
while(done == False): while(done == False):
while option != "1" and option != "2" and option != "exit": while option != "1" and option != "2" and option != "-1":
option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)") option=input("[+] do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
# TODO display the contents of sensitive.csv file if option == "1":
if option == 1: word=input("[+] which Sensitive word do you want to add? (write -1 to exit) ")
word=input("which word do you want to add? (write 0 to exit") if word == "-1":
if word == "0":
done = True done = True
#True to get out of the while loop #True to get out of the while loop
else: else:
print("checking if word is valid") print("[+] checking if the Word/URL is valid: ")
# TODO check if word is valid in a while loop (dont check if if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
# TODO if invalid! remove word at index print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
else: print('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)") # add it to the sensitive wordlist
if index == "exit": newrow=[word]
print("[+] NEWROW=",newrow)
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
sedf.loc[-1] = newrow # adding a row
sedf.index = sedf.index + 1 # shifting index
sedf = sedf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file: ")
sedf.to_csv(secsvfile, index=False)
if option == "2":
print(sedf)
index=""
while (index not in sedf.index) and index != -1:
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
if index == -1:
done = True done = True
#True to get out of the while loop #True to get out of the while loop
else: else:
print("checking if index is valid") if (index in sedf.index):
# TODO check if index is valid or not #if index exists, remove it
# TODO if valid! remove word at index print("[+] removing selected index: ")
# TODO if invalid! just pass to ask for another word sedf.drop(index, inplace= True)
sedf.to_csv(secsvfile, index=False)
else:
print('[-] Error, invalid index')
else:
pass
case "8": case "8":
print("[+] Add/Remove words in the blacklist list (ex: porn)") print("[+] Add/Remove words in the blacklist list (ex: porn)")
#blcsvfile=instancepath+'/blacklist.csv' #fyi
#bldf = pd.read_csv(blcsvfile) #fyi
option="0" option="0"
done = False done = False
while(done == False): while(done == False):
while option != "1" and option != "2" and option != "exit": while option != "1" and option != "2" and option != "-1":
option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)") option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
# TODO display the contents of blacklist.csv file if option == "1":
if option == 1: word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
word=input("which word do you want to add? (write 0 to exit") if word == "-1":
if word == "0":
done = True done = True
#True to get out of the while loop #True to get out of the while loop
else: else:
print("checking if word is valid") print("[+] Checking if the Word/URL is valid: ")
# TODO check if word is valid in a while loop (dont check if if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
# TODO if invalid! remove word at index print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
else: print('[+] Word/URL is valid, adding the word into the blacklist ')
index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)") # add it to the sensitive wordlist
if index == "exit": newrow=[word]
print("[+] NEWROW=",newrow)
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
bldf.loc[-1] = newrow # adding a row
bldf.index = bldf.index + 1 # shifting index
bldf = bldf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file: ")
bldf.to_csv(blcsvfile, index=False)
if option == "2":
print(bldf)
index=""
while (index not in bldf.index) and index != -1:
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
if index == -1:
done = True done = True
#True to get out of the while loop #True to get out of the while loop
else: else:
print("checking if index is valid") if (index in bldf.index):
# TODO check if index is valid or not #if index exists, remove it
# TODO if valid! remove word at index print("[+] removing selected index: ")
# TODO if invalid! just pass to ask for another word bldf.drop(index, inplace= True)
bldf.to_csv(blcsvfile, index=False)
else:
print('[-] Error, invalid index')
else:
pass
# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file) # CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
case "9": case "9":
print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
@ -977,6 +1037,9 @@ def IsUrlValid(url:str)->bool:
#if not : return False #if not : return False
pattern = re.compile("^[A-Za-z0-9:/.]+$") pattern = re.compile("^[A-Za-z0-9:/.]+$")
url = str(url) url = str(url)
if len(url) < 4:
#print("Status: Got more than one character or nothing.")
return False
if url.endswith('.onion'): if url.endswith('.onion'):
return IsOnionValid(url) return IsOnionValid(url)
else: else:

View file

@ -1,2 +1,5 @@
blacklisted-words blacklisted-words
pr0n
pron
teen
porn porn

1 blacklisted-words
2 pr0n
3 pron
4 teen
5 porn

View file

@ -2,4 +2,3 @@ sensitive-words
Market Market
market market
drug drug

1 sensitive-words
2 Market
3 market
4 drug