option 7 and 8 done

This commit is contained in:
root 2025-01-18 11:31:14 +01:00
parent c863f71951
commit 95474141f4
4 changed files with 114 additions and 45 deletions

View file

@ -9,11 +9,15 @@ DONE:
-php : if valid make it filter your own verified.csv and unverified.csv files
-py : option 9) cleanup all duplicates in your own unverified.csv and verified.csv
-py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted)
-py : option 7) Add/Remove words in the sensitive list
-py : option 8) Add/Remove words in the blacklist
TODO:
-py : option 7) Add/Remove words in the sensitive list (assigned to anon)
-py : option 8) Add/Remove words in the blacklist (assigned to anon)
-manual work: fit all the existing links into the current format one by one
-php/css: make the search page pretty
-doc: redo the documentation for the project
-doc: finish the blogpost about it
-release it
```

View file

@ -103,12 +103,12 @@ Managing Webring Participants:
6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
Managing Wordlists:
7) Add/Remove words or links in the sensitive list (ex: drug)
8) Add/Remove words or links in the blacklist (ex: porn)
7) Add/Remove Words/URLs in the sensitive list (ex: drug)
8) Add/Remove Words/URLs or links in the blacklist (ex: porn)
Maintenance:
9) remove the duplicate URLs for your own instance
10) perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
9) Remove the duplicate URLs for your own instance
10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
0) Exit
""")
@ -712,72 +712,132 @@ Maintenance:
################### MANAGING WORDLISTS #################
#Managing Wordlists:
# 7) Add/Remove words or links in the sensitive list (ex: drug)
# 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
# 8) Add/Remove words or links in the blacklist (ex: porn)
#Maintenance:
#9) remove the duplicate URLs for your own instance
#10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
# 9) remove the duplicate URLs for your own instance
# 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
#########################################################
case "7":
print("[+] Add/Remove words in the sensitive list (ex: drug)")
print("do you want to 1) add words or 2) remove words ?")
print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
#secsvfile=instancepath+'/sensitive.csv' #fyi
#sedf = pd.read_csv(secsvfile) #fyi
option="0"
done = False
while(done == False):
while option != "1" and option != "2" and option != "exit":
option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)")
# TODO display the contents of sensitive.csv file
if option == 1:
word=input("which word do you want to add? (write 0 to exit")
if word == "0":
while option != "1" and option != "2" and option != "-1":
option=input("[+] do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
if option == "1":
word=input("[+] which Sensitive word do you want to add? (write -1 to exit) ")
if word == "-1":
done = True
#True to get out of the while loop
else:
print("checking if word is valid")
# TODO check if word is valid in a while loop (dont check if
# TODO if invalid! remove word at index
else:
index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
if index == "exit":
print("[+] checking if the Word/URL is valid: ")
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
print('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
# add it to the sensitive wordlist
newrow=[word]
print("[+] NEWROW=",newrow)
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
sedf.loc[-1] = newrow # adding a row
sedf.index = sedf.index + 1 # shifting index
sedf = sedf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file: ")
sedf.to_csv(secsvfile, index=False)
if option == "2":
print(sedf)
index=""
while (index not in sedf.index) and index != -1:
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
if index == -1:
done = True
#True to get out of the while loop
else:
print("checking if index is valid")
# TODO check if index is valid or not
# TODO if valid! remove word at index
# TODO if invalid! just pass to ask for another word
if (index in sedf.index):
#if index exists, remove it
print("[+] removing selected index: ")
sedf.drop(index, inplace= True)
sedf.to_csv(secsvfile, index=False)
else:
print('[-] Error, invalid index')
else:
pass
case "8":
print("[+] Add/Remove words in the blacklist list (ex: porn)")
#blcsvfile=instancepath+'/blacklist.csv' #fyi
#bldf = pd.read_csv(blcsvfile) #fyi
option="0"
done = False
while(done == False):
while option != "1" and option != "2" and option != "exit":
option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)")
# TODO display the contents of blacklist.csv file
if option == 1:
word=input("which word do you want to add? (write 0 to exit")
if word == "0":
while option != "1" and option != "2" and option != "-1":
option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
if option == "1":
word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
if word == "-1":
done = True
#True to get out of the while loop
else:
print("checking if word is valid")
# TODO check if word is valid in a while loop (dont check if
# TODO if invalid! remove word at index
else:
index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
if index == "exit":
print("[+] Checking if the Word/URL is valid: ")
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
print('[+] Word/URL is valid, adding the word into the blacklist ')
# add it to the blacklist
newrow=[word]
print("[+] NEWROW=",newrow)
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
bldf.loc[-1] = newrow # adding a row
bldf.index = bldf.index + 1 # shifting index
bldf = bldf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file: ")
bldf.to_csv(blcsvfile, index=False)
if option == "2":
print(bldf)
index=""
while (index not in bldf.index) and index != -1:
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
if index == -1:
done = True
#True to get out of the while loop
else:
print("checking if index is valid")
# TODO check if index is valid or not
# TODO if valid! remove word at index
# TODO if invalid! just pass to ask for another word
if (index in bldf.index):
#if index exists, remove it
print("[+] removing selected index: ")
bldf.drop(index, inplace= True)
bldf.to_csv(blcsvfile, index=False)
else:
print('[-] Error, invalid index')
else:
pass
# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
case "9":
@ -977,6 +1037,9 @@ def IsUrlValid(url:str)->bool:
#if not : return False
pattern = re.compile("^[A-Za-z0-9:/.]+$")
url = str(url)
if len(url) < 4:
#print("Status: Got more than one character or nothing.")
return False
if url.endswith('.onion'):
return IsOnionValid(url)
else:

View file

@ -1,2 +1,5 @@
blacklisted-words
pr0n
pron
teen
porn

1 blacklisted-words
2 pr0n
3 pron
4 teen
5 porn

View file

@ -2,4 +2,3 @@ sensitive-words
Market
market
drug

1 sensitive-words
2 Market
3 market
4 drug