From 95474141f4fa6375f05ae2c295a1ed9e748b82cf Mon Sep 17 00:00:00 2001 From: root Date: Sat, 18 Jan 2025 11:31:14 +0100 Subject: [PATCH] option 7 and 8 done --- README.md | 10 +- scripts/darknet_exploration.py | 145 +++++++++++++----- .../blacklist.csv | 3 + .../sensitive.csv | 1 - 4 files changed, 114 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index a3daa67..cf3ff82 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,15 @@ DONE: -php : if valid make it filter your own verified.csv and unverified.csv files -py : option 9) cleanup all duplicates in your own unverified.csv and verified.csv -py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted) +-py : option 7) Add/Remove words in the sensitive list +-py : option 8) Add/Remove words in the blacklist TODO: --py : option 7) Add/Remove words in the sensitive list (assigned to anon) --py : option 8) Add/Remove words in the blacklist (assigned to anon) - +-manual work: fit all the existing links into the current format one by one +-php/css: make the search page pretty +-doc: redo the documentation for the project +-doc: finish the blogpost about it +-release it ``` diff --git a/scripts/darknet_exploration.py b/scripts/darknet_exploration.py index b2dfa74..822eeb6 100644 --- a/scripts/darknet_exploration.py +++ b/scripts/darknet_exploration.py @@ -103,12 +103,12 @@ Managing Webring Participants: 6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous) Managing Wordlists: - 7) Add/Remove words or links in the sensitive list (ex: drug) - 8) Add/Remove words or links in the blacklist (ex: porn) + 7) Add/Remove Words/URLs in the sensitive list (ex: drug) + 8) Add/Remove Words/URLs or links in the blacklist (ex: porn) Maintenance: -9) remove the duplicate URLs for your own instance -10) perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted) + 9) Remove
the duplicate URLs for your own instance + 10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted) 0) Exit """) @@ -712,73 +712,133 @@ Maintenance: ################### MANAGING WORDLISTS ################# #Managing Wordlists: -# 7) Add/Remove words or links in the sensitive list (ex: drug) +# 7) Add/Remove Words/URLs in the sensitive list (ex: drug) # 8) Add/Remove words or links in the blacklist (ex: porn) #Maintenance: -#9) remove the duplicate URLs for your own instance -#10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted) +# 9) remove the duplicate URLs for your own instance +# 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted) ######################################################### case "7": - print("[+] Add/Remove words in the sensitive list (ex: drug)") - print("do you want to 1) add words or 2) remove words ?") + print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)") + #secsvfile=instancepath+'/sensitive.csv' #fyi + #sedf = pd.read_csv(secsvfile) #fyi option="0" done = False while(done == False): - while option != "1" and option != "2" and option != "exit": - option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)") - # TODO display the contents of sensitive.csv file - if option == 1: - word=input("which word do you want to add? (write 0 to exit") - if word == "0": + while option != "1" and option != "2" and option != "-1": + option=input("[+] do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ") + if option == "1": + word=input("[+] which Sensitive word do you want to add? 
(write -1 to exit) ") + if word == "-1": done = True #True to get out of the while loop else: - print("checking if word is valid") - # TODO check if word is valid in a while loop (dont check if - # TODO if invalid! remove word at index - else: - index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)") - if index == "exit": + print("[+] checking if the Word/URL is valid: ") + if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word): + print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word)) + print('[+] Word/URL is valid, adding the word into the sensitive wordlist ') + # add it to the sensitive wordlist + newrow=[word] + print("[+] NEWROW=",newrow) + # (rest is automatic: status, score, instance is = '' because it is your own instance) + # check if the entry doesn't already exist in verified.csv and in unverified.csv + # if it doesnt exist, add it into unverified.csv + sedf.loc[-1] = newrow # adding a row + sedf.index = sedf.index + 1 # shifting index + sedf = sedf.sort_index() # sorting by index + print("[+] New row added! now writing the csv file: ") + sedf.to_csv(secsvfile, index=False) + + + if option == "2": + print(sedf) + index="" + while (index not in sedf.index) and index != -1: + index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) ")) + if index == -1: done = True #True to get out of the while loop else: - print("checking if index is valid") - # TODO check if index is valid or not - # TODO if valid! remove word at index - # TODO if invalid! 
just pass to ask for another word + if (index in sedf.index): + #if index exists, remove it + print("[+] removing selected index: ") + sedf.drop(index, inplace= True) + sedf.to_csv(secsvfile, index=False) + + else: + print('[-] Error, invalid index') + + + else: + pass + + + + case "8": print("[+] Add/Remove words in the blacklist list (ex: porn)") + #blcsvfile=instancepath+'/blacklist.csv' #fyi + #bldf = pd.read_csv(blcsvfile) #fyi option="0" done = False while(done == False): - while option != "1" and option != "2" and option != "exit": - option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)") - # TODO display the contents of blacklist.csv file - if option == 1: - word=input("which word do you want to add? (write 0 to exit") - if word == "0": + while option != "1" and option != "2" and option != "-1": + option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ") + if option == "1": + word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ") + if word == "-1": done = True #True to get out of the while loop else: - print("checking if word is valid") - # TODO check if word is valid in a while loop (dont check if - # TODO if invalid! remove word at index - else: - index=input("which word do you want to remove?
(index 0 to (max index) (write exit to exit)") - if index == "exit": + print("[+] Checking if the Word/URL is valid: ") + if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word): + print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word)) + print('[+] Word/URL is valid, adding the word into the blacklist ') + # add it to the blacklist + newrow=[word] + print("[+] NEWROW=",newrow) + # (rest is automatic: status, score, instance is = '' because it is your own instance) + # check if the entry doesn't already exist in verified.csv and in unverified.csv + # if it doesn't exist, add it into unverified.csv + bldf.loc[-1] = newrow # adding a row + bldf.index = bldf.index + 1 # shifting index + bldf = bldf.sort_index() # sorting by index + print("[+] New row added! now writing the csv file: ") + bldf.to_csv(blcsvfile, index=False) + + + if option == "2": + print(bldf) + index="" + while (index not in bldf.index) and index != -1: + index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) ")) + if index == -1: done = True #True to get out of the while loop else: - print("checking if index is valid") - # TODO check if index is valid or not - # TODO if valid! remove word at index - # TODO if invalid!
just pass to ask for another word + if (index in bldf.index): + #if index exists, remove it + print("[+] removing selected index: ") + bldf.drop(index, inplace= True) + bldf.to_csv(blcsvfile, index=False) + + else: + print('[-] Error, invalid index') + + + else: + pass + + + + + # CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file) case "9": print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") @@ -977,6 +1037,9 @@ def IsUrlValid(url:str)->bool: #if not : return False pattern = re.compile("^[A-Za-z0-9:/.]+$") url = str(url) + if len(url) < 4: + #print("Status: Got more than one character or nothing.") + return False if url.endswith('.onion'): return IsOnionValid(url) else: diff --git a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/blacklist.csv b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/blacklist.csv index 432db87..908a748 100644 --- a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/blacklist.csv +++ b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/blacklist.csv @@ -1,2 +1,5 @@ blacklisted-words +pr0n +pron +teen porn diff --git a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/sensitive.csv b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/sensitive.csv index 6a38cbc..b396b79 100644 --- a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/sensitive.csv +++ b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/sensitive.csv @@ -2,4 +2,3 @@ sensitive-words Market market drug -