darknet-lantern/scripts/darknet_exploration.py
2025-01-18 21:16:22 +01:00

1179 lines
51 KiB
Python

import os, pwd, re, pandas as pd, requests, shutil
from PIL import Image
import urllib
import socks, socket, glob
# apt install python3-pandas python3-requests python3-socks python3-pil
def main():
proxies = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
rootpath='/srv/darknet-onion-webring/'
urlpath=pwd.getpwuid(os.getuid()).pw_dir+"/.darknet_participant_url"
#print(urlpath)
# check if ~/.darknet_participant_url exists,
# if exists, instance= the content of ~/.darknet_participant_url (which is the url: such as uptime.nowherejez...onion)
isitvalid="n"
while isitvalid != "y":
if os.path.isfile(urlpath):
with open(urlpath) as f:
instance = f.read().rstrip()
# check if the instance URL domain is valid
#print(urlpath,instance)
if IsOnionValid(instance):
print("[+] Instance Name:",instance,IsOnionValid(instance))
isitvalid="y"
else:
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
return False
else:
print("[+] Instance Path doesn't exist yet")
# and ask for the instance URL domain
instance = input("What is your Instance domain ? (ex: uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion):")
instancepath=rootpath+'www/participants/'+instance
# check if the instance URL domain is valid
if IsOnionValid(instance):
print("[+] Instance Name: ",instance,IsUrlValid(instance))
else:
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
return False
# ask the user if the instance URL is valid ?
print()
print(instance)
isitvalid=input("Is this your this your instance domain ? (y/n)")
# if yes, then write it into ~/.darknet_participant_url
if isitvalid == "y" :
print("OK writing the instance url to ~/.darknet_participants_url")
with open(urlpath, "w") as file:
file.write(instance)
print("[+] file written, let's read it")
f = open(urlpath,"r")
print(f.read())
print("[+] Initial Setup Completed!")
myinstance = instance
instancepath=rootpath+'www/participants/'+instance
templatepath=rootpath+'templates/'
verifiedcsvfile=instancepath+'/verified.csv'
unverifiedcsvfile=instancepath+'/unverified.csv'
blcsvfile=instancepath+'/blacklist.csv'
secsvfile=instancepath+'/sensitive.csv'
webpcsvfile=instancepath+'/webring-participants.csv'
# check if instancepath exists, if not then create the directory
if not os.path.exists(instancepath):
os.makedirs(instancepath)
# check if all the required csv files exist in it, otherwise copy them from the templates directory
# NOTE : the templates files are EMPTY by default, this is because i want each peer to manually review lists of links, and links themselves manually, this is to avoid allowing malicious links to slip through without intentional edits from the peer themselves.
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv','banner.png']:
filepath=instancepath+'/'+i
if not os.path.isfile(filepath):
# copy templates/ FILE.CSV to instancepath/ FILE.CSV
src=templatepath+i
shutil.copyfile(src, filepath)
# now that they exist, get vdf and uvdf and the rest
vdf = pd.read_csv(verifiedcsvfile)
uvdf = pd.read_csv(unverifiedcsvfile)
bldf = pd.read_csv(blcsvfile)
sedf = pd.read_csv(secsvfile)
webpdf = pd.read_csv(webpcsvfile)
print("[+] file exists, your Webring URL is", instance)
isitvalid = "y"
while True:
print("""
[+] Welcome to the Darknet Onion Webring, where you are exploring the Darknet and helping others do the same.
Managing Websites:
1) Add a new Website entry (into unverified.csv)
2) Trust a Website entry (move an entry from unverified to verified.csv)
3) Untrust a Website entry (move an entry from unverified to verified.csv)
Managing Webring Participants:
4) Synchronize new links from existing webring participants, into your unverified.csv file
5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
Managing Wordlists:
7) Add/Remove Words/URLs in the sensitive list (ex: drug)
8) Add/Remove Words/URLs or links in the blacklist (ex: porn)
Maintenance:
9) Remove the duplicate URLs for your own instance
10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
0) Exit
""")
option = input("Select Option? (0-11): ")
print(option)
match option:
########## MANAGING WEBSITE ENTRIES #################
#Websites:
# 1) Add a new Website entry (into unverified.csv)
# 2) Trust a Website entry (move an entry from unverified to verified.csv)
# 3) Untrust a Website entry (move an entry from verified to unverified.csv)
#####################################################
case "1":
done = False
while done == False:
print("\n[+] Add a new Website entry (into unverified.csv)")
name=''
while(IsNameValid(name) is not True):
name = input("What is the Website name ? ")
category=''
while(IsCategoryValid(category) is not True):
category = input("What is the website Category ? ")
# the url of the website (required) + check if its valid
url=''
while(IsUrlValid(url) is not True):
url=input("What is the website URL ? ")
# a quick description (optional) + check if its valid
desc='DEFAULT'
while(IsDescriptionValid(desc) is not True):
desc=input("Description for the website ? (Optional) ")
# sensitive ? (y/n) + check if its valid
#entry_sensi = input("is it a sensitive website ? (ex: website related to drugs) (y/n)")
choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
if choice == "n":
sensi = ''
else:
sensi = '✔️'
newrow=[instance,category,name,url,sensi,desc,'','']
print("[+] NEWROW=",newrow)
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# TODO check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
uvdf.loc[-1] = newrow # adding a row
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file:")
uvdf.to_csv(unverifiedcsvfile, index=False)
choice=input("\n[+] Want to add another website ? (y/n) ")
if choice == "n":
done = True
case "2":
print("[+] Trust a Website entry (move an entry from unverified to verified.csv)")
done = False
while done == False:
vdf = pd.read_csv(verifiedcsvfile)
uvdf = pd.read_csv(unverifiedcsvfile)
# search for a word
print(uvdf[['Name','URL']])
name=''
while(IsNameValid(name) is not True):
name = input("What is the Website name you want to trust ? (ex: Nowhere)")
filter_uvdf = uvdf[uvdf.Name.str.contains(name)]
# NOTE and display only the matching entries in unverified.csv in an array format (display it in CLI).
print(filter_uvdf[['Name','URL']])
# check if there are no results, dont proceed if there are none!
if filter_uvdf.size == 0:
print("ERROR no results, skipping.")
else:
# Each of the rows has an index,
index=-1
while (index not in filter_uvdf.index):
# prompt the user for which row they want to move to verified.csv
index = int(input("What is the index of the entry that you want to move to verified.csv ? (ex: 3) "))
# once selected, it must be able to SAVE and print that row:
print(uvdf.iloc[index].values)
newrow=uvdf.iloc[index].values
# append it into verified.csv
vdf.loc[-1] = newrow # adding a row
vdf.index = vdf.index + 1 # shifting index
vdf = vdf.sort_index() # sorting by index
vdf.to_csv(verifiedcsvfile, index=False)
print("[+] New row added to verified.csv! now writing to the csv")
# remove it from unverified.csv
uvdf.drop(index, inplace= True)
uvdf.to_csv(unverifiedcsvfile, index=False)
print("[+] Link is now moved to verified.csv!")
choice=input("\n[+] Want to trust another website ? (y/n) ")
if choice == "n":
done = True
case "3":
print("[+] Untrust a Website entry (move an entry from verified to unverified.csv)")
print(vdf[['Name','URL']])
# search for a word
name=''
while(IsNameValid(name) is not True):
name = input("What is the Website name you want to untrust ? (ex: BreachForums)")
filter_vdf = vdf[vdf.Name.str.contains(name)]
# and display only the matching entries in verified.csv in an array format (display it in CLI).
print(filter_vdf[['Name','URL']])
# check if there are no results, dont proceed if there are none!
if filter_vdf.size == 0:
print("ERROR no results, skipping.")
else:
# Each of the rows has an index,
index=-1
while (index not in filter_vdf.index):
# prompt the user for which row they want to move to unverified.csv
index = int(input("What is the index of the entry that you want to move to unverified.csv ? (ex: 3) "))
# once selected, it must be able to SAVE and print that row:
print(vdf.iloc[index].values)
newrow=vdf.iloc[index].values
# append it into unverified.csv
uvdf.loc[-1] = newrow # adding a row
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
uvdf.to_csv(unverifiedcsvfile, index=False)
print("[+] New row added to unverified.csv!")
# remove it from verified.csv
vdf.drop(index, inplace= True)
vdf.to_csv(verifiedcsvfile, index=False)
print("[+] Link is now moved to unverified.csv!")
####### MANAGING WEBRING PARTICIPANTS ###########
# 4) Synchronize new links from webring participants, into your unverified.csv file
# 5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
# 6) Trust/UnTrust/Blacklist a webring participant
#####################################################
#check if it works when you have a second webring participant
case "4":
print("4) Synchronize new links from existing webring participants, into your unverified.csv file")
# iterate through each existing directories in www/participants/* to get each webring participant
participantsdir=rootpath+'www/participants/'
#print(os.listdir(participantsdir))
name=''
desc=''
trusted=''
status=''
score=''
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile)
for participant in os.listdir(participantsdir):
participantdir=participantsdir+participant
#print(participant)
# NOTE check if the webring participant is yourself, if it is, then skip it
if participant != myinstance: # prod: dont use your own intance
#if participant == myinstance: # preprod testing only on your own instance
#overwrite the existing files in the participant's directory, with their version (download all the csv files from them again)
basewurl='http://'+participant+'/participants/'+participant+'/'
print(basewurl)
print('[+] Downloading the files of ',participant, ": ")
w_vcsv=basewurl+'verified.csv'
w_uvcsv=basewurl+'unverified.csv'
#print(CheckUrl(w_uvcsv))
w_blcsv=basewurl+'blacklist.csv'
#print(CheckUrl(w_blcsv))
w_scsv=basewurl+'sensitive.csv'
#print(CheckUrl(w_scsv))
w_webcsv=basewurl+'webring-participants.csv'
#print(CheckUrl(w_webcsv))
# verify that their verified.csv csv file exists at basewurl+'verified.csv'
if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
print("[-] Webring Participant isn't reachable, skipping")
#return False #dont do anything if the webring participant isnt reachable.
else: #if the webring participant is reachable, proceed
print("[+] Webring Participant is reachable, updating their csv files:")
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
# FOR EACH CSV FILE TO GET:
# URL: basewurl / FILE.CSV
# PATH: participantdir / FILE.CSV
#print('[+] DOWNLOADING ',basewurl+i)
# download the external csv file and save it into the "text" variable:
#response = urllib.request.urlopen(basewurl+i)
response = requests.get(basewurl+i, proxies=proxies)
#data = response.read() # a `bytes` object
#text = data.decode('utf-8')
text = response.text
# save the text variable into the destination file:
#print('[+] SAVING IT INTO ',participantdir+'/'+i)
csvfilepath=participantdir+'/'+i
with open(csvfilepath, "w") as file:
file.write(text)
#print("[+] file written, let's read it")
f = open(csvfilepath,"r")
#print(f.read())
# download the banner.png image:
bannerurl=basewurl+'banner.png'
bannerpath=participantdir+'/banner.png'
r = requests.get(bannerurl, stream=True, proxies=proxies)
with open(bannerpath, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
# SANITY CHECK ON THE BANNER PNG IMAGE:
if IsBannerValid(bannerpath):
#print('[+] Banner is valid')
pass
else:
# if false, overwrite it with the template banner png file
#print('[-] Banner is not valid, replacing it with the default banner')
os.remove(bannerpath)
# copy templates/banner.png to bannerpath
bannertemplatepath=templatepath+'banner.png'
shutil.copyfile(bannertemplatepath, bannerpath)
#print("[+] Webring Participant is valid, adding it if it's not already added.")
#print('[+] PARTICIPANT=',participant)
# check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
# filter your own webring-participants.csv for rows whose URL matches this participant.
filter_wdf = wdf[wdf.URL.str.contains(participant)]
#print(filter_wdf[['Name','URL']])
# if there are no results, the participant is not listed yet, so add them below
if filter_wdf.size == 0: #skip if webring participant is already listed, otherwise proceed
newrow=[name,participant,desc,trusted,status,score]
#print("[+] NEWROW=",newrow)
wdf.loc[-1] = newrow # adding a row
wdf.index = wdf.index + 1 # shifting index
wdf = wdf.sort_index() # sorting by index
#print("[+] New row added! now writing the csv file:",webringcsvfile)
wdf.to_csv(webringcsvfile, index=False)
else:
pass
#print('[+] Webring participant is already listed in your own webring-participants.csv file!')
# iterate through the participant's verified.csv and unverified.csv files
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print(csvfilepath)
csvdf = pd.read_csv(csvfilepath)
#print(bldf[['blacklisted-words']])
bldf[['blacklisted-words']].iterrows()
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
#print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
#print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
#row=uvdf.iloc[[i]] #it displays the index
row=csvdf.loc[i,:].values.tolist()
print(row)
#print(i,row)
################################ SANITY CHECKS ####################################
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
#print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs
if i not in rows2delete:
print("Marking row", i,"for deletion, as it has invalid inputs")
rows2delete.append(i) #mark the row for deletion if not already done
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
for k,l in bldf.iterrows():
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
#print("found blacklisted word! marking row for deletion")
if i not in rows2delete:
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
else:
# not a blacklisted link, therefore it is suitable to be added to your own csv files:
################################ CHECKING FOR DUPLICATES! #########################
# for each link in the participant's verified/unverified csv files,
# check if the link is already listed in your own verified.csv or unverified.csv
filterterm=csvdf.at[i, 'URL']
filter_vdf= vdf[vdf.URL.str.contains(filterterm)]
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm)]
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
#if link doesnt exist in either of your verified/unverified csv files,
# then add it to your own unverified.csv file:
newrow=row
uvdf.loc[-1] = newrow # adding a row
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
uvdf.to_csv(unverifiedcsvfile, index=False)
print("[+] New row added to your own unverified.csv file!")
else:
print('[-] Skipping row as it is already added in ',w,row,)
###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE###################
### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
for k,l in sedf.iterrows():
#print("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
seword=sedf.at[k, 'sensitive-words']
if any(seword in str(x) for x in row) == True:
if csvdf.at[i, 'Sensitive'] != '✔️':
print("Marking row", i,"as sensitive, as it matches with a sensitive word")
csvdf.at[i, 'Sensitive']='✔️'
print('[-] Rows to delete: ',rows2delete)
# only delete rows after you've gone through all the unverified.csv OR verified.csv rows'
for i in rows2delete:
row=csvdf.loc[i,:].values.tolist()
print('[+] REMOVING ROW :',i,row)
csvdf.drop(i, inplace= True)
csvdf.to_csv(csvfilepath, index=False)
rows2delete= [] # it is an empty list at first
case "5":
print("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
webring_participant_url = ''
while(IsOnionValid(webring_participant_url) is not True):
# ask for the url to the other webring participant and check if the (onion only) url is valid or not:
webring_participant_url = input("What is the onion domain of the new webring participant? (ex: uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
# check if the directory exists locally or not,
participantdir=rootpath+'www/participants/'+webring_participant_url
if not os.path.isdir(participantdir): # to test on your own instance
#if os.path.isdir(participantdir):
# if it does, it means that the webring is ALREADY added
print("[-] Webring Participant is already listed, skipping.")
return False
else:
# if not, then proceed:
# print the URL to the csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
basewurl='http://'+webring_participant_url+'/participants/'+webring_participant_url+'/'
print(basewurl)
print('[+] Checking if all of the required csv files exists for new webring participant ',webring_participant_url, ": ")
w_vcsv=basewurl+'verified.csv'
w_uvcsv=basewurl+'unverified.csv'
#print(CheckUrl(w_uvcsv))
w_blcsv=basewurl+'blacklist.csv'
#print(CheckUrl(w_blcsv))
w_scsv=basewurl+'sensitive.csv'
#print(CheckUrl(w_scsv))
w_webcsv=basewurl+'webring-participants.csv'
#print(CheckUrl(w_webcsv))
# verify that their verified.csv csv file exists at basewurl+'verified.csv'
if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
print("[-] Webring Participant is invalid, exiting.")
return False
else:
print("[+] Webring Participant is valid, adding it.")
name=''
while(IsNameValid(name) is not True):
name = input("What is the Webring instance name ? ")
desc='DEFAULT'
while(IsDescriptionValid(desc) is not True):
desc=input("Description for the webring participant ? (Optional)")
# if OK then add it to your own webring-participants.csv
trusted=''
status=''
score=''
newrow=[name,webring_participant_url,desc,trusted,status,score]
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile)
#print("[+] NEWROW=",newrow)
wdf.loc[-1] = newrow # adding a row
wdf.index = wdf.index + 1 # shifting index
wdf = wdf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file:",webringcsvfile)
wdf.to_csv(webringcsvfile, index=False)
# create the directory in www/participants/PARTICIPANTURL/ if it's not there already
if not os.path.exists(participantdir):
os.makedirs(participantdir)
# then download their csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
# then save the csv file contents into a variable, then write it where it belongs:
# for loop with each csv file you want:
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
# FOR EACH CSV FILE TO GET:
# URL: basewurl / FILE.CSV
# PATH: participantdir / FILE.CSV
print('[+] DOWNLOADING ',basewurl+i)
# download the external csv file and save it into the "text" variable:
#response = urllib.request.urlopen(basewurl+i)
response = requests.get(basewurl+i, proxies=proxies)
#data = response.read() # a `bytes` object
#text = data.decode('utf-8')
text = response.text
# save the text variable into the destination file:
print('[+] SAVING IT INTO ',participantdir+'/'+i)
csvfilepath=participantdir+'/'+i
with open(csvfilepath, "w") as file:
file.write(text)
print("[+] file written, let's read it")
f = open(csvfilepath,"r")
print(f.read())
# download the banner.png image:
bannerurl=basewurl+'banner.png'
bannerpath=participantdir+'/banner.png'
r = requests.get(bannerurl, stream=True, proxies=proxies)
with open(bannerpath, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
# SANITY CHECK ON THE BANNER PNG IMAGE:
if IsBannerValid(bannerpath):
print('[+] Banner is valid')
else:
# if false, overwrite it with the template banner png file
print('[-] Banner is not valid, replacing it with the default banner')
os.remove(bannerpath)
# copy templates/banner.png to bannerpath
bannertemplatepath=templatepath+'banner.png'
shutil.copyfile(bannertemplatepath, bannerpath)
########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
csvdf = pd.read_csv(csvfilepath)
#print(bldf[['blacklisted-words']])
bldf[['blacklisted-words']].iterrows()
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
#print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
#print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
#row=uvdf.iloc[[i]] #it displays the index
row=csvdf.loc[i,:].values.tolist()
#print(i,row)
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
#print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs
if i not in rows2delete:
print("Marking row", i,"for deletion, as it has invalid inputs")
rows2delete.append(i) #mark the row for deletion if not already done
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
for k,l in bldf.iterrows():
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
#print("found blacklisted word! marking row for deletion")
if i not in rows2delete:
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
for k,l in sedf.iterrows():
#print("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
seword=sedf.at[k, 'sensitive-words']
if any(seword in str(x) for x in row) == True:
if csvdf.at[i, 'Sensitive'] != '✔️':
print("Marking row", i,"as sensitive, as it matches with a sensitive word")
csvdf.at[i, 'Sensitive']='✔️'
print('[-] Rows to delete: ',rows2delete)
for i in rows2delete:
row=csvdf.loc[i,:].values.tolist()
print('[+] REMOVING ROW :',i,row)
csvdf.drop(i, inplace= True)
csvdf.to_csv(csvfilepath, index=False)
##############################################
case "6":
print("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile)
# list each webring participant in your webring-participants.csv file
print(wdf[["URL","Trusted"]])
# ask the user to pick an index
index=""
while (index not in wdf.index):
# prompt the user for the row index of the webring participant they want to edit
index = int(input("What is the index of the webring participant that you want to edit ? (ex: 3) "))
# once a valid index is picked, ask if the user wants to 1) trust the webring participant, or 2) untrust them, or 3) black list them
choice=""
while (choice not in ["1","2","3"]):
choice = input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant ?")
if choice == "1":
# trust the webring participant
# ask the user if they want to proceed, as this is potentially risky if the webring participant tries to list malicious links in the future
choice2=input("You're about to trust another peer, this means that you're going to automatically trust all of the links they have in their verified.csv file! If this is a malicious peer, you're about to potentially going to automatically trust malicious links, it is potentially risky! Do you want to continue ? (y/n)")
if choice2 == "y":
# if user wants to proceed, mark the "trusted" column as V
print("[+] Trusting webring participant", wdf.at[index,"URL"])
wdf.at[index,"Trusted"]='✔️'
wdf.to_csv(webringcsvfile, index=False)
else:
print("[-] not trusting webring participant, skipping.")
if choice == "2":
print("[+] UnTrusting webring participant", wdf.at[index,"URL"])
wdf.at[index,"Trusted"]=''
wdf.to_csv(webringcsvfile, index=False)
# untrust the webring participant
# if 2: mark the "trusted" column as empty
if choice == "3":
print("[+] Blacklisting webring participant", wdf.at[index,"URL"])
# blacklist the webring participant
# add it's URL to your own blacklist.csv
instance2blacklist=wdf.at[index,"URL"]
newrow=[instance2blacklist]
print("[+] NEWROW=",newrow)
# the new row holds only the participant's URL
# NOTE: no check is made here for an already-existing identical entry
# add it into blacklist.csv
bldf.loc[-1] = newrow # adding a row
bldf.index = bldf.index + 1 # shifting index
bldf = bldf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file:")
bldf.to_csv(blcsvfile, index=False)
# remove all of the entries that came from that participant (drop the lines in your own verified+unverified.csv that have that instance in the instance column)
#vdf
rows2delete= [] # it is an empty list at first
for i,j in vdf.iterrows():
row=vdf.loc[i,:].values.tolist()
for k,l in bldf.iterrows():
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
#print("found blacklisted word! marking row for deletion")
if i not in rows2delete:
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=vdf.loc[i,:].values.tolist()
print('[+] REMOVING ROW :',i,row)
vdf.drop(i, inplace= True)
vdf.to_csv(verifiedcsvfile, index=False)
print(vdf)
rows2delete= [] # it is an empty list at first
#uvdf
rows2delete= [] # it is an empty list at first
for i,j in uvdf.iterrows():
row=uvdf.loc[i,:].values.tolist()
for k,l in bldf.iterrows():
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
#print("found blacklisted word! marking row for deletion")
if i not in rows2delete:
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=uvdf.loc[i,:].values.tolist()
print('[+] REMOVING ROW :',i,row)
uvdf.drop(i, inplace= True)
uvdf.to_csv(unverifiedcsvfile, index=False)
print(uvdf)
rows2delete= [] # it is an empty list at first
# find all rows that match with the instance name in wdf aswell to remove them
for i,j in wdf.iterrows():
row=wdf.loc[i,:].values.tolist()
for k,l in bldf.iterrows():
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
#print("found blacklisted word! marking row for deletion")
if i not in rows2delete:
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=wdf.loc[i,:].values.tolist()
print('[+] REMOVING ROW :',i,row)
wdf.drop(i, inplace= True)
wdf.to_csv(webringcsvfile, index=False)
print(wdf)
rows2delete= [] # it is an empty list at first
# remove the entire directory in www/participants/INSTANCENAME aswell to get rid of it
instance2blacklistpath=rootpath+'www/participants/'+instance2blacklist
print("[+] removing the participant's directory at ",instance2blacklistpath)
shutil.rmtree(instance2blacklistpath)
################### MANAGING WORDLISTS #################
#Managing Wordlists:
# 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
# 8) Add/Remove words or links in the blacklist (ex: porn)
#Maintenance:
# 9) remove the duplicate URLs for your own instance
# 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
#########################################################
case "7":
print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
#secsvfile=instancepath+'/sensitive.csv' #fyi
#sedf = pd.read_csv(secsvfile) #fyi
option="0"
done = False
while(done == False):
while option != "1" and option != "2" and option != "-1":
option=input("[+] do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
if option == "1":
word=input("[+] which Sensitive word do you want to add? (write -1 to exit) ")
if word == "-1":
done = True
#True to get out of the while loop
else:
print("[+] checking if the Word/URL is valid: ")
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
print('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
# add it to the sensitive wordlist
newrow=[word]
print("[+] NEWROW=",newrow)
# the new row holds only the word/URL
# NOTE: no check is made here for an already-existing identical entry
# add it into sensitive.csv
sedf.loc[-1] = newrow # adding a row
sedf.index = sedf.index + 1 # shifting index
sedf = sedf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file: ")
sedf.to_csv(secsvfile, index=False)
if option == "2":
print(sedf)
index=""
while (index not in sedf.index) and index != -1:
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
if index == -1:
done = True
#True to get out of the while loop
else:
if (index in sedf.index):
#if index exists, remove it
print("[+] removing selected index: ")
sedf.drop(index, inplace= True)
sedf.to_csv(secsvfile, index=False)
else:
print('[-] Error, invalid index')
else:
pass
case "8":
print("[+] Add/Remove words in the blacklist list (ex: porn)")
#blcsvfile=instancepath+'/sensitive.csv' #fyi
#bldf = pd.read_csv(secsvfile) #fyi
option="0"
done = False
while(done == False):
while option != "1" and option != "2" and option != "-1":
option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
if option == "1":
word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
if word == "-1":
done = True
#True to get out of the while loop
else:
print("[+] Checking if the Word/URL is valid: ")
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
print('[+] Word/URL is valid, adding the word into the blacklist ')
# add it to the blacklist
newrow=[word]
print("[+] NEWROW=",newrow)
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
bldf.loc[-1] = newrow # adding a row
bldf.index = bldf.index + 1 # shifting index
bldf = bldf.sort_index() # sorting by index
print("[+] New row added! now writing the csv file: ")
bldf.to_csv(blcsvfile, index=False)
if option == "2":
print(bldf)
index=""
while (index not in bldf.index) and index != -1:
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
if index == -1:
done = True
#True to get out of the while loop
else:
if (index in bldf.index):
#if index exists, remove it
print("[+] removing selected index: ")
bldf.drop(index, inplace= True)
bldf.to_csv(blcsvfile, index=False)
else:
print('[-] Error, invalid index')
else:
pass
# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
case "9":
print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
# ignore it if the index is "indextocheck" and if the index is already listed in rows2delete
# else: add the index to "rows2delete"
# go drop the rows by their index listed in "rows2delete"
################################ CHECKING FOR DUPLICATES! #########################
# for unverified.csv, and verified.csv
for w in ['verified.csv','unverified.csv']:
#instancepath=rootpath+'www/participants/'+instance # fyi
csvfilepath=instancepath+'/'+w
print(csvfilepath)
csvdf = pd.read_csv(csvfilepath)
print("REMOVING DUPLICATES IN", csvfilepath)
csvdf = csvdf.drop_duplicates(subset=['URL'])
csvdf.to_csv(csvfilepath, index=False)
print(csvdf[['URL']])
case "10":
print("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
participantspath = rootpath+'www/participants/'
for participant in os.listdir(participantspath):
print("Participant:",participant)
participantdir= participantspath+participant
a=0
if a == 0:
if a== 0:
################ BEGIN SANITY CHECKS FOR EACH PARTICIPANTS ##############
# iterate through the participant's verified.csv and unverified.csv files
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print(csvfilepath)
csvdf = pd.read_csv(csvfilepath)
#print(bldf[['blacklisted-words']])
bldf[['blacklisted-words']].iterrows()
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
#print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
#print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
#row=uvdf.iloc[[i]] #it displays the index
row=csvdf.loc[i,:].values.tolist()
print(row)
#print(i,row)
################################ SANITY CHECKS ####################################
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
#print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs
if i not in rows2delete:
print("Marking row", i,"for deletion, as it has invalid inputs")
rows2delete.append(i) #mark the row for deletion if not already done
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
for k,l in bldf.iterrows():
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
#print("found blacklisted word! marking row for deletion")
if i not in rows2delete:
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=csvdf.loc[i,:].values.tolist()
print('[+] REMOVING ROW :',i,row)
csvdf.drop(i, inplace= True)
csvdf.to_csv(csvfilepath, index=False)
# TODO find the list of all csv files (in www/participants/*/*.csv) (templates should remain empty by default)
# copy what was done in option 4, to :
# delete the ones that have invalid entries
# mark the sensitive rows as sensitive
# delete the rows that match with blacklisted words
case _:
print("[-] Exiting")
return True
#### Checking Functions to validate that links are legit ####
def CheckUrl(url):
    """
    Checks if URL is actually reachable via Tor.

    Sends a GET request for `url` through the SOCKS proxy listening on
    127.0.0.1:9050, with a 5 second timeout.

    Returns:
        True if a response came back with any status code other than 502,
        False if the status is 502 or if the request failed in any way
        (connection error, timeout, malformed URL, ...).
    """
    proxies = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050'
    }
    try:
        status = requests.get(url, proxies=proxies, timeout=5).status_code
    except requests.exceptions.RequestException:
        # RequestException is the base class of every error requests raises.
        # It covers ConnectionError and ReadTimeout (the only ones handled
        # previously) and also MissingSchema / InvalidURL / InvalidSchema,
        # which used to crash the caller on a malformed URL.
        return False
    # 502 is the only status treated as "unreachable".
    return status != 502
#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
def IsBannerValid(path: str) -> bool:
    """
    Checks if the banner.png file has the correct dimensions (240x60).

    Args:
        path: filesystem path of the image to inspect.

    Returns:
        True if the file can be opened as an image and is exactly 240 pixels
        wide and 60 pixels high, False otherwise (including when the file is
        missing or is not a readable image).
    """
    try:
        # The context manager closes the underlying file handle, which the
        # previous version left open.
        with Image.open(path) as im:
            width, height = im.size
    except Exception:
        # Best effort: anything that prevents reading the image means the
        # banner is not valid.
        return False
    return width == 240 and height == 60
def IsOnionValid(url: str)-> bool:
    """
    Checks if the domain(param) is a valid onion domain and return True else False.

    The value may be a bare domain ("address.onion" or "sub.address.onion")
    or the same prefixed with "http://" (anything after the host is ignored).
    Surrounding whitespace and one trailing "/" are removed first.

    A domain is accepted when all of the following hold:
      * it only contains [A-Za-z0-9.] and ends with ".onion"
      * it has at most 3 dot-separated parts (one optional subdomain)
      * it is at least 62 characters long (presumably a 56-character v3
        address plus ".onion" -- confirm if other address formats matter)

    Returns:
        True if the domain passes every check, False otherwise. Non-string
        input returns False.
    """
    # TODO : edit the url to make sure it has http:// at the beginning, in case
    # if it's missing? (problem is that it only returns true or false)
    if not isinstance(url, str):
        # Previously this printed the exception and returned None.
        return False

    # Raw string, and the ".onion" suffix is mandatory: the old pattern made
    # it optional, so any long enough dotted alphanumeric string was accepted.
    pattern = re.compile(r"^[A-Za-z0-9.]+\.onion$")

    url = url.strip().removesuffix('/')
    if url.startswith('http://'):
        # "http://host/path".split('/') -> ['http:', '', 'host', 'path']
        domain = url.split('/')[2]
    else:
        domain = url

    if pattern.fullmatch(domain) is None:
        # Invalid character, or not an .onion domain.
        return False
    if len(domain.split('.')) > 3:
        # More than one subdomain: only "subdomain.url.onion" is allowed.
        return False
    return len(domain) >= 62
def IsUrlValid(url:str)->bool:
    """
    Check if url is valid, for both darknet and clearnet addresses.

    The value is converted with str() first. Values shorter than 4
    characters are rejected. Values ending in ".onion" are delegated to
    IsOnionValid. Everything else must contain at least one "." and consist
    only of the characters [A-Za-z0-9:/.-].
    """
    candidate = str(url)

    # Too short to be a URL at all.
    if len(candidate) < 4:
        return False

    # Onion addresses are checked by the dedicated validator.
    if candidate.endswith('.onion'):
        return IsOnionValid(candidate)

    # Clearnet: needs a dot and only whitelisted characters.
    allowed = re.compile("^[A-Za-z0-9:/.-]+$")
    has_dot = '.' in candidate
    return has_dot and allowed.fullmatch(candidate) is not None
def IsStatusValid(status: str)-> bool:
    """
    Checks if status is one of: y, n, ✔️, ❌, an empty string or "nan".

    The value is converted with str() and surrounding whitespace is removed
    before the comparison, so a float NaN (rendered as "nan") is accepted.

    Returns:
        True if the normalised status is one of the allowed values,
        False otherwise.
    """
    # '❌' is documented as a valid status; the previous list held a
    # duplicated '' in its place, so it was rejected.
    allowed = ('y', 'n', '✔️', '❌', '', 'nan')
    # str.strip() returns a new string; the result was previously discarded.
    status = str(status).strip()
    return status in allowed
def IsScoreValid(score:str)->bool:
    """
    Check the Score is only "^[0-9.,]+$" with 8 max chars.

    The value is converted with str() and surrounding whitespace is removed
    first. An empty score, or "nan" (how a float NaN is rendered), is
    accepted because the score can be empty when an entry is initially added.

    Returns:
        True if the score is empty/"nan", or is at most 8 characters made of
        digits, "." and ",". False otherwise.
    """
    pattern = re.compile("^[0-9.,]+$")
    # str.strip() returns a new string; the result was previously discarded.
    score = str(score).strip()
    if score in ('', 'nan'):
        # Score can be empty when initially added
        return True
    if len(score) > 8:
        return False
    return pattern.fullmatch(score) is not None
def IsDescriptionValid(desc:str)->bool:
    """
    Check the description only contains [A-Za-z0-9.' -] with 256 max chars.

    The value is converted with str() and surrounding whitespace is removed
    first. An empty description is accepted because it is optional; the
    placeholder "DEFAULT" is rejected.

    Returns:
        True if the description is empty, or is at most 256 characters from
        the allowed set and is not "DEFAULT". False otherwise.
    """
    # str.strip() returns a new string; the result was previously discarded,
    # which let " DEFAULT " slip past the placeholder check below.
    desc = str(desc).strip()
    if desc == "":
        # empty description is fine as it's optional
        return True
    if desc == "DEFAULT":
        return False
    if len(desc) > 256:
        return False
    # Same character set as before; "-" moved last so it is unambiguously a
    # literal and not part of a range.
    pattern = re.compile(r"^[A-Za-z0-9.' -]+$")
    return pattern.fullmatch(desc) is not None
def IsCategoryValid(categories: list)-> bool:
    """
    Check the categories are only [a-zA-Z0-9 ] with 64 max chars.

    Args:
        categories: a list of category names, or a single category name as a
            str (the sanity checks in this file pass one CSV cell).

    Returns:
        True only if there is at least one category and every category,
        after removing surrounding whitespace, is 1 to 64 characters long and
        contains only letters, digits and spaces. False otherwise, including
        for an empty list and for non-string or non-iterable input.
    """
    if isinstance(categories, str):
        # A str would otherwise be iterated character by character.
        categories = [categories]
    try:
        candidates = list(categories)
    except TypeError:
        # Not iterable (e.g. a float NaN coming from an empty CSV cell).
        return False
    if not candidates:
        # Empty list: nothing to validate (previously returned None).
        return False

    pattern = re.compile("^[A-Za-z0-9 ]+$")
    for category in candidates:
        if not isinstance(category, str):
            return False
        category = category.strip()
        if len(category) > 64 or pattern.fullmatch(category) is None:
            return False
    # Only reached once every category has been checked; the previous
    # version returned True right after the first element.
    return True
def IsNameValid(name: str)->bool:
    """
    Check the parameter name only contains [a-zA-Z0-9 ] and is at most 64 chars long.

    Surrounding whitespace is removed before checking.

    Returns:
        True if the stripped name is 1 to 64 characters of letters, digits
        and spaces. False otherwise, including for non-string input.
    """
    if not isinstance(name, str):
        # e.g. a float NaN from an empty CSV cell: previously this raised
        # AttributeError on .strip() instead of reporting an invalid name.
        return False
    pattern = re.compile("^[A-Za-z0-9 ]+$")
    name = name.strip()
    if pattern.fullmatch(name) is None:
        # Invalid character, or nothing left after stripping.
        return False
    return len(name) <= 64
# Start the interactive tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()