mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-05-16 20:26:58 +00:00
1179 lines
51 KiB
Python
1179 lines
51 KiB
Python
import os, pwd, re, pandas as pd, requests, shutil
|
|
from PIL import Image
|
|
import urllib
|
|
import socks, socket, glob
|
|
#apt install python3-pandas python3-requests python3-socks python3-pil
|
|
def main():
|
|
proxies = {
|
|
'http': 'socks5h://127.0.0.1:9050',
|
|
'https': 'socks5h://127.0.0.1:9050'
|
|
}
|
|
|
|
rootpath='/srv/darknet-onion-webring/'
|
|
urlpath=pwd.getpwuid(os.getuid()).pw_dir+"/.darknet_participant_url"
|
|
#print(urlpath)
|
|
|
|
|
|
# check if ~/.darknet_participant_url exists,
|
|
# if exists, instance= the content of ~/.darknet_participant_url (which is the url: such as uptime.nowherejez...onion)
|
|
isitvalid="n"
|
|
while isitvalid != "y":
|
|
if os.path.isfile(urlpath):
|
|
with open(urlpath) as f:
|
|
instance = f.read().rstrip()
|
|
# check if the instance URL domain is valid
|
|
#print(urlpath,instance)
|
|
if IsOnionValid(instance):
|
|
print("[+] Instance Name:",instance,IsOnionValid(instance))
|
|
isitvalid="y"
|
|
else:
|
|
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
|
|
return False
|
|
else:
|
|
print("[+] Instance Path doesn't exist yet")
|
|
# and ask for the instance URL domain
|
|
instance = input("What is your Instance domain ? (ex: uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion):")
|
|
instancepath=rootpath+'www/participants/'+instance
|
|
# check if the instance URL domain is valid
|
|
if IsOnionValid(instance):
|
|
print("[+] Instance Name: ",instance,IsUrlValid(instance))
|
|
else:
|
|
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
|
|
return False
|
|
|
|
|
|
# ask the user if the instance URL is valid ?
|
|
print()
|
|
print(instance)
|
|
isitvalid=input("Is this your this your instance domain ? (y/n)")
|
|
# if yes, then write it into ~/.darknet_participant_url
|
|
if isitvalid == "y" :
|
|
print("OK writing the instance url to ~/.darknet_participants_url")
|
|
with open(urlpath, "w") as file:
|
|
file.write(instance)
|
|
print("[+] file written, let's read it")
|
|
f = open(urlpath,"r")
|
|
print(f.read())
|
|
print("[+] Initial Setup Completed!")
|
|
myinstance = instance
|
|
|
|
instancepath=rootpath+'www/participants/'+instance
|
|
templatepath=rootpath+'templates/'
|
|
verifiedcsvfile=instancepath+'/verified.csv'
|
|
unverifiedcsvfile=instancepath+'/unverified.csv'
|
|
blcsvfile=instancepath+'/blacklist.csv'
|
|
secsvfile=instancepath+'/sensitive.csv'
|
|
webpcsvfile=instancepath+'/webring-participants.csv'
|
|
# check if instancepath exists, if not then create the directory
|
|
if not os.path.exists(instancepath):
|
|
os.makedirs(instancepath)
|
|
# check if all the required csv files exist in it, otherwise copy them from the templates directory
|
|
# NOTE : the templates files are EMPTY by default, this is because i want each peer to manually review lists of links, and links themselves manually, this is to avoid allowing malicious links to slip through without intentional edits from the peer themselves.
|
|
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv','banner.png']:
|
|
filepath=instancepath+'/'+i
|
|
if not os.path.isfile(filepath):
|
|
# copy templates/ FILE.CSV to instancepath/ FILE.CSV
|
|
src=templatepath+i
|
|
shutil.copyfile(src, filepath)
|
|
# now that they exist, get vdf and uvdf and the rest
|
|
vdf = pd.read_csv(verifiedcsvfile)
|
|
uvdf = pd.read_csv(unverifiedcsvfile)
|
|
bldf = pd.read_csv(blcsvfile)
|
|
sedf = pd.read_csv(secsvfile)
|
|
webpdf = pd.read_csv(webpcsvfile)
|
|
print("[+] file exists, your Webring URL is", instance)
|
|
isitvalid = "y"
|
|
|
|
while True:
|
|
print("""
|
|
|
|
|
|
|
|
|
|
[+] Welcome to the Darknet Onion Webring, where you are exploring the Darknet and helping others do the same.
|
|
|
|
Managing Websites:
|
|
1) Add a new Website entry (into unverified.csv)
|
|
2) Trust a Website entry (move an entry from unverified to verified.csv)
|
|
3) Untrust a Website entry (move an entry from unverified to verified.csv)
|
|
|
|
Managing Webring Participants:
|
|
4) Synchronize new links from existing webring participants, into your unverified.csv file
|
|
5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
|
|
6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
|
|
|
|
Managing Wordlists:
|
|
7) Add/Remove Words/URLs in the sensitive list (ex: drug)
|
|
8) Add/Remove Words/URLs or links in the blacklist (ex: porn)
|
|
|
|
Maintenance:
|
|
9) Remove the duplicate URLs for your own instance
|
|
10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
|
|
|
|
0) Exit
|
|
""")
|
|
option = input("Select Option? (0-11): ")
|
|
print(option)
|
|
match option:
|
|
|
|
|
|
########## MANAGING WEBSITE ENTRIES #################
|
|
#Websites:
|
|
# 1) Add a new Website entry (into unverified.csv)
|
|
# 2) Trust a Website entry (move an entry from unverified to verified.csv)
|
|
# 3) Untrust a Website entry (move an entry from unverified to verified.csv)
|
|
#####################################################
|
|
|
|
case "1":
|
|
done = False
|
|
while done == False:
|
|
print("\n[+] Add a new Website entry (into unverified.csv)")
|
|
name=''
|
|
while(IsNameValid(name) is not True):
|
|
name = input("What is the Website name ? ")
|
|
category=''
|
|
while(IsCategoryValid(category) is not True):
|
|
category = input("What is the website Category ? ")
|
|
# the url of the website (required) + check if its valid
|
|
url=''
|
|
while(IsUrlValid(url) is not True):
|
|
url=input("What is the website URL ? ")
|
|
|
|
# a quick description (optional) + check if its valid
|
|
desc='DEFAULT'
|
|
while(IsDescriptionValid(desc) is not True):
|
|
desc=input("Description for the website ? (Optional) ")
|
|
# sensitive ? (y/n) + check if its valid
|
|
#entry_sensi = input("is it a sensitive website ? (ex: website related to drugs) (y/n)")
|
|
|
|
choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
|
|
if choice == "n":
|
|
sensi = '❌'
|
|
else:
|
|
sensi = '✔️'
|
|
|
|
newrow=[instance,category,name,url,sensi,desc,'','']
|
|
print("[+] NEWROW=",newrow)
|
|
# (rest is automatic: status, score, instance is = '' because it is your own instance)
|
|
# TODO check if the entry doesn't already exist in verified.csv and in unverified.csv
|
|
# if it doesnt exist, add it into unverified.csv
|
|
uvdf.loc[-1] = newrow # adding a row
|
|
uvdf.index = uvdf.index + 1 # shifting index
|
|
uvdf = uvdf.sort_index() # sorting by index
|
|
print("[+] New row added! now writing the csv file:")
|
|
uvdf.to_csv(unverifiedcsvfile, index=False)
|
|
choice=input("\n[+] Want to add another website ? (y/n) ")
|
|
if choice == "n":
|
|
done = True
|
|
|
|
|
|
case "2":
|
|
print("[+] Trust a Website entry (move an entry from unverified to verified.csv)")
|
|
done = False
|
|
while done == False:
|
|
vdf = pd.read_csv(verifiedcsvfile)
|
|
uvdf = pd.read_csv(unverifiedcsvfile)
|
|
# search for a word
|
|
print(uvdf[['Name','URL']])
|
|
name=''
|
|
while(IsNameValid(name) is not True):
|
|
name = input("What is the Website name you want to trust ? (ex: Nowhere)")
|
|
filter_uvdf = uvdf[uvdf.Name.str.contains(name)]
|
|
# NOTE and display only the matching entries in unverified.csv in an array format (display it in CLI).
|
|
print(filter_uvdf[['Name','URL']])
|
|
# check if there are no results, dont proceed if there are none!
|
|
if filter_uvdf.size == 0:
|
|
print("ERROR no results, skipping.")
|
|
else:
|
|
# Each of the rows has an index,
|
|
index=-1
|
|
while (index not in filter_uvdf.index):
|
|
# prompt the user to ask for with row they want to move to verified.csv
|
|
index = int(input("What is the index of the entry that you want to move to verified.csv ? (ex: 3) "))
|
|
# once selected, it must be able to SAVE and print that row:
|
|
print(uvdf.iloc[index].values)
|
|
newrow=uvdf.iloc[index].values
|
|
|
|
|
|
# append it into verified.csv
|
|
vdf.loc[-1] = newrow # adding a row
|
|
vdf.index = vdf.index + 1 # shifting index
|
|
vdf = vdf.sort_index() # sorting by index
|
|
vdf.to_csv(verifiedcsvfile, index=False)
|
|
print("[+] New row added to verified.csv! now writing to the csv")
|
|
|
|
|
|
# remove it from unverified.csv
|
|
uvdf.drop(index, inplace= True)
|
|
uvdf.to_csv(unverifiedcsvfile, index=False)
|
|
print("[+] Link is now moved to verified.csv!")
|
|
choice=input("\n[+] Want to trust another website ? (y/n) ")
|
|
if choice == "n":
|
|
done = True
|
|
|
|
case "3":
|
|
print("[+] Untrust a Website entry (move an entry from verified to unverified.csv)")
|
|
print(vdf[['Name','URL']])
|
|
# search for a word
|
|
name=''
|
|
while(IsNameValid(name) is not True):
|
|
name = input("What is the Website name you want to untrust ? (ex: BreachForums)")
|
|
filter_vdf = vdf[vdf.Name.str.contains(name)]
|
|
# and display only the matching entries in unverified.csv in an array format (display it in CLI).
|
|
print(filter_vdf[['Name','URL']])
|
|
# check if there are no results, dont proceed if there are none!
|
|
if filter_vdf.size == 0:
|
|
print("ERROR no results, skipping.")
|
|
else:
|
|
# Each of the rows has an index,
|
|
index=-1
|
|
while (index not in filter_vdf.index):
|
|
# prompt the user to ask for with row they want to move to unverified.csv
|
|
index = int(input("What is the index of the entry that you want to move to unverified.csv ? (ex: 3) "))
|
|
# once selected, it must be able to SAVE and print that row:
|
|
print(vdf.iloc[index].values)
|
|
newrow=vdf.iloc[index].values
|
|
|
|
|
|
# append it into unverified.csv
|
|
uvdf.loc[-1] = newrow # adding a row
|
|
uvdf.index = uvdf.index + 1 # shifting index
|
|
uvdf = uvdf.sort_index() # sorting by index
|
|
uvdf.to_csv(unverifiedcsvfile, index=False)
|
|
print("[+] New row added to unverified.csv!")
|
|
|
|
|
|
# remove it from verified.csv
|
|
vdf.drop(index, inplace= True)
|
|
vdf.to_csv(verifiedcsvfile, index=False)
|
|
print("[+] Link is now moved to unverified.csv!")
|
|
|
|
|
|
####### MANAGING WEBRING PARTICIPANTS ###########
|
|
# 4) Synchronize new links from webring participants, into your unverified.csv file
|
|
# 5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
|
|
# 6) Trust/UnTrust/Blacklist a webring participant
|
|
#####################################################
|
|
|
|
|
|
|
|
|
|
|
|
#check if it works when you have a second webring participant
|
|
case "4":
|
|
print("4) Synchronize new links from existing webring participants, into your unverified.csv file")
|
|
# iterate through each existing directories in www/participants/* to get each webring participant
|
|
participantsdir=rootpath+'www/participants/'
|
|
#print(os.listdir(participantsdir))
|
|
name=''
|
|
desc=''
|
|
trusted=''
|
|
status=''
|
|
score=''
|
|
webringcsvfile=instancepath+'/'+'webring-participants.csv'
|
|
wdf = pd.read_csv(webringcsvfile)
|
|
for participant in os.listdir(participantsdir):
|
|
participantdir=participantsdir+participant
|
|
#print(participant)
|
|
|
|
# NOTE check if the webring participant is yourself, if it is, then skip it
|
|
if participant != myinstance: # prod: dont use your own intance
|
|
#if participant == myinstance: # preprod testing only on your own instance
|
|
#overwrite the existing files in the participant's directory, with their version (download all the csv files from them again)
|
|
basewurl='http://'+participant+'/participants/'+participant+'/'
|
|
print(basewurl)
|
|
print('[+] Downloading the files of ',participant, ": ")
|
|
w_vcsv=basewurl+'verified.csv'
|
|
w_uvcsv=basewurl+'unverified.csv'
|
|
#print(CheckUrl(w_uvcsv))
|
|
w_blcsv=basewurl+'blacklist.csv'
|
|
#print(CheckUrl(w_blcsv))
|
|
w_scsv=basewurl+'sensitive.csv'
|
|
#print(CheckUrl(w_scsv))
|
|
w_webcsv=basewurl+'webring-participants.csv'
|
|
#print(CheckUrl(w_webcsv))
|
|
|
|
# verify that their verified.csv csv file exists at basewurl+'verified.csv'
|
|
if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
|
|
print("[-] Webring Participant isn't reachable, skipping")
|
|
#return False #dont do anything if the webring participant isnt reachable.
|
|
else: #if the webring participant is reachable, proceed
|
|
print("[+] Webring Participant is reachable, updating their csv files:")
|
|
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
|
|
# FOR EACH CSV FILE TO GET:
|
|
# URL: basewurl / FILE.CSV
|
|
# PATH: participantdir / FILE.CSV
|
|
#print('[+] DOWNLOADING ',basewurl+i)
|
|
# download the external csv file and save it into the "text" variable:
|
|
#response = urllib.request.urlopen(basewurl+i)
|
|
response = requests.get(basewurl+i, proxies=proxies)
|
|
#data = response.read() # a `bytes` object
|
|
#text = data.decode('utf-8')
|
|
text = response.text
|
|
# save the text variable into the destination file:
|
|
#print('[+] SAVING IT INTO ',participantdir+'/'+i)
|
|
csvfilepath=participantdir+'/'+i
|
|
with open(csvfilepath, "w") as file:
|
|
file.write(text)
|
|
#print("[+] file written, let's read it")
|
|
f = open(csvfilepath,"r")
|
|
#print(f.read())
|
|
|
|
# download the banner.png image:
|
|
|
|
bannerurl=basewurl+'banner.png'
|
|
bannerpath=participantdir+'/banner.png'
|
|
r = requests.get(bannerurl, stream=True, proxies=proxies)
|
|
with open(bannerpath, 'wb') as f:
|
|
r.raw.decode_content = True
|
|
shutil.copyfileobj(r.raw, f)
|
|
|
|
# SANITY CHECK ON THE BANNER PNG IMAGE:
|
|
if IsBannerValid(bannerpath):
|
|
#print('[+] Banner is valid')
|
|
pass
|
|
else:
|
|
# if false, overwrite it with the template banner png file
|
|
#print('[-] Banner is not valid, replacing it with the default banner')
|
|
os.remove(bannerpath)
|
|
# copy templates/banner.png to bannerpath
|
|
bannertemplatepath=templatepath+'banner.png'
|
|
shutil.copyfile(bannertemplatepath, bannerpath)
|
|
|
|
|
|
#print("[+] Webring Participant is valid, adding it if it's not already added.")
|
|
#print('[+] PARTICIPANT=',participant)
|
|
# check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
|
|
# and display only the matching entries in unverified.csv in an array format (display it in CLI).
|
|
filter_wdf = wdf[wdf.URL.str.contains(participant)]
|
|
#print(filter_wdf[['Name','URL']])
|
|
# check if there are no results, dont proceed if there are none!
|
|
if filter_wdf.size == 0: #skip if webring participant is already listed, otherwise proceed
|
|
newrow=[name,participant,desc,trusted,status,score]
|
|
#print("[+] NEWROW=",newrow)
|
|
wdf.loc[-1] = newrow # adding a row
|
|
wdf.index = wdf.index + 1 # shifting index
|
|
wdf = wdf.sort_index() # sorting by index
|
|
#print("[+] New row added! now writing the csv file:",webringcsvfile)
|
|
wdf.to_csv(webringcsvfile, index=False)
|
|
else:
|
|
pass
|
|
#print('[+] Webring participant is already listed in your own webring-participants.csv file!')
|
|
|
|
# iterate through the participant's verified.csv and unverified.csv files
|
|
for w in ['verified.csv','unverified.csv']:
|
|
csvfilepath=participantdir+'/'+w
|
|
print(csvfilepath)
|
|
csvdf = pd.read_csv(csvfilepath)
|
|
#print(bldf[['blacklisted-words']])
|
|
bldf[['blacklisted-words']].iterrows()
|
|
rows2delete= [] # it is an empty list at first
|
|
for i,j in csvdf.iterrows():
|
|
#print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
|
|
#print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
|
|
#row=uvdf.iloc[[i]] #it displays the index
|
|
row=csvdf.loc[i,:].values.tolist()
|
|
print(row)
|
|
#print(i,row)
|
|
|
|
|
|
|
|
################################ SANITY CHECKS ####################################
|
|
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
|
#print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
|
|
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
|
#mark the row for deletion as it has invalid inputs
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it has invalid inputs")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
|
|
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
|
|
for k,l in bldf.iterrows():
|
|
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
|
|
blword=bldf.at[k, 'blacklisted-words']
|
|
if any(blword in str(x) for x in row) == True:
|
|
#print("found blacklisted word! marking row for deletion")
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
else:
|
|
# not a blacklisted link, therefore it is suitable to be added to your own csv files:
|
|
################################ CHECKING FOR DUPLICATES! #########################
|
|
# for each link in the participant's verified/unverified csv files,
|
|
# check if the link is already listed in your own verified.csv or unverified.csv
|
|
filterterm=csvdf.at[i, 'URL']
|
|
filter_vdf= vdf[vdf.URL.str.contains(filterterm)]
|
|
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm)]
|
|
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
|
|
#if link doesnt exist in either of your verified/unverified csv files,
|
|
# then add it to your own unverified.csv file:
|
|
newrow=row
|
|
uvdf.loc[-1] = newrow # adding a row
|
|
uvdf.index = uvdf.index + 1 # shifting index
|
|
uvdf = uvdf.sort_index() # sorting by index
|
|
uvdf.to_csv(unverifiedcsvfile, index=False)
|
|
print("[+] New row added to your own unverified.csv file!")
|
|
else:
|
|
print('[-] Skipping row as it is already added in ',w,row,)
|
|
|
|
|
|
|
|
###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE###################
|
|
|
|
|
|
### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
|
|
for k,l in sedf.iterrows():
|
|
#print("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
|
|
seword=sedf.at[k, 'sensitive-words']
|
|
if any(seword in str(x) for x in row) == True:
|
|
if csvdf.at[i, 'Sensitive'] != '✔️':
|
|
print("Marking row", i,"as sensitive, as it matches with a sensitive word")
|
|
csvdf.at[i, 'Sensitive']='✔️'
|
|
|
|
print('[-] Rows to delete: ',rows2delete)
|
|
# only delete rows after you've gone through all the unverified.csv OR verified.csv rows'
|
|
for i in rows2delete:
|
|
row=csvdf.loc[i,:].values.tolist()
|
|
print('[+] REMOVING ROW :',i,row)
|
|
csvdf.drop(i, inplace= True)
|
|
csvdf.to_csv(csvfilepath, index=False)
|
|
rows2delete= [] # it is an empty list at first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case "5":
|
|
print("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
|
|
webring_participant_url = ''
|
|
while(IsOnionValid(webring_participant_url) is not True):
|
|
# ask for the url to the other webring participant and check if the (onion only) url is valid or not:
|
|
webring_participant_url = input("What is the onion domain of the new webring participant? (ex: uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
|
|
# check if the directory exists locally or not,
|
|
participantdir=rootpath+'www/participants/'+webring_participant_url
|
|
if not os.path.isdir(participantdir): # to test on your own instance
|
|
#if os.path.isdir(participantdir):
|
|
# if it does, it means that the webring is ALREADY added
|
|
print("[-] Webring Participant is already listed, skipping.")
|
|
return False
|
|
else:
|
|
# if not, then proceed:
|
|
# print the URL to the csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
|
|
basewurl='http://'+webring_participant_url+'/participants/'+webring_participant_url+'/'
|
|
print(basewurl)
|
|
print('[+] Checking if all of the required csv files exists for new webring participant ',webring_participant_url, ": ")
|
|
w_vcsv=basewurl+'verified.csv'
|
|
w_uvcsv=basewurl+'unverified.csv'
|
|
#print(CheckUrl(w_uvcsv))
|
|
w_blcsv=basewurl+'blacklist.csv'
|
|
#print(CheckUrl(w_blcsv))
|
|
w_scsv=basewurl+'sensitive.csv'
|
|
#print(CheckUrl(w_scsv))
|
|
w_webcsv=basewurl+'webring-participants.csv'
|
|
#print(CheckUrl(w_webcsv))
|
|
|
|
# verify that their verified.csv csv file exists at basewurl+'verified.csv'
|
|
if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
|
|
print("[-] Webring Participant is invalid, exiting.")
|
|
return False
|
|
else:
|
|
print("[+] Webring Participant is valid, adding it.")
|
|
name=''
|
|
while(IsNameValid(name) is not True):
|
|
name = input("What is the Webring instance name ? ")
|
|
desc='DEFAULT'
|
|
while(IsDescriptionValid(desc) is not True):
|
|
desc=input("Description for the webring participant ? (Optional)")
|
|
# if OK then add it to your own webring-participants.csv
|
|
trusted=''
|
|
status=''
|
|
score=''
|
|
newrow=[name,webring_participant_url,desc,trusted,status,score]
|
|
webringcsvfile=instancepath+'/'+'webring-participants.csv'
|
|
wdf = pd.read_csv(webringcsvfile)
|
|
#print("[+] NEWROW=",newrow)
|
|
wdf.loc[-1] = newrow # adding a row
|
|
wdf.index = wdf.index + 1 # shifting index
|
|
wdf = wdf.sort_index() # sorting by index
|
|
print("[+] New row added! now writing the csv file:",webringcsvfile)
|
|
wdf.to_csv(webringcsvfile, index=False)
|
|
|
|
# create the directory in www/participants/PARTICIPANTURL/ if it's not there already
|
|
if not os.path.exists(participantdir):
|
|
os.makedirs(participantdir)
|
|
# then download their csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
|
|
# then save the csv file contents into a variable, then write it where it belongs:
|
|
# for loop with each csv file you want:
|
|
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
|
|
# FOR EACH CSV FILE TO GET:
|
|
# URL: basewurl / FILE.CSV
|
|
# PATH: participantdir / FILE.CSV
|
|
print('[+] DOWNLOADING ',basewurl+i)
|
|
# download the external csv file and save it into the "text" variable:
|
|
#response = urllib.request.urlopen(basewurl+i)
|
|
response = requests.get(basewurl+i, proxies=proxies)
|
|
#data = response.read() # a `bytes` object
|
|
#text = data.decode('utf-8')
|
|
text = response.text
|
|
# save the text variable into the destination file:
|
|
print('[+] SAVING IT INTO ',participantdir+'/'+i)
|
|
csvfilepath=participantdir+'/'+i
|
|
with open(csvfilepath, "w") as file:
|
|
file.write(text)
|
|
print("[+] file written, let's read it")
|
|
f = open(csvfilepath,"r")
|
|
print(f.read())
|
|
|
|
# download the banner.png image:
|
|
|
|
bannerurl=basewurl+'banner.png'
|
|
bannerpath=participantdir+'/banner.png'
|
|
r = requests.get(bannerurl, stream=True, proxies=proxies)
|
|
with open(bannerpath, 'wb') as f:
|
|
r.raw.decode_content = True
|
|
shutil.copyfileobj(r.raw, f)
|
|
|
|
# SANITY CHECK ON THE BANNER PNG IMAGE:
|
|
if IsBannerValid(bannerpath):
|
|
print('[+] Banner is valid')
|
|
else:
|
|
# if false, overwrite it with the template banner png file
|
|
print('[-] Banner is not valid, replacing it with the default banner')
|
|
os.remove(bannerpath)
|
|
# copy templates/banner.png to bannerpath
|
|
bannertemplatepath=templatepath+'banner.png'
|
|
shutil.copyfile(bannertemplatepath, bannerpath)
|
|
|
|
########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
|
|
for w in ['verified.csv','unverified.csv']:
|
|
csvfilepath=participantdir+'/'+w
|
|
csvdf = pd.read_csv(csvfilepath)
|
|
|
|
#print(bldf[['blacklisted-words']])
|
|
bldf[['blacklisted-words']].iterrows()
|
|
rows2delete= [] # it is an empty list at first
|
|
for i,j in csvdf.iterrows():
|
|
#print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
|
|
#print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
|
|
#row=uvdf.iloc[[i]] #it displays the index
|
|
row=csvdf.loc[i,:].values.tolist()
|
|
#print(i,row)
|
|
|
|
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
|
#print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
|
|
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
|
#mark the row for deletion as it has invalid inputs
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it has invalid inputs")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
|
|
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
|
|
for k,l in bldf.iterrows():
|
|
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
|
|
blword=bldf.at[k, 'blacklisted-words']
|
|
if any(blword in str(x) for x in row) == True:
|
|
#print("found blacklisted word! marking row for deletion")
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
|
|
### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
|
|
for k,l in sedf.iterrows():
|
|
#print("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
|
|
seword=sedf.at[k, 'sensitive-words']
|
|
if any(seword in str(x) for x in row) == True:
|
|
if csvdf.at[i, 'Sensitive'] != '✔️':
|
|
print("Marking row", i,"as sensitive, as it matches with a sensitive word")
|
|
csvdf.at[i, 'Sensitive']='✔️'
|
|
|
|
print('[-] Rows to delete: ',rows2delete)
|
|
|
|
for i in rows2delete:
|
|
row=csvdf.loc[i,:].values.tolist()
|
|
print('[+] REMOVING ROW :',i,row)
|
|
csvdf.drop(i, inplace= True)
|
|
csvdf.to_csv(csvfilepath, index=False)
|
|
##############################################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case "6":
|
|
print("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
|
|
webringcsvfile=instancepath+'/'+'webring-participants.csv'
|
|
wdf = pd.read_csv(webringcsvfile)
|
|
# list each webring participant in your webring-participants.csv file
|
|
print(wdf[["URL","Trusted"]])
|
|
# ask the user to pick an index
|
|
index=""
|
|
while (index not in wdf.index):
|
|
# prompt the user to ask for with row they want to move to verified.csv
|
|
index = int(input("What is the index of the webring participant that you want to edit ? (ex: 3) "))
|
|
# once a valid index is picked, ask if the user wants to 1) trust the webring participant, or 2) untrust them, or 3) black list them
|
|
choice=""
|
|
while (choice not in ["1","2","3"]):
|
|
choice = input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant ?")
|
|
if choice == "1":
|
|
# trust the webring participant
|
|
# ask the user if they want to proceed, as this is potentially risky if the webring participant tries to list malicious links in the future
|
|
choice2=input("You're about to trust another peer, this means that you're going to automatically trust all of the links they have in their verified.csv file! If this is a malicious peer, you're about to potentially going to automatically trust malicious links, it is potentially risky! Do you want to continue ? (y/n)")
|
|
if choice2 == "y":
|
|
# if user wants to proceed, mark the "trusted" column as V
|
|
print("[+] Trusting webring participant", wdf.at[index,"URL"])
|
|
wdf.at[index,"Trusted"]='✔️'
|
|
wdf.to_csv(webringcsvfile, index=False)
|
|
|
|
else:
|
|
print("[-] not trusting webring participant, skipping.")
|
|
if choice == "2":
|
|
print("[+] UnTrusting webring participant", wdf.at[index,"URL"])
|
|
wdf.at[index,"Trusted"]=''
|
|
wdf.to_csv(webringcsvfile, index=False)
|
|
# untrust the webring participant
|
|
# if 2: mark the "trusted" column as empty
|
|
if choice == "3":
|
|
print("[+] Blacklisting webring participant", wdf.at[index,"URL"])
|
|
# blacklist the webring participant
|
|
# add it's URL to your own blacklist.csv
|
|
instance2blacklist=wdf.at[index,"URL"]
|
|
newrow=[instance2blacklist]
|
|
print("[+] NEWROW=",newrow)
|
|
# (rest is automatic: status, score, instance is = '' because it is your own instance)
|
|
# check if the entry doesn't already exist in verified.csv and in unverified.csv
|
|
# if it doesnt exist, add it into unverified.csv
|
|
bldf.loc[-1] = newrow # adding a row
|
|
bldf.index = bldf.index + 1 # shifting index
|
|
bldf = bldf.sort_index() # sorting by index
|
|
print("[+] New row added! now writing the csv file:")
|
|
bldf.to_csv(blcsvfile, index=False)
|
|
|
|
|
|
# remove all of the entries that came from that participant (drop the lines in your own verified+unverified.csv that have that instance in the instance column)
|
|
|
|
#vdf
|
|
rows2delete= [] # it is an empty list at first
|
|
for i,j in vdf.iterrows():
|
|
row=vdf.loc[i,:].values.tolist()
|
|
for k,l in bldf.iterrows():
|
|
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
|
|
blword=bldf.at[k, 'blacklisted-words']
|
|
if any(blword in str(x) for x in row) == True:
|
|
#print("found blacklisted word! marking row for deletion")
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
for i in rows2delete:
|
|
row=vdf.loc[i,:].values.tolist()
|
|
print('[+] REMOVING ROW :',i,row)
|
|
vdf.drop(i, inplace= True)
|
|
vdf.to_csv(verifiedcsvfile, index=False)
|
|
print(vdf)
|
|
rows2delete= [] # it is an empty list at first
|
|
#uvdf
|
|
rows2delete= [] # it is an empty list at first
|
|
for i,j in uvdf.iterrows():
|
|
row=uvdf.loc[i,:].values.tolist()
|
|
for k,l in bldf.iterrows():
|
|
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
|
|
blword=bldf.at[k, 'blacklisted-words']
|
|
if any(blword in str(x) for x in row) == True:
|
|
#print("found blacklisted word! marking row for deletion")
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
for i in rows2delete:
|
|
row=uvdf.loc[i,:].values.tolist()
|
|
print('[+] REMOVING ROW :',i,row)
|
|
uvdf.drop(i, inplace= True)
|
|
uvdf.to_csv(unverifiedcsvfile, index=False)
|
|
print(uvdf)
|
|
rows2delete= [] # it is an empty list at first
|
|
|
|
# find all rows that match with the instance name in wdf aswell to remove them
|
|
for i,j in wdf.iterrows():
|
|
row=wdf.loc[i,:].values.tolist()
|
|
for k,l in bldf.iterrows():
|
|
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
|
|
blword=bldf.at[k, 'blacklisted-words']
|
|
if any(blword in str(x) for x in row) == True:
|
|
#print("found blacklisted word! marking row for deletion")
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
for i in rows2delete:
|
|
row=wdf.loc[i,:].values.tolist()
|
|
print('[+] REMOVING ROW :',i,row)
|
|
wdf.drop(i, inplace= True)
|
|
wdf.to_csv(webringcsvfile, index=False)
|
|
print(wdf)
|
|
rows2delete= [] # it is an empty list at first
|
|
|
|
|
|
|
|
# remove the entire directory in www/participants/INSTANCENAME aswell to get rid of it
|
|
instance2blacklistpath=rootpath+'www/participants/'+instance2blacklist
|
|
print("[+] removing the participant's directory at ",instance2blacklistpath)
|
|
shutil.rmtree(instance2blacklistpath)
|
|
|
|
|
|
|
|
|
|
|
|
################### MANAGING WORDLISTS #################
|
|
#Managing Wordlists:
|
|
# 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
|
|
# 8) Add/Remove words or links in the blacklist (ex: porn)
|
|
|
|
#Maintenance:
|
|
# 9) remove the duplicate URLs for your own instance
|
|
# 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
|
|
#########################################################
|
|
|
|
case "7":
|
|
print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
|
|
#secsvfile=instancepath+'/sensitive.csv' #fyi
|
|
#sedf = pd.read_csv(secsvfile) #fyi
|
|
option="0"
|
|
|
|
done = False
|
|
while(done == False):
|
|
while option != "1" and option != "2" and option != "-1":
|
|
option=input("[+] do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
|
|
if option == "1":
|
|
word=input("[+] which Sensitive word do you want to add? (write -1 to exit) ")
|
|
if word == "-1":
|
|
done = True
|
|
#True to get out of the while loop
|
|
else:
|
|
print("[+] checking if the Word/URL is valid: ")
|
|
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
|
|
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
|
|
print('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
|
|
# add it to the sensitive wordlist
|
|
newrow=[word]
|
|
print("[+] NEWROW=",newrow)
|
|
# (rest is automatic: status, score, instance is = '' because it is your own instance)
|
|
# check if the entry doesn't already exist in verified.csv and in unverified.csv
|
|
# if it doesnt exist, add it into unverified.csv
|
|
sedf.loc[-1] = newrow # adding a row
|
|
sedf.index = sedf.index + 1 # shifting index
|
|
sedf = sedf.sort_index() # sorting by index
|
|
print("[+] New row added! now writing the csv file: ")
|
|
sedf.to_csv(secsvfile, index=False)
|
|
|
|
|
|
if option == "2":
|
|
print(sedf)
|
|
index=""
|
|
while (index not in sedf.index) and index != -1:
|
|
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
|
|
if index == -1:
|
|
done = True
|
|
#True to get out of the while loop
|
|
else:
|
|
if (index in sedf.index):
|
|
#if index exists, remove it
|
|
print("[+] removing selected index: ")
|
|
sedf.drop(index, inplace= True)
|
|
sedf.to_csv(secsvfile, index=False)
|
|
|
|
else:
|
|
print('[-] Error, invalid index')
|
|
|
|
|
|
else:
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
case "8":
|
|
print("[+] Add/Remove words in the blacklist list (ex: porn)")
|
|
#blcsvfile=instancepath+'/sensitive.csv' #fyi
|
|
#bldf = pd.read_csv(secsvfile) #fyi
|
|
option="0"
|
|
|
|
done = False
|
|
while(done == False):
|
|
while option != "1" and option != "2" and option != "-1":
|
|
option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
|
|
if option == "1":
|
|
word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
|
|
if word == "-1":
|
|
done = True
|
|
#True to get out of the while loop
|
|
else:
|
|
print("[+] Checking if the Word/URL is valid: ")
|
|
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
|
|
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
|
|
print('[+] Word/URL is valid, adding the word into the blacklist ')
|
|
# add it to the sensitive wordlist
|
|
newrow=[word]
|
|
print("[+] NEWROW=",newrow)
|
|
# (rest is automatic: status, score, instance is = '' because it is your own instance)
|
|
# check if the entry doesn't already exist in verified.csv and in unverified.csv
|
|
# if it doesnt exist, add it into unverified.csv
|
|
bldf.loc[-1] = newrow # adding a row
|
|
bldf.index = bldf.index + 1 # shifting index
|
|
bldf = bldf.sort_index() # sorting by index
|
|
print("[+] New row added! now writing the csv file: ")
|
|
bldf.to_csv(blcsvfile, index=False)
|
|
|
|
|
|
if option == "2":
|
|
print(bldf)
|
|
index=""
|
|
while (index not in bldf.index) and index != -1:
|
|
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
|
|
if index == -1:
|
|
done = True
|
|
#True to get out of the while loop
|
|
else:
|
|
if (index in bldf.index):
|
|
#if index exists, remove it
|
|
print("[+] removing selected index: ")
|
|
bldf.drop(index, inplace= True)
|
|
bldf.to_csv(blcsvfile, index=False)
|
|
|
|
else:
|
|
print('[-] Error, invalid index')
|
|
|
|
|
|
else:
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
|
|
case "9":
|
|
print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
|
|
# ignore it if the index is "indextocheck" and if the index is already listed in rows2delete
|
|
# else: add the index to "rows2delete"
|
|
# go drop the rows by their index listed in "rows2delete"
|
|
################################ CHECKING FOR DUPLICATES! #########################
|
|
# for unverified.csv, and verified.csv
|
|
for w in ['verified.csv','unverified.csv']:
|
|
#instancepath=rootpath+'www/participants/'+instance # fyi
|
|
csvfilepath=instancepath+'/'+w
|
|
|
|
print(csvfilepath)
|
|
csvdf = pd.read_csv(csvfilepath)
|
|
print("REMOVING DUPLICATES IN", csvfilepath)
|
|
csvdf = csvdf.drop_duplicates(subset=['URL'])
|
|
csvdf.to_csv(csvfilepath, index=False)
|
|
print(csvdf[['URL']])
|
|
case "10":
|
|
print("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
|
|
participantspath = rootpath+'www/participants/'
|
|
for participant in os.listdir(participantspath):
|
|
print("Participant:",participant)
|
|
participantdir= participantspath+participant
|
|
a=0
|
|
if a == 0:
|
|
if a== 0:
|
|
################ BEGIN SANITY CHECKS FOR EACH PARTICIPANTS ##############
|
|
# iterate through the participant's verified.csv and unverified.csv files
|
|
for w in ['verified.csv','unverified.csv']:
|
|
csvfilepath=participantdir+'/'+w
|
|
print(csvfilepath)
|
|
csvdf = pd.read_csv(csvfilepath)
|
|
#print(bldf[['blacklisted-words']])
|
|
bldf[['blacklisted-words']].iterrows()
|
|
rows2delete= [] # it is an empty list at first
|
|
for i,j in csvdf.iterrows():
|
|
#print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
|
|
#print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
|
|
#row=uvdf.iloc[[i]] #it displays the index
|
|
row=csvdf.loc[i,:].values.tolist()
|
|
print(row)
|
|
#print(i,row)
|
|
|
|
|
|
|
|
################################ SANITY CHECKS ####################################
|
|
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
|
#print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
|
|
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
|
#mark the row for deletion as it has invalid inputs
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it has invalid inputs")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
|
|
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
|
|
for k,l in bldf.iterrows():
|
|
#print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
|
|
blword=bldf.at[k, 'blacklisted-words']
|
|
if any(blword in str(x) for x in row) == True:
|
|
#print("found blacklisted word! marking row for deletion")
|
|
if i not in rows2delete:
|
|
print("Marking row", i,"for deletion, as it matches with a blacklisted word")
|
|
rows2delete.append(i) #mark the row for deletion if not already done
|
|
|
|
for i in rows2delete:
|
|
row=csvdf.loc[i,:].values.tolist()
|
|
print('[+] REMOVING ROW :',i,row)
|
|
csvdf.drop(i, inplace= True)
|
|
csvdf.to_csv(csvfilepath, index=False)
|
|
|
|
|
|
|
|
# TODO find the list of all csv files (in www/participants/*/*.csv) (templates should remain empty by default)
|
|
# copy what was done in option 4, to :
|
|
# delete the ones that have invalid entries
|
|
# mark the sensitive rows as sensitive
|
|
# delete the rows that match with blacklisted words
|
|
case _:
|
|
print("[-] Exiting")
|
|
return True
|
|
|
|
|
|
#### Checking Functions to validate that links are legit ####
|
|
|
|
def CheckUrl(url):
    """
    Checks if URL is actually reachable via Tor

    A GET request is sent to `url` through the SOCKS proxy at
    127.0.0.1:9050 with a 5 second timeout.

    Returns True when a response comes back with any status code other
    than 502. Returns False on a 502 response or when the request fails
    (connection error, timeout, malformed URL, ...).
    """
    proxies = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050'
    }
    try:
        status = requests.get(url, proxies=proxies, timeout=5).status_code
    except requests.exceptions.RequestException:
        # RequestException is the base class of the errors raised by requests
        # (ConnectionError, ReadTimeout, InvalidURL, MissingSchema, ...).
        # Any of them means the URL could not be fetched, so report it as
        # unreachable instead of letting the exception crash the caller.
        return False
    # A 502 answer is treated as unreachable; every other status counts as up.
    return status != 502
|
|
|
|
#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
|
|
|
|
def IsBannerValid(path: str) -> bool:
    """
    Checks if the banner.png file has the correct dimensions (240x60)

    Returns True only when the file at `path` can be opened as an image
    and is exactly 240 pixels wide and 60 pixels high. Returns False when
    the file cannot be opened or has any other size.
    """
    try:
        # Image.open() keeps the file open until the image is closed; the
        # context manager releases the handle as soon as the size is read.
        with Image.open(path) as im:
            width, height = im.size
    except Exception:
        # Missing file, unreadable file, or not an image: not a valid banner.
        return False
    return width == 240 and height == 60
|
|
|
|
|
|
def IsOnionValid(url: str)-> bool:
    """
    Checks if the domain(param) is a valid onion domain and return True else False.

    `url` may be a bare domain or a URL starting with "http://" (only the
    host part is examined, any path is ignored). Surrounding whitespace and
    one trailing "/" are removed first.

    The host is accepted when all of the following hold:
      - it contains only the characters [A-Za-z0-9.] and ends with ".onion"
      - it has at most 3 dot-separated parts ("subdomain.url.onion")
      - it is at least 62 characters long

    Any non-string input returns False.
    """
    if not isinstance(url, str):
        # e.g. NaN read from an empty CSV cell: never a valid domain
        return False

    url = url.strip().removesuffix('/')
    if url.startswith('http://'):
        # 'http://host/path' splits into ['http:', '', 'host', 'path', ...]
        domain = url.split('/')[2]
    else:
        # TODO : edit the url to make sure it has http:// at the beginning,
        # in case if it's missing? (problem is that it only returns true or false)
        domain = url

    # Raw string so that "\." is a regex escape, and the ".onion" suffix is
    # mandatory: a long alphanumeric string without it is not an onion domain.
    if re.fullmatch(r"[A-Za-z0-9.]+\.onion", domain) is None:
        return False

    # More than 3 parts means more than one subdomain, which is not allowed.
    if len(domain.split('.')) > 3:
        return False

    # Anything shorter than 62 characters is rejected.
    return len(domain) >= 62
|
|
|
|
def IsUrlValid(url:str)->bool:
    """
    Check if url is valid both dark net end clearnet.

    `url` is converted with str() first, so non-string values are accepted
    as input. Values shorter than 4 characters are rejected. A value ending
    in ".onion" is delegated to IsOnionValid(); anything else must contain
    a dot and consist only of the characters [A-Za-z0-9:/.-].
    """
    url = str(url)
    if len(url) < 4:
        return False

    if url.endswith('.onion'):
        return IsOnionValid(url)

    # NOTE(review): an onion URL followed by a trailing slash or a path does
    # not end with '.onion' and is therefore checked by the clearnet rules
    # below - confirm this is intended.
    if '.' not in url:
        # a clearnet url needs at least one dot
        return False

    return re.fullmatch(r"[A-Za-z0-9:/.-]+", url) is not None
|
|
|
|
def IsStatusValid(status: str)-> bool:
    """
    Checks if status is one of y, n, ✔️, ❌, an empty string or nan.

    `status` is converted with str() and surrounding whitespace is ignored.
    Returns True for an allowed value, False otherwise.
    """
    allowed = ['y','n','✔️','❌','','nan']
    # str.strip() returns a new string, so the result has to be kept
    status = str(status).strip()
    return status in allowed
|
|
|
|
|
|
def IsScoreValid(score:str)->bool:
    """
    Check the Score is only "^[0-9.,]+$" with 8 max chars.

    `score` is converted with str() and surrounding whitespace is ignored.
    An empty value or "nan" is accepted, as the score can be empty when an
    entry is initially added.
    """
    # str.strip() returns a new string, so the result has to be kept
    score = str(score).strip()
    if score in ['','nan']:
        # Score can be empty when initially added
        return True
    if len(score) > 8:
        return False
    return re.fullmatch(r"[0-9.,]+", score) is not None
|
|
|
|
|
|
def IsDescriptionValid(desc:str)->bool:
    """
    Check the description is only [a-zA-Z0-9.' -] with 256 max chars.

    `desc` is converted with str() and surrounding whitespace is ignored.
    An empty description is accepted as it is optional; the placeholder
    value "DEFAULT" is rejected.
    """
    # str.strip() returns a new string, so the result has to be kept
    desc = str(desc).strip()
    if desc == "":
        # empty description is fine as it's optional
        return True
    if desc == "DEFAULT":
        return False
    if len(desc) > 256:
        return False
    # '-' is placed last in the character class so it is a plain literal
    return re.fullmatch(r"[A-Za-z0-9.' -]+", desc) is not None
|
|
|
|
def IsCategoryValid(categories: list)-> bool:
    """
    Check the categories are only [a-zA-Z0-9 ] with 64 max chars.

    `categories` may be an iterable of category names or a single category
    name given as a string. Surrounding whitespace of each name is ignored.

    Returns True only when there is at least one category and every
    category is a string of 1 to 64 allowed characters. Returns False
    otherwise, including for an empty list or a non-iterable value.
    """
    pattern = re.compile(r"[A-Za-z0-9 ]+")

    if isinstance(categories, str):
        # A lone string is one category; iterating it would validate it
        # character by character and skip the length check.
        categories = [categories]
    try:
        categories = list(categories)
    except TypeError:
        # not iterable (e.g. NaN read from an empty CSV cell)
        return False

    if not categories:
        return False

    for category in categories:
        if not isinstance(category, str):
            return False
        # str.strip() returns a new string, so the result has to be kept
        category = category.strip()
        if len(category) > 64 or pattern.fullmatch(category) is None:
            return False
    # only reached when every category passed the checks
    return True
|
|
|
|
def IsNameValid(name: str)->bool:
    """
    Check the parameter name only contains [a-zA-Z0-9 ] and is at most 64 chars long.

    Surrounding whitespace is ignored. Returns False for an empty name,
    for a name with any other character, for a name longer than 64
    characters and for any non-string value.
    """
    if not isinstance(name, str):
        # e.g. NaN read from an empty CSV cell: report it as invalid instead
        # of raising AttributeError on .strip()
        return False
    name = name.strip()
    if len(name) > 64:
        return False
    return re.fullmatch(r"[A-Za-z0-9 ]+", name) is not None
|
|
|
|
|
|
|
|
|
|
|
|
# Run main() only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()
|