from utils import *
from dotenv import load_dotenv

import os, pwd
import pandas as pd
import requests
import shutil
import time
import urllib
import sys

script_abs_path = os.path.dirname(os.path.abspath(__file__))
env_path = os.path.join(script_abs_path, ".env")
default_env_path = os.path.join(script_abs_path, ".env.sample")

if os.path.exists(env_path):
    load_dotenv(dotenv_path=env_path)
else:
    load_dotenv(dotenv_path=default_env_path)

tor_host = os.getenv("TOR_HOST")
tor_port = os.getenv("TOR_PORT")


def main():
    #os.system('clear')
    proxies = {
        'http': f'{tor_host}:{tor_port}',
        'https': f'{tor_host}:{tor_port}'
    }
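    # NOTE: requests expects full proxy URLs here; for Tor that is typically a
    # socks5h:// endpoint (e.g. TOR_HOST=socks5h://127.0.0.1 with TOR_PORT=9050,
    # assuming the .env values carry the scheme) so that name resolution for
    # .onion domains also happens through Tor.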

    rootpath='/srv/darknet-lantern/'
    urlpath=pwd.getpwuid(os.getuid()).pw_dir+"/.darknet_participant_url"
    participantsdir=rootpath+'www/participants/'
    officialparticipants=rootpath+'www/.official_participants'

    # check if the /srv/darknet-lantern/www/participants directory exists
    if not os.path.isdir(participantsdir):
        print("participants directory doesn't exist, creating it")
        os.makedirs(participantsdir)

    # iterate over /srv/darknet-lantern/www/.official_participants, one participant per line
    with open(officialparticipants, 'r') as file:
        for line in file:
            participantdir=participantsdir+line.strip()
            # check if the participant's directory exists, and create it if not
            if not os.path.isdir(participantdir):
                print("Official participant ", line.strip(), "'s directory doesn't exist, creating it")
                os.makedirs(participantdir)

    print_colors("""
;
ED.
E#Wi G: L. ,;
E###G. j. E#, :EW: ,ft f#i
E#fD#W; .. EW, E#t .GEE##; t#E .E#t GEEEEEEEL
E#t t##L ;W, E##j E#t j#K;E###t t#E i#W, ,;;L#K;;.
E#t .E#K, j##, E###D. E#GK#f E#fE#f t#E L#D. t#E
E#t j##f G###, E#jG#W; E##D. E#t D#G t#E :K#Wfff; t#E
E#t :E#K: :E####, E#t t##f E##Wi E#t f#E. t#E i##WLLLLt t#E
E#t t##L ;W#DG##, E#t :K#E: E#jL#D: E#t t#K: t#E .E#L t#E
E#t .D#W; j###DW##, E#KDDDD###iE#t ,K#jE#t ;#W,t#E f#E: t#E
E#tiW#G. G##i,,G##, E#f,t#Wi,,,E#t jDE#t :K#D#E ,WW; t#E
E#K##i :K#K: L##, E#t ;#W: j#t E#t .E##E .D#; t#E
E##D. ;##D. L##, DWi ,KK: ,; .. G#E tt fE
E#t ,,, .,, fE :
L: ,

L. ,; L.
i EW: ,ft f#i j. EW: ,ft
LE .. E##; t#E GEEEEEEEL .E#t EW, E##; t#E
L#E ;W, E###t t#E ,;;L#K;;. i#W, E##j E###t t#E
G#W. j##, E#fE#f t#E t#E L#D. E###D. E#fE#f t#E
D#K. G###, E#t D#G t#E t#E :K#Wfff; E#jG#W; E#t D#G t#E
E#K. :E####, E#t f#E. t#E t#E i##WLLLLt E#t t##f E#t f#E. t#E
.E#E. ;W#DG##, E#t t#K: t#E t#E .E#L E#t :K#E: E#t t#K: t#E
.K#E j###DW##, E#t ;#W,t#E t#E f#E: E#KDDDD###iE#t ;#W,t#E
.K#D G##i,,G##, E#t :K#D#E t#E ,WW; E#f,t#Wi,,,E#t :K#D#E
.W#G :K#K: L##, E#t .E##E t#E .D#; E#t ;#W: E#t .E##E
:W##########Wt ;##D. L##, .. G#E fE tt DWi ,KK: .. G#E
:,,,,,,,,,,,,,.,,, .,, fE : fE
, ,

version: 1.0.2
""", bold=True)

    while True:
        if os.path.isfile(urlpath):
            with open(urlpath) as f:
                instance = f.read().rstrip()
            if IsOnionValid(instance):
                print_colors(f"[+] Instance Name: {instance}. Valid: {IsOnionValid(instance)}")
                break
            else:
                print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True)
                break
        else:
            print_colors("[+] Instance Path doesn't exist yet")
            print_colors(f"Your url will be saved here {urlpath}")
            instance = input("What is your Instance domain?(ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ")
            if IsOnionValid(instance):
                print_colors(f"[+] Instance Name: {instance}. Valid: {IsOnionValid(instance)}")
                instancepath=rootpath+'www/participants/'+instance
            else:
                print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True)
                break

            isitvalid=input("Is this your instance domain? (y/n) ")
            if isitvalid == "y":
                print_colors("OK writing the instance url to ~/.darknet_participant_url")
                with open(urlpath, "w") as file:
                    file.write(instance)
                print_colors("[+] File written")
                with open(urlpath) as f:
                    print_colors(f"{f.read()}")
                print_colors("[+] Initial Setup Completed!")

    myinstance = instance

    instancepath=rootpath+'www/participants/'+instance
    templatepath=rootpath+'templates/'
    verifiedcsvfile=instancepath+'/verified.csv'
    unverifiedcsvfile=instancepath+'/unverified.csv'
    blcsvfile=instancepath+'/blacklist.csv'
    secsvfile=instancepath+'/sensitive.csv'
    webpcsvfile=instancepath+'/webring-participants.csv'
    submission_file_abs_path = os.path.abspath('submissions/submission.csv')
    crawled_file_abs_path = os.path.abspath('crawler/onion_crawler.csv')

    if not os.path.exists(instancepath):
        print_colors(f"{rootpath}", is_error=True, bold=True)
        os.makedirs(instancepath)

    # check if all the required csv files exist in it, otherwise copy them from the templates directory
    # NOTE: the template files are EMPTY by default, because each peer has to review lists of links
    # and add links themselves manually; this avoids letting malicious links slip through without
    # an intentional edit from the peer.
    for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv','banner.png']:
        filepath=instancepath+'/'+i
        if not os.path.isfile(filepath):
            # copy templates/FILE to instancepath/FILE
            src=templatepath+i
            shutil.copyfile(src, filepath)

    # now that they exist, load vdf, uvdf and the rest
    vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
    uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
    bldf = pd.read_csv(blcsvfile, on_bad_lines='skip')
    sedf = pd.read_csv(secsvfile, on_bad_lines='skip')
    webpdf = pd.read_csv(webpcsvfile, on_bad_lines='skip')
    print_colors(f"[+] file exists, your Webring URL is {instance}")
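
    # Rows are appended throughout this file with the same pandas idiom:
    #   df.loc[-1] = newrow       # stage the new row under the temporary label -1
    #   df.index = df.index + 1   # shift every label up, so -1 becomes 0
    #   df = df.sort_index()      # move the new row to the top
    # which keeps a clean 0..n RangeIndex without needing pd.concat.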

    ##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
    if len(sys.argv) == 2 and sys.argv[1] == "4":
        print("4) Synchronize new links from existing webring participants into your unverified.csv file")
        option=4
    elif len(sys.argv) == 2 and sys.argv[1] == "9":
        print("remove duplicate urls from instance")
        option=9
    elif len(sys.argv) == 2 and sys.argv[1] == "10":
        print("Perform sanity checks on all csv files for all instances")
        option=10
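    # e.g. running `python3 lantern.py 4` from a cron job performs the sync and
    # skips the interactive menu below (the script name is assumed here; adjust
    # it to however this file is invoked on your instance).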
    else:
        print_colors("""
[+] Welcome to your own Darknet Lantern Instance, where you can explore the Darknet and help others do the same.

Managing Websites:
    1) Add a new Website entry (into unverified.csv)
    2) Trust/Untrust/Blacklist a Website entry (move an entry from unverified to verified.csv)
    3) Edit link attributes

Managing Webring Participants:
    4) Synchronize new links from existing webring participants, into your unverified.csv file
    5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
    6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)

Managing Wordlists:
    7) Add/Remove Words/URLs in the sensitive list (ex: drug)
    8) Add/Remove Words/URLs in the blacklist (ex: porn)

Maintenance:
    9) Remove the duplicate URLs for your own instance
    10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
    11) Review submissions (Add to verified.csv / add to unverified.csv / delete / blacklist)
    12) Review crawled websites (Add to verified.csv / add to unverified.csv / delete / blacklist)

0) Exit

""")
        option = input("Select an option? (0-12): ").strip()
        try:
            option = int(option)
        except ValueError:
            print_colors(f"[-] Exiting. {option} is not a valid option.", bold=True, is_error=True)
            return False

    while True:
        match option:

            ########## MANAGING WEBSITE ENTRIES #################
            # Websites:
            # 1) Add a new Website entry (into unverified.csv)
            # 2) Trust/Untrust/Blacklist a Website entry (move it between unverified.csv and verified.csv)
            # 3) Edit link attributes
            #####################################################

            case 1:
                while True:
                    print_colors("\n[+] Add a new Website entry (into unverified.csv)")
                    name=''
                    while(IsNameValid(name) is not True):
                        name = input("What is the name of the website? ")
                    category=''
                    while(IsCategoryValid(category) is not True):
                        category = input("What is the website Category? ")
                    # the url of the website (required) + check that it is valid
                    url=''
                    while(IsUrlValid(url) is not True and IsSimpleXChatroomValid(url) is not True):
                        url=input("What is the website URL ? ")

                    # a quick description (optional) + check that it is valid
                    desc='DEFAULT'
                    while(IsDescriptionValid(desc) is not True):
                        desc=input("Description for the website ? (if not empty, the link will be added to verified.csv directly) ")
                    choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
                    if choice == "n":
                        sensi = 'NO'
                    else:
                        sensi = 'YES'

                    newrow=[instance,category,name,url,sensi,desc,'YES','100']
                    print_colors(f"[+] NEWROW= {newrow}")
                    # (the rest is automatic: Status and Score; Instance is your own instance)

                    # delete existing entries in verified.csv
                    vdf_same_url_filter = vdf["URL"] == url # check for the same url
                    vdf_same_url_filter_count = vdf_same_url_filter.sum() # total url matches
                    if vdf_same_url_filter_count > 0:
                        print(f"Found {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
                        for index, row in vdf[vdf_same_url_filter].iterrows():
                            print_colors(f"[+] ROW[{index}]= {list(row)}")
                        vdf = vdf[~vdf_same_url_filter].reset_index(drop=True) # keep only the entries that do not match the filter
                        print(f"Deleted {vdf_same_url_filter_count} row(s) with the same url in verified.csv")
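                        # (vdf["URL"] == url builds a boolean mask; ~mask keeps every
                        # non-matching row and reset_index(drop=True) renumbers them 0..n)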
                        if desc == '': # an empty description means the entry goes to unverified.csv, so save the modified verified.csv now
                            vdf.to_csv(verifiedcsvfile, index=False)

                    # delete existing entries in unverified.csv
                    uvdf_same_url_filter = uvdf["URL"] == url # check for the same url
                    uvdf_same_url_filter_count = uvdf_same_url_filter.sum() # total url matches
                    if uvdf_same_url_filter_count > 0:
                        print(f"Found {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
                        for index, row in uvdf[uvdf_same_url_filter].iterrows():
                            print_colors(f"[+] ROW[{index}]= {list(row)}")
                        uvdf = uvdf[~uvdf_same_url_filter].reset_index(drop=True) # keep only the entries that do not match the filter
                        print(f"Deleted {uvdf_same_url_filter_count} row(s) with the same url in unverified.csv")
                        if desc != '': # a non-empty description means the entry goes to verified.csv, so save the modified unverified.csv now
                            uvdf.to_csv(unverifiedcsvfile, index=False)

                    if desc == '': # empty description: the entry goes into unverified.csv
                        print("Adding new row in unverified.csv since description is empty")
                        uvdf.loc[-1] = newrow # adding a row
                        uvdf.index = uvdf.index + 1 # shifting index
                        uvdf = uvdf.sort_index() # sorting by index
                        uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                        print_colors("[+] New row added! now writing the csv file")
                        uvdf.to_csv(unverifiedcsvfile, index=False)
                    else: # non-empty description: the entry goes into verified.csv
                        print("Adding new row in verified.csv since description is not empty")
                        vdf.loc[-1] = newrow # adding a row
                        vdf.index = vdf.index + 1 # shifting index
                        vdf = vdf.sort_index() # sorting by index
                        vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                        print_colors("[+] New row added! now writing the csv file")
                        vdf.to_csv(verifiedcsvfile, index=False)

                    choice=input("\n[+] Want to add another website ? (y/n) ")
                    if choice == "n":
                        break
                break


            case 2:
                print_colors("[+] Trust/Untrust/Blacklist a Website entry (move an entry from unverified to verified.csv)")
                while True:
                    vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
                    uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
                    # search for a word
                    name=''

                    # ask the user if they want to 1) trust, 2) untrust, or 3) blacklist the selected website
                    choice = int(input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist an existing entry ?").strip())
                    while True:
                        match choice:
                            case 1:
                                # 1) Trust an existing website
                                print_colors(f"{uvdf[['Name','URL']]}")
                                while(IsNameValid(name) is not True):
                                    name = input("What is the Website name you want to Trust ? (ex: Nowhere)")
                                filter_uvdf = uvdf[uvdf.Name.str.contains(name,na=False)]
                                # display only the matching entries in unverified.csv in an array format (display it in CLI).
                                print_colors(f"{filter_uvdf[['Name','URL']]}")
                                # don't proceed if there are no results!
                                if filter_uvdf.size == 0:
                                    print_colors("ERROR no results, skipping.",is_error=True)
                                    break
                                else:
                                    # each of the rows has an index
                                    index=-1
                                    while (index not in filter_uvdf.index):
                                        index = int(input("What is the index of the entry that you want to move to Trust ? (ex: 3) "))
                                    # once selected, SAVE and print that row:
                                    print_colors(f"{uvdf.iloc[index].values}")
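                                    # NOTE: the prompt above collects a dataframe label, while
                                    # .iloc is positional; the two only line up while the csv
                                    # keeps its default 0..n RangeIndex.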

                                    # 1) Trust an existing website (move it from unverified.csv to verified.csv)
                                    newrow=uvdf.iloc[index].values
                                    newdesc=""
                                    # while the description is empty, keep looping
                                    while(newdesc == "" or (IsDescriptionValid(newdesc) is not True)):
                                        newdesc=input("Description for the website ? (it cannot be empty, for the link to be added in verified.csv): ")

                                    # append it into verified.csv
                                    vdf.loc[-1] = newrow # adding a row
                                    vdf.at[-1, 'Description']=newdesc
                                    vdf.index = vdf.index + 1 # shifting index
                                    vdf = vdf.sort_index() # sorting by index
                                    vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                    vdf.to_csv(verifiedcsvfile, index=False)
                                    print_colors("[+] New row added to verified.csv! now writing to the csv")

                                    # remove it from unverified.csv
                                    uvdf.drop(index, inplace=True)
                                    uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                    uvdf.to_csv(unverifiedcsvfile, index=False)
                                    print_colors("[+] Link is now moved to verified.csv!")
                                break

                            case 2:
                                # 2) Untrust an existing website
                                print_colors(f"{vdf[['Name','URL']]}")
                                while(IsNameValid(name) is not True):
                                    name = input("What is the Website name you want to Untrust ? (ex: BreachForums)")
                                filter_vdf = vdf[vdf.Name.str.contains(name,na=False)]
                                # display only the matching entries in verified.csv in an array format (display it in CLI).
                                print_colors(f"{filter_vdf[['Name','URL']]}")
                                # don't proceed if there are no results!
                                if filter_vdf.size == 0:
                                    print_colors("ERROR no results, skipping.",is_error=True)
                                else:
                                    # each of the rows has an index
                                    index=-1
                                    while (index not in filter_vdf.index):
                                        index = int(input("What is the index of the entry that you want to move to Untrust ? (ex: 3) "))
                                    # once selected, SAVE and print that row:
                                    print_colors(f"{vdf.iloc[index].values}")

                                    # 2) Untrust an existing website (move it from verified.csv to unverified.csv)
                                    newrow=vdf.iloc[index].values

                                    # append it into unverified.csv
                                    uvdf.loc[-1] = newrow # adding a row
                                    uvdf.index = uvdf.index + 1 # shifting index
                                    uvdf = uvdf.sort_index() # sorting by index
                                    uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                    uvdf.to_csv(unverifiedcsvfile, index=False)
                                    print_colors("[+] New row added to unverified.csv! now writing to the csv")

                                    # remove it from verified.csv
                                    vdf.drop(index, inplace=True)
                                    vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                    vdf.to_csv(verifiedcsvfile, index=False)
                                    print_colors("[+] Link is now moved to unverified.csv!")
                                break

                            case 3:
                                # 3) Blacklist an existing website
                                print_colors(f"{vdf[['Name','URL']]}")
                                while(IsNameValid(name) is not True):
                                    name = input("What is the Website name you want to Blacklist ? (ex: BreachForums)")
                                filter_uvdf = uvdf[uvdf.Name.str.contains(name,na=False)]
                                filter_vdf = vdf[vdf.Name.str.contains(name,na=False)]

                                if filter_vdf.size == 0 and filter_uvdf.size == 0:
                                    print_colors("ERROR no results, skipping.",is_error=True)
                                else:
                                    # each of the rows has an index
                                    index=-1

                                    ### CHECKING IN VERIFIED.CSV ###
                                    if filter_vdf.size != 0:
                                        print_colors(f"{filter_vdf[['Name','URL']]}")
                                        # the website name exists in verified.csv, so ask the user which index should be blacklisted
                                        while (index not in filter_vdf.index):
                                            index = int(input("What is the index of the entry in verified.csv that you want to blacklist ? (ex: 3) "))
                                        # add the URL of the website into your blacklist.csv file
                                        url2blacklist=filter_vdf.at[index,"URL"]
                                    elif filter_uvdf.size != 0:
                                        print_colors(f"{filter_uvdf[['Name','URL']]}")
                                        # the website name exists in unverified.csv, so ask the user which index should be blacklisted
                                        while (index not in filter_uvdf.index):
                                            index = int(input("What is the index of the entry in unverified.csv that you want to blacklist ? (ex: 3) "))
                                        # add the URL of the website into your blacklist.csv file
                                        url2blacklist=filter_uvdf.at[index,"URL"]

                                    newrow=[url2blacklist]
                                    bldf.loc[-1] = newrow # adding a row
                                    bldf.index = bldf.index + 1 # shifting index
                                    bldf = bldf.sort_index() # sorting by index
                                    # drop blacklist.csv's duplicates
                                    bldf = bldf.drop_duplicates(subset=['blacklisted-words'])
                                    print_colors("[+] New row added! now writing the csv file:")
                                    bldf.to_csv(blcsvfile, index=False)

                                    # drop the rows that contain that blacklisted URL in your verified.csv and unverified.csv files
                                    for w in ['verified.csv','unverified.csv']:
                                        csvfilepath=instancepath+'/'+w
                                        print_colors(f"{csvfilepath}")
                                        csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                                        rows2delete= [] # it is an empty list at first
                                        for i,j in csvdf.iterrows():
                                            row=csvdf.loc[i,:].values.tolist()
                                            ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                                            if any(url2blacklist in str(x) for x in row):
                                                if i not in rows2delete:
                                                    print_colors(f"Marking row {i} for deletion, as it matches with the blacklisted word {url2blacklist}")
                                                    rows2delete.append(i)

                                        for i in rows2delete:
                                            row=csvdf.loc[i,:].values.tolist()
                                            print_colors(f'[+] REMOVING ROW : {i} {row}')
                                            csvdf.drop(i, inplace=True)
                                        csvdf.to_csv(csvfilepath, index=False)
                                break

                    choice=input("\n[+] Want to Trust/Untrust/Blacklist another existing entry ? (y/n) ")
                    if choice == "n":
                        break
                break


            case 3:
                # ask the user to select between 1) verified.csv and 2) unverified.csv
                while True:
                    print_colors("[+] Edit link attributes")
                    choice = int(input("Do you want to edit link attributes in 1) verified.csv or 2) unverified.csv ? (-1 to exit)").strip())
                    index=-1
                    name=''
                    value=''
                    #newrow=[instance,category,name,url,sensi,desc,'YES','100']
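                    # only columns 1..5 (Category, Name, URL, Sensitive, Description)
                    # are user-editable below; Instance, Status and Score are maintained
                    # automatically by the rest of the script.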
                    match choice:
                        case 1:
                            # IF verified.csv:
                            # ask the user to select a valid website name
                            print_colors(f"{vdf[['Name','URL']]}")
                            while(IsNameValid(name) is not True):
                                name = input("What is the Website name you want to edit ? (ex: BreachForums)")
                            filter_vdf = vdf[vdf.Name.str.contains(name,na=False)]
                            if filter_vdf.size != 0:
                                print_colors(f"{filter_vdf[['Name','URL']]}")
                                # the website name exists in verified.csv, so ask the user for a valid index to edit
                                while (index not in filter_vdf.index):
                                    index = int(input("What is the index of the entry in verified.csv that you want to edit ? (ex: 3) "))
                                newrow=vdf.iloc[index].values
                                #for i in range(len(newrow)):
                                for i in [1,2,3,4,5]:
                                    print("Do you want to change the value of", filter_vdf.columns[i], "? (", newrow[i], ") (y to edit, enter to skip)")
                                    choice=input()
                                    if choice == "y":
                                        if i == 1: # column Category
                                            while(IsCategoryValid(value) is not True or value == ''):
                                                value = input("What is the new name of the Category? ")
                                            vdf.at[index,'Category']=value
                                        elif i == 2: # column Name
                                            while(IsNameValid(value) is not True or value == ''):
                                                value = input("What is the new name of the website? ")
                                            vdf.at[index,'Name']=value
                                        elif i == 3: # column URL
                                            while(IsUrlValid(value) is not True or value == ''):
                                                value = input("What is the new URL of the website? ")
                                            vdf.at[index,'URL']=value
                                        elif i == 4: # column Sensitive
                                            while(IsStatusValid(value) is not True or value == ''):
                                                value = input("Is this website sensitive? ")
                                            vdf.at[index,'Sensitive']=value
                                        elif i == 5: # column Description
                                            while(IsDescriptionValid(value) is not True or value == ''):
                                                value = input("What is the description of the website ? ")
                                            vdf.at[index,'Description']=value
                                        value=''
                                print(vdf.iloc[index].values)
                                print("[+] overwriting existing row with new values:")
                                vdf.to_csv(verifiedcsvfile, index=False)
                            break

                        case 2:
                            # IF unverified.csv:
                            # ask the user to select a valid website name
                            print_colors(f"{uvdf[['Name','URL']]}")
                            while(IsNameValid(name) is not True):
                                name = input("What is the Website name you want to edit? (ex: BreachForums)")
                            filter_uvdf = uvdf[uvdf.Name.str.contains(name,na=False)]
                            if filter_uvdf.size != 0:
                                print_colors(f"{filter_uvdf[['Name','URL']]}")
                                # the website name exists in unverified.csv, so ask the user for a valid index to edit
                                while (index not in filter_uvdf.index):
                                    index = int(input("What is the index of the entry in unverified.csv that you want to edit ? (ex: 3) "))
                                newrow=uvdf.iloc[index].values
                                #for i in range(len(newrow)):
                                for i in [1,2,3,4,5]:
                                    print("Do you want to change the value of", filter_uvdf.columns[i], "? (", newrow[i], ") (y to edit, enter to skip)")
                                    choice=input()
                                    if choice == "y":
                                        if i == 1: # column Category
                                            while(IsCategoryValid(value) is not True or value == ''):
                                                value = input("What is the new name of the Category? ")
                                            uvdf.at[index,'Category']=value
                                        elif i == 2: # column Name
                                            while(IsNameValid(value) is not True or value == ''):
                                                value = input("What is the new name of the website? ")
                                            uvdf.at[index,'Name']=value
                                        elif i == 3: # column URL
                                            while(IsUrlValid(value) is not True or value == ''):
                                                value = input("What is the new URL of the website? ")
                                            uvdf.at[index,'URL']=value
                                        elif i == 4: # column Sensitive
                                            while(IsStatusValid(value) is not True or value == ''):
                                                value = input("Is this website sensitive? ")
                                            uvdf.at[index,'Sensitive']=value
                                        elif i == 5: # column Description
                                            while(IsDescriptionValid(value) is not True or value == ''):
                                                value = input("What is the description of the website ? ")
                                            uvdf.at[index,'Description']=value
                                        value=''
                                print(uvdf.iloc[index].values)
                                print("[+] overwriting existing row with new values:")
                                uvdf.to_csv(unverifiedcsvfile, index=False)
                            break

                        case -1:
                            return False

            ####### MANAGING WEBRING PARTICIPANTS ###########
            # 4) Synchronize new links from webring participants, into your unverified.csv file
            # 5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
            # 6) Trust/UnTrust/Blacklist a webring participant
            #####################################################

            # TODO: check that this still works once you have a second webring participant

            case 4:
                print_colors("4) Synchronize new links from existing webring participants, into your unverified.csv file")
                participantsdir=rootpath+'www/participants/'
                name=''
                desc=''
                trusted=''
                status=''
                score=''
                webringcsvfile=instancepath+'/'+'webring-participants.csv'
                wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
                for participant in os.listdir(participantsdir):
                    participantdir=participantsdir+participant

                    # NOTE: check if the webring participant is yourself; if it is, skip it
                    if participant != myinstance: # prod: don't use your own instance
                    #if participant == myinstance: # preprod: testing only on your own instance
                        # overwrite the existing files in the participant's directory with their version (download all their csv files again)
                        basewurl='http://'+participant+'/participants/'+participant+'/'
                        print_colors(f"{basewurl}")
                        print_colors(f"[+] Downloading the files of: {participant} ")
                        w_vcsv=basewurl+'verified.csv'
                        w_uvcsv=basewurl+'unverified.csv'
                        w_blcsv=basewurl+'blacklist.csv'
                        w_scsv=basewurl+'sensitive.csv'
                        w_webcsv=basewurl+'webring-participants.csv'

                        # verify that all of their csv files exist remotely, starting with basewurl+'verified.csv'
                        if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
                            print_colors("[-] Webring Participant isn't reachable, skipping", is_error=True)
                        else: # the webring participant is reachable, proceed
                            print_colors("[+] Webring Participant is reachable, updating their csv files:")
                            for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
                                # FOR EACH CSV FILE TO GET:
                                # URL: basewurl / FILE.CSV
                                # PATH: participantdir / FILE.CSV
                                # download the external csv file and save it into the "text" variable:
                                #response = urllib.request.urlopen(basewurl+i)
                                response = requests.get(basewurl+i, proxies=proxies)
                                #data = response.read() # a `bytes` object
                                #text = data.decode('utf-8')
                                text = response.text
                                # save the text variable into the destination file:
                                csvfilepath=participantdir+'/'+i
                                with open(csvfilepath, "w") as file:
                                    file.write(text)

                            # download the banner.png image:
                            bannerurl=basewurl+'banner.png'
                            bannerpath=participantdir+'/banner.png'
                            r = requests.get(bannerurl, stream=True, proxies=proxies)
                            with open(bannerpath, 'wb') as f:
                                r.raw.decode_content = True
                                shutil.copyfileobj(r.raw, f)

                            # SANITY CHECK ON THE BANNER PNG IMAGE:
                            if IsBannerValid(bannerpath):
                                pass
                            else:
                                # if invalid, overwrite it with the template banner png file
                                os.remove(bannerpath)
                                # copy templates/banner.png to bannerpath
                                bannertemplatepath=templatepath+'banner.png'
                                shutil.copyfile(bannertemplatepath, bannerpath)

                            # check if the participant is already listed in webring-participants.csv, and add them if not
                            filter_wdf = wdf[wdf.URL.str.contains(participant,na=False)]
                            if filter_wdf.size == 0: # not yet listed, so add them
                                newrow=[name,participant,desc,trusted,status,score]
                                wdf.loc[-1] = newrow # adding a row
                                wdf.index = wdf.index + 1 # shifting index
                                wdf = wdf.sort_index() # sorting by index
                                wdf.to_csv(webringcsvfile, index=False)
                            else:
                                pass

                            # iterate through the participant's verified.csv and unverified.csv files
                            for w in ['verified.csv','unverified.csv']:
                                csvfilepath=participantdir+'/'+w
                                print_colors(f"{csvfilepath}")
                                csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

                                print("[+] Removing the participant's duplicate entries... ")
                                # REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
                                csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
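                                # (keep="first" keeps the first occurrence of each URL and
                                # drops the rest; inplace=False returns the deduplicated
                                # copy, which is reassigned here)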
                                csvdf.to_csv(csvfilepath, index=False)

                                csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

                                rows2delete= [] # it is an empty list at first
                                for i,j in csvdf.iterrows():
                                    row=csvdf.loc[i,:].values.tolist()

                                    ################################ SANITY CHECKS ####################################
                                    ### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
                                    if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES":
                                        csvdf.at[i, 'Status'] = "YES"
                                        csvdf.to_csv(csvfilepath, index=False)
                                    else:
                                        csvdf.at[i, 'Status'] = "NO"
                                        csvdf.to_csv(csvfilepath, index=False)

                                    if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES":
                                        csvdf.at[i, 'Sensitive'] = "YES"
                                        csvdf.to_csv(csvfilepath, index=False)
                                    else:
                                        csvdf.at[i, 'Sensitive'] = "NO"
                                        csvdf.to_csv(csvfilepath, index=False)

                                    ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
                                    if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
                                        # mark the row for deletion as it has invalid inputs
                                        if i not in rows2delete:
                                            print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
                                            print(row)
                                            rows2delete.append(i) # mark the row for deletion if not already done

                                    ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                                    for k,l in bldf.iterrows():
                                        blword=bldf.at[k, 'blacklisted-words']
                                        if any(blword in str(x) for x in row):
                                            if i not in rows2delete:
                                                print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
                                                rows2delete.append(i) # mark the row for deletion if not already done
                                        else:
                                            if i not in rows2delete:
                                                # not a blacklisted link, therefore it is suitable to be added to your own csv files:
                                                ################################ CHECKING FOR DUPLICATES! #########################
                                                # for each link in the participant's verified/unverified csv files,
                                                # check if the link is already listed in your own verified.csv or unverified.csv
                                                filterterm=csvdf.at[i, 'URL']
                                                filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
                                                uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
                                                # drop all duplicates from uvdf itself before checking
                                                uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
                                                filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
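                                                # NOTE: str.contains() treats its argument as a
                                                # regex by default, so dots in the URL match any
                                                # character; regex=False would make this a literal
                                                # substring test (left as-is to keep the existing
                                                # behavior).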
                                                if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
                                                    newrow=row
                                                    uvdf.loc[-1] = newrow # adding a row
                                                    uvdf.index = uvdf.index + 1 # shifting index
                                                    uvdf = uvdf.sort_index() # sorting by index
                                                    uvdf.to_csv(unverifiedcsvfile, index=False)

                                                    print("[+] NEW ROW =",newrow)
                                                    print_colors("[+] New row added to your own unverified.csv file!")
                                                else:
                                                    pass
                                                    #print_colors(f'[-] Skipping row as it is already added in {w} {row}',is_error=True)

                                    ###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE ###################

                                    ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
                                    for k,l in sedf.iterrows():
                                        seword=sedf.at[k, 'sensitive-words']
                                        if any(seword in str(x) for x in row):
                                            if csvdf.at[i, 'Sensitive'] != 'NO':
                                                print_colors(f"Marking row {i} as sensitive, as it matches with a sensitive word")
                                                csvdf.at[i, 'Sensitive']='YES'

                                #print_colors(f'[-] Rows to delete: {rows2delete}', is_error=True)
                                # only delete rows after going through all of the verified.csv or unverified.csv rows
                                for i in rows2delete:
                                    row=csvdf.loc[i,:].values.tolist()
                                    print_colors(f'[+] REMOVING ROW: {i}{row}')
                                    csvdf.drop(i, inplace=True)
                                csvdf.to_csv(csvfilepath, index=False)
                                rows2delete= [] # reset the list for the next file
                break

            case 5:
                print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
                webring_participant_url = ''
                while(IsOnionValid(webring_participant_url) is not True):
                    webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
                participantdir=rootpath+'www/participants/'+webring_participant_url
                if os.path.isdir(participantdir):
                    print_colors("[-] Webring Participant is already listed, skipping.")
                else:
                    basewurl='http://'+webring_participant_url+'/participants/'+webring_participant_url+'/'
                    print_colors(f"{basewurl}")
                    print_colors(f"[+] Checking if all of the required csv files exist for new webring participant {webring_participant_url} : ")
                    w_vcsv=basewurl+'verified.csv'
                    w_uvcsv=basewurl+'unverified.csv'
                    w_blcsv=basewurl+'blacklist.csv'
                    w_scsv=basewurl+'sensitive.csv'
                    w_webcsv=basewurl+'webring-participants.csv'

                    # verify that all of their csv files exist remotely, starting with basewurl+'verified.csv'
                    if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
                        print_colors("[-] Webring Participant is invalid, exiting.")
                    else:
                        print_colors("[+] Webring Participant is valid, adding it.")
                        name=''
                        while(IsNameValid(name) is not True):
                            name = input("What is the Webring instance name ? ")
                        desc='DEFAULT'
                        while(IsDescriptionValid(desc) is not True):
                            desc=input("Description for the webring participant ? (Optional)")
                        trusted=''
                        status=''
                        score=''
                        newrow=[name,webring_participant_url,desc,trusted,status,score]
                        webringcsvfile=instancepath+'/'+'webring-participants.csv'
                        wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
                        wdf.loc[-1] = newrow # adding a row
                        wdf.index = wdf.index + 1 # shifting index
                        wdf = wdf.sort_index() # sorting by index
                        print_colors(f"[+] New row added! now writing the csv file: {webringcsvfile}")
                        wdf.to_csv(webringcsvfile, index=False)

                        if not os.path.exists(participantdir):
                            os.makedirs(participantdir)
                        for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
                            # FOR EACH CSV FILE TO GET:
                            # URL: basewurl / FILE.CSV
                            # PATH: participantdir / FILE.CSV
                            print_colors(f'[+] DOWNLOADING {basewurl}{i}')
                            response = requests.get(basewurl+i, proxies=proxies)
                            text = response.text
                            print_colors(f"[+] SAVING IT INTO participantdir/{i}")
                            csvfilepath=participantdir+'/'+i
                            with open(csvfilepath, "w") as file:
                                file.write(text)
                            print_colors("[+] file written, let's read it")
                            with open(csvfilepath, "r") as f:
                                print_colors(f.read())

                        # download the banner.png image:
                        bannerurl=basewurl+'banner.png'
                        bannerpath=participantdir+'/banner.png'
                        r = requests.get(bannerurl, stream=True, proxies=proxies)
                        with open(bannerpath, 'wb') as f:
                            r.raw.decode_content = True
                            shutil.copyfileobj(r.raw, f)
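                        # (stream=True plus copyfileobj writes the image to disk chunk by
                        # chunk; decode_content=True lets the underlying urllib3 stream undo
                        # any gzip/deflate transfer-encoding before saving)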

                        # SANITY CHECK ON THE BANNER PNG IMAGE:
                        if IsBannerValid(bannerpath):
                            print_colors('[+] Banner is valid')
                        else:
                            # if invalid, overwrite it with the template banner png file
                            print_colors('[-] Banner is not valid, replacing it with the default banner')
                            os.remove(bannerpath)
                            # copy templates/banner.png to bannerpath
                            bannertemplatepath=templatepath+'banner.png'
                            shutil.copyfile(bannertemplatepath, bannerpath)

                        ########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
                        for w in ['verified.csv','unverified.csv']:
                            csvfilepath=participantdir+'/'+w
                            csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

                            #print_colors(bldf[['blacklisted-words']])
                            rows2delete= [] # it is an empty list at first
                            for i,j in csvdf.iterrows():
                                #row=uvdf.iloc[[i]] # displays the index
                                row=csvdf.loc[i,:].values.tolist()

                                ################################ SANITY CHECKS ####################################
                                ### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
                                if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES":
                                    csvdf.at[i, 'Status'] = "YES"
                                    csvdf.to_csv(csvfilepath, index=False)
                                else:
                                    csvdf.at[i, 'Status'] = "NO"
                                    csvdf.to_csv(csvfilepath, index=False)

                                if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES":
                                    csvdf.at[i, 'Sensitive'] = "YES"
                                    csvdf.to_csv(csvfilepath, index=False)
                                else:
                                    csvdf.at[i, 'Sensitive'] = "NO"
                                    csvdf.to_csv(csvfilepath, index=False)

                                ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
                                if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
                                    # mark the row for deletion as it has invalid inputs
                                    if i not in rows2delete:
                                        print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
                                        rows2delete.append(i) # mark the row for deletion if not already done

                                ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                                for k,l in bldf.iterrows():
                                    blword=bldf.at[k, 'blacklisted-words']
                                    if any(blword in str(x) for x in row):
                                        if i not in rows2delete:
                                            print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
                                            rows2delete.append(i) # mark the row for deletion if not already done

                                ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
                                for k,l in sedf.iterrows():
                                    seword=sedf.at[k, 'sensitive-words']
                                    if any(seword in str(x) for x in row):
                                        if csvdf.at[i, 'Sensitive'] != 'NO':
                                            print_colors(f"Marking row {i} as sensitive, as it matches with a sensitive word")
                                            csvdf.at[i, 'Sensitive']='YES'

                            #print_colors(f"[-] Rows to delete: {rows2delete}")
                            for i in rows2delete:
                                row=csvdf.loc[i,:].values.tolist()
                                print_colors(f"[+] REMOVING ROW: {i}{row}")
                                csvdf.drop(i, inplace=True)
                            csvdf.to_csv(csvfilepath, index=False)
                break
                ##############################################

            case 6:
                while True:
                    print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
                    webringcsvfile=instancepath+'/'+'webring-participants.csv'
                    wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
                    print_colors(f'{wdf[["URL","Trusted"]]}')
                    try:
                        index = int(input("What is the index of the webring participant that you want to edit? -1 to exit ").strip())
                        if index == -1:
                            break
                        elif index in wdf.index:
                            choice = int(input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant?").strip())
                            while True:
                                match choice:
                                    case 1:
                                        # trust the webring participant
                                        choice2=input("You're about to trust another peer, meaning that you will automatically trust all of the links in their verified.csv file! If this is a malicious peer, this action is potentially risky! Do you want to continue ? (y/n)")
                                        if choice2 == "y":
                                            print_colors(f'[+] Trusting webring participant {wdf.at[index,"URL"]}')
                                            # ('YES'/'NO' is used instead of the old ✔️/x emojis, which newer pandas versions choke on)
                                            wdf.at[index,"Trusted"]='YES'
                                            wdf.to_csv(webringcsvfile, index=False)
                                            break
                                        else:
                                            print_colors("[-] not trusting webring participant", is_error=True)
                                            break

                                    case 2:
                                        print_colors(f'[+] UnTrusting webring participant {wdf.at[index,"URL"]}')
                                        wdf.at[index,"Trusted"]='NO'
                                        wdf.to_csv(webringcsvfile, index=False)
                                        break

                                    case 3:
                                        print_colors(f'[+] Blacklisting webring participant {wdf.at[index,"URL"]}')
                                        instance2blacklist=wdf.at[index,"URL"]
                                        newrow=[instance2blacklist]
                                        print_colors(f"[+] NEWROW= {newrow}")
                                        # append the participant's URL into your own blacklist.csv:
                                        bldf.loc[-1] = newrow # adding a row
                                        bldf.index = bldf.index + 1 # shifting index
                                        bldf = bldf.sort_index() # sorting by index
                                        print_colors("[+] New row added! now writing the csv file:")
                                        bldf.to_csv(blcsvfile, index=False)

                                        # remove all of the entries that came from that participant (drop the rows in your own verified.csv and unverified.csv that match a blacklisted word)
                                        rows2delete= [] # it is an empty list at first
                                        for i,j in vdf.iterrows():
                                            row=vdf.loc[i,:].values.tolist()
                                            for k,l in bldf.iterrows():
                                                blword=bldf.at[k, 'blacklisted-words']
                                                if any(blword in str(x) for x in row):
                                                    if i not in rows2delete:
                                                        print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
                                                        rows2delete.append(i) # mark the row for deletion if not already done
                                        for i in rows2delete:
                                            row=vdf.loc[i,:].values.tolist()
                                            print_colors(f'[+] REMOVING ROW: {i} {row}')
                                            vdf.drop(i, inplace=True)
                                        vdf.to_csv(verifiedcsvfile, index=False)
                                        print_colors(f"{vdf}")
                                        rows2delete= [] # reset the list

                                        for i,j in uvdf.iterrows():
                                            row=uvdf.loc[i,:].values.tolist()
                                            for k,l in bldf.iterrows():
                                                blword=bldf.at[k, 'blacklisted-words']
                                                if any(blword in str(x) for x in row):
                                                    if i not in rows2delete:
                                                        print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
                                                        rows2delete.append(i) # mark the row for deletion if not already done
                                        for i in rows2delete:
                                            row=uvdf.loc[i,:].values.tolist()
                                            print_colors(f'[+] REMOVING ROW: {i} {row}')
                                            uvdf.drop(i, inplace=True)
                                        uvdf.to_csv(unverifiedcsvfile, index=False)
                                        print_colors(f"{uvdf}")
                                        rows2delete= [] # reset the list

                                        # find all rows that match the instance name in wdf as well, to remove them
                                        for i,j in wdf.iterrows():
                                            row=wdf.loc[i,:].values.tolist()
                                            for k,l in bldf.iterrows():
                                                blword=bldf.at[k, 'blacklisted-words']
                                                if any(blword in str(x) for x in row):
                                                    if i not in rows2delete:
                                                        print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
                                                        rows2delete.append(i) # mark the row for deletion if not already done
                                        for i in rows2delete:
                                            row=wdf.loc[i,:].values.tolist()
                                            print_colors(f'[+] REMOVING ROW: {i} {row}')
                                            wdf.drop(i, inplace=True)
                                        wdf.to_csv(webringcsvfile, index=False)
                                        print_colors(f"{wdf}")
                                        rows2delete= [] # reset the list

                                        # remove the entire www/participants/INSTANCENAME directory as well, to get rid of it
                                        instance2blacklistpath=rootpath+'www/participants/'+instance2blacklist
                                        print_colors(f"[+] removing the participant's directory at {instance2blacklistpath}")
                                        shutil.rmtree(instance2blacklistpath)
                                        break

                                    case _:
                                        break
                    except Exception:
                        break
                break

            ################### MANAGING WORDLISTS #################
            # Managing Wordlists:
            # 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
            # 8) Add/Remove Words/URLs in the blacklist (ex: porn)

            # Maintenance:
            # 9) Remove the duplicate URLs for your own instance
            # 10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
            #########################################################

            case 7:
                print_colors("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
                try:
                    option = int(input("[+] Do you want to 1) add or 2) remove Words/URLs? (type -1 to exit) "))
                    match option:
                        case 1:
                            while True:
                                word=input("[+] Which sensitive word/url do you want to add? (write -1 to exit) ")
                                if word == "-1":
                                    break
                                else:
                                    print_colors("[+] checking if the Word/URL is valid: ")
                                    if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
                                        print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist')
                                        newrow=[word]
                                        print_colors(f"[+] NEWROW= {newrow}")
                                        sedf.loc[-1] = newrow # adding a row
                                        sedf.index = sedf.index + 1 # shifting index
                                        sedf = sedf.sort_index() # sorting by index
                                        print_colors("[+] New row added! now writing the csv file.")
                                        sedf.to_csv(secsvfile, index=False)

                        case 2:
                            while True:
                                print_colors(f"{sedf}")
                                index=input("Which word do you want to remove? (a single index, or 'start end' for a range; write -1 to exit) ")
                                try:
                                    indices = index.split(' ')
                                    if len(indices) == 2:
                                        for i in range(int(indices[0]),int(indices[1])):
                                            try:
                                                idx = int(i)
                                                if idx in sedf.index:
                                                    print_colors("[+] removing selected index.")
                                                    sedf.drop(index=idx, inplace=True)
                                                    sedf.to_csv(secsvfile, index=False)
                                                else:
                                                    print_colors(f"[-] Index {idx} does not exist.", is_error=True)
                                            except ValueError:
                                                print_colors(f"[-] Error: '{i}' is not a valid integer.", is_error=True)
                                    elif len(indices) == 1:
                                        try:
                                            idx = int(indices[0])
                                            if idx != -1:
                                                if idx in sedf.index:
                                                    print_colors("[+] removing selected index.")
                                                    sedf.drop(idx, inplace=True)
                                                    sedf.to_csv(secsvfile, index=False)
                                                else:
                                                    print_colors(f"[-] Index {idx} does not exist.", is_error=True)
                                            elif idx == -1:
                                                break
                                        except ValueError:
                                            print_colors(f"[-] Error: '{indices[0]}' is not a valid integer.", is_error=True)
                                    else:
                                        print_colors('[-] Error, invalid index', is_error=True)
                                except Exception as e:
                                    print_colors(f"[-] An unexpected error occurred: {str(e)}", is_error=True)
                except Exception:
                    break
                break

            case 8:
                print_colors("[+] Add/Remove Words/URLs in the blacklist (ex: porn)")
                try:
                    option = int(input("[+] Do you want to 1) add or 2) remove Words/URLs? (type -1 to exit) "))
                    match option:
                        case 1:
                            while True:
                                word=input("[+] Which blacklisted word/url do you want to add? (write -1 to exit) ")
                                if word == "-1":
                                    break
                                else:
                                    print_colors("[+] Checking if the Word/URL is valid: ")
                                    if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
                                        print_colors('[+] Word/URL is valid, adding the word into the blacklist')
                                        newrow=[word]
                                        print_colors(f"[+] NEWROW= {newrow}")
                                        bldf.loc[-1] = newrow # adding a row
                                        bldf.index = bldf.index + 1 # shifting index
                                        bldf = bldf.sort_index() # sorting by index
                                        print_colors("[+] New row added! Now writing the csv file")
                                        bldf.to_csv(blcsvfile, index=False)

                        case 2:
                            while True:
                                print_colors(f"{bldf}")
                                index=input("Which word do you want to remove? (a single index, or 'start end' for a range; write -1 to exit) ").strip()
                                try:
                                    indices = index.split(' ')
                                    if len(indices) == 2:
                                        for i in range(int(indices[0]),int(indices[1])):
                                            try:
                                                idx = int(i)
                                                if idx in bldf.index:
                                                    print_colors("[+] removing selected index.")
                                                    bldf.drop(index=idx, inplace=True)
                                                    bldf.to_csv(blcsvfile, index=False)
                                                else:
                                                    print_colors(f"[-] Index {idx} does not exist.", is_error=True)
                                            except ValueError:
                                                print_colors(f"[-] Error: '{i}' is not a valid integer.", is_error=True)
                                    elif len(indices) == 1:
                                        try:
                                            idx = int(indices[0])
                                            if idx != -1:
                                                if idx in bldf.index:
                                                    print_colors("[+] removing selected index.")
                                                    bldf.drop(idx, inplace=True)
                                                    bldf.to_csv(blcsvfile, index=False)
                                                else:
                                                    print_colors(f"[-] Index {idx} does not exist.", is_error=True)
                                            elif idx == -1:
                                                break
                                        except ValueError:
                                            print_colors(f"[-] Error: '{indices[0]}' is not a valid integer.", is_error=True)
                                    else:
                                        print_colors('[-] Error, invalid index', is_error=True)
                                except Exception as e:
                                    print_colors(f"[-] An unexpected error occurred: {str(e)}", is_error=True)
                except Exception:
                    break
                break

            case 9:
                print_colors("[+] 9) Cleaning up all duplicates in your own unverified.csv + verified.csv (based on the url)")
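                # unlike option 10 below, this pass only touches your own instance's
                # verified.csv and unverified.csv, not the other participants' copies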
                for w in ['verified.csv', 'unverified.csv']:
                    csvfilepath = os.path.join(instancepath, w)
                    print_colors(f"Processing file: {csvfilepath}")
                    try:
                        csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                        print_colors(f"Removing duplicates in {csvfilepath}")
                        #print_colors(f"{csvdf[['URL']]}")
                        csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
                        #print_colors(f"{csvdf[['URL']]}")
                        csvdf.to_csv(csvfilepath, index=False)
                        print_colors(f"Cleaned data:\n{csvdf[['URL']]}")
                    except FileNotFoundError:
                        print_colors(f"File not found: {csvfilepath}")
                    except Exception as e:
                        print_colors(f"An error occurred while processing {csvfilepath}: {e}")
                        break
                break

            case 10:
                print_colors("[+] 10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)")
                participantspath = rootpath+'www/participants/'
                for participant in os.listdir(participantspath):
                    print_colors(f"Participant: {participant}")
                    #read=input("Continue?")
                    participantdir= participantspath+participant

                    ################ BEGIN SANITY CHECKS FOR EACH PARTICIPANT ##############
                    # iterate through the participant's verified.csv and unverified.csv files
                    for w in ['verified.csv','unverified.csv']:
                        csvfilepath=participantdir+'/'+w
                        print_colors(f"{csvfilepath}")
                        csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                        rows2delete= [] # it is an empty list at first
                        for i,j in csvdf.iterrows():
                            row=csvdf.loc[i,:].values.tolist()
                            #print_colors(f"{row}")

                            ################################ SANITY CHECKS ####################################
                            ### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
                            if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES":
                                csvdf.at[i, 'Status'] = "YES"
                                csvdf.to_csv(csvfilepath, index=False)
                            else:
                                csvdf.at[i, 'Status'] = "NO"
                                csvdf.to_csv(csvfilepath, index=False)

                            if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES":
                                csvdf.at[i, 'Sensitive'] = "YES"
                                csvdf.to_csv(csvfilepath, index=False)
                            else:
                                csvdf.at[i, 'Sensitive'] = "NO"
                                csvdf.to_csv(csvfilepath, index=False)

                            ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
                            if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
                                if i not in rows2delete:
                                    print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
                                    #print_colors(f"{row}")
                                    print(IsUrlValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsUrlValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
                                    rows2delete.append(i)
                                    read=input("Continue?")

                            ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                            for k,l in bldf.iterrows():
                                blword=bldf.at[k, 'blacklisted-words']
                                if any(blword in str(x) for x in row):
                                    if i not in rows2delete:
                                        print_colors(f"Marking row {i} for deletion, as it matches with the blacklisted word {blword}")
                                        rows2delete.append(i)
                                        #read=input("Continue?")

                            ### SANITY CHECK 3: Mark all rows that match sensitive words as Sensitive = YES ###
                            for k,l in sedf.iterrows():
                                seword=sedf.at[k, 'sensitive-words']
                                if any(seword in str(x) for x in row):
                                    print_colors(f"Marking row {i} as sensitive, as it matches with the sensitive word {seword}")
                                    csvdf.at[i, 'Sensitive']="YES"
                                    csvdf.to_csv(csvfilepath, index=False)
                                    #read=input("Continue?")

                        for i in rows2delete:
                            row=csvdf.loc[i,:].values.tolist()
                            print_colors(f'[+] REMOVING ROW : {i} {row}')
                            csvdf.drop(i, inplace=True)
                        csvdf.to_csv(csvfilepath, index=False)
                        #read=input("Continue?")
                break

            case 11:
                # review the submitted websites:
                try:
                    submission_df = pd.read_csv(submission_file_abs_path, on_bad_lines='skip')
                    verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
                    unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
                    blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
                    blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
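                    # (membership below is an exact-match test against the blacklist
                    # column, unlike the substring scans used by the sanity checks
                    # elsewhere in this file)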
                    for i, row in submission_df.iterrows():
                        link = row['link']
                        # remove the bad "&amp;" crap that breaks things
                        link = link.replace("&amp;","&")
                        print('\n',row[['name','desc','category','sensitive']])
                        print('\nLink to verify: ',link)
                        print_colors("\n1) Move entry to verified.csv \n2) Move entry from submission.csv to unverified.csv \n3) Delete from submission.csv file \n4) Add to blacklist.csv \n-1) exit")
                        if link in blacklisted_words:
                            print_colors("Blacklisted entry found", bold=True)
                            # TODO: delete the entry, as it is already blacklisted
                            continue
                        else:
                            name = row['name']
                            desc = row['desc']
                            category = row['category']
                            sensi = "YES" if row['sensitive'] == 'y' else "NO"
                            number = int(input("Enter an option: "))

                            if number == 1:
                                newrow=[instance,category,name,link,sensi,desc,'YES','100']

                                verified_csv_df.loc[-1] = newrow # adding a row
                                verified_csv_df.index = verified_csv_df.index + 1 # shifting index
                                verified_csv_df = verified_csv_df.sort_index() # sorting by index
                                verified_csv_df = verified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                print_colors("[+] New row added! now writing the csv file")
                                verified_csv_df.to_csv(verifiedcsvfile, index=False)
                                submission_df.drop(index=i,inplace=True)
                                submission_df.to_csv(submission_file_abs_path, index=False)

                            elif number == 2:
                                newrow=[instance,category,name,link,sensi,desc,'YES','100']

                                unverified_csv_df.loc[-1] = newrow # adding a row
                                unverified_csv_df.index = unverified_csv_df.index + 1 # shifting index
                                unverified_csv_df = unverified_csv_df.sort_index() # sorting by index
                                unverified_csv_df = unverified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                print_colors("[+] New row added! now writing the csv file")
                                unverified_csv_df.to_csv(unverifiedcsvfile, index=False)
                                submission_df.drop(index=i,inplace=True)
                                submission_df.to_csv(submission_file_abs_path, index=False)

                            elif number == 3:
                                submission_df.drop(index=i,inplace=True)
                                submission_df.to_csv(submission_file_abs_path, index=False)

                            elif number == 4:
                                newrow=[link]

                                blacklist_df.loc[-1] = newrow # adding a row
                                blacklist_df.index = blacklist_df.index + 1 # shifting index
                                blacklist_df = blacklist_df.sort_index() # sorting by index
                                print_colors("[+] New row added! now writing the csv file")
                                blacklist_df.to_csv(blcsvfile, index=False)
                                submission_df.drop(index=i,inplace=True)
                                submission_df.to_csv(submission_file_abs_path, index=False)

                            elif number == -1:
                                break

                            else:
                                print_colors("Invalid Number",is_error=True)
                                continue

                except Exception as e:
                    print_colors(f'Try again {e}',is_error=True)
                    break

                finally:
                    print_colors("No more submissions to review, exiting.")
                    break

            case 12:
                # review the crawled websites
                try:
                    print(crawled_file_abs_path)
                    crawled_df = pd.read_csv(crawled_file_abs_path, on_bad_lines='skip')
                    verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
                    unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
                    blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
                    blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
                    for i, row in crawled_df.iterrows():
                        link = row['URL']
                        print('\n',row[['URL','Category','Name']])
                        print('\nLink to verify: ',link)
                        print_colors("\n1) Move entry to verified.csv \n2) Move entry from the crawled list to unverified.csv \n3) Delete from the crawled list \n4) Add to blacklist.csv \n-1) exit")
                        if link in blacklisted_words:
                            print_colors("Blacklisted entry found", bold=True)
                            # the entry is already blacklisted, so drop it from the crawled list
                            crawled_df.drop(index=i,inplace=True)
                            crawled_df.to_csv(crawled_file_abs_path, index=False)
                            continue
                        else:
                            name = row['Name']
                            category = row['Category']
                            #desc = row['desc']
                            desc = ''
                            #sensi = "YES" if row['sensitive'] == 'y' else "NO"
                            sensi = ''
                            number = int(input("Enter an option: "))
                            if number == 1:
                                # Add to verified.csv
                                # ask for the name if invalid
                                while(IsNameValid(name) is not True):
                                    name = input("What is the name of the website? ")

                                # ask for the category
                                while((IsCategoryValid(category) != True) or (category == 'Tor Hidden Service')):
                                    category = input("What is the website Category? (ex: Indexes) ")

                                # ask the user to write a description
                                desc='DEFAULT'
                                while(IsDescriptionValid(desc) is not True):
                                    desc=input("Description for the website ? (cannot be empty) ")

                                # ask whether it is sensitive or not
                                choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
                                if choice == "n":
                                    sensi = 'NO'
                                else:
                                    sensi = 'YES'

                                newrow=[instance,category,name,link,sensi,desc,'YES','100']
                                verified_csv_df.loc[-1] = newrow # adding a row
                                verified_csv_df.index = verified_csv_df.index + 1 # shifting index
                                verified_csv_df = verified_csv_df.sort_index() # sorting by index
                                verified_csv_df = verified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                print_colors("[+] New row added! now writing the csv file")
                                verified_csv_df.to_csv(verifiedcsvfile, index=False)
                                crawled_df.drop(index=i,inplace=True)
                                crawled_df.to_csv(crawled_file_abs_path, index=False)

                            elif number == 2:
                                # Add to unverified.csv
                                # ask for the name if invalid
                                while(IsNameValid(name) is not True):
                                    name = input("What is the name of the website? ")
                                # ask for the category
                                while((IsCategoryValid(category) != True) or (category == 'Tor Hidden Service')):
                                    category = input("What is the website Category? (ex: Indexes) ")
                                # ask whether it is sensitive or not
                                choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
                                if choice == "n":
                                    sensi = 'NO'
                                else:
                                    sensi = 'YES'

                                # add the new row
                                newrow=[instance,category,name,link,sensi,desc,'YES','100']

                                unverified_csv_df.loc[-1] = newrow # adding a row
                                unverified_csv_df.index = unverified_csv_df.index + 1 # shifting index
                                unverified_csv_df = unverified_csv_df.sort_index() # sorting by index
                                unverified_csv_df = unverified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                                print_colors("[+] New row added! now writing the csv file")
                                unverified_csv_df.to_csv(unverifiedcsvfile, index=False)
                                crawled_df.drop(index=i,inplace=True)
                                crawled_df.to_csv(crawled_file_abs_path, index=False)

                            elif number == 3:
                                # Delete from the crawled list
                                crawled_df.drop(index=i,inplace=True)
                                crawled_df.to_csv(crawled_file_abs_path, index=False)

                            elif number == 4:
                                # Add to blacklist.csv
                                newrow=[link]

                                blacklist_df.loc[-1] = newrow # adding a row
                                blacklist_df.index = blacklist_df.index + 1 # shifting index
                                blacklist_df = blacklist_df.sort_index() # sorting by index
                                print_colors("[+] New row added! now writing the csv file")
                                blacklist_df.to_csv(blcsvfile, index=False)
                                crawled_df.drop(index=i,inplace=True)
                                crawled_df.to_csv(crawled_file_abs_path, index=False)

                            elif number == -1:
                                break

                            else:
                                print_colors("Invalid Number",is_error=True)
                                continue

                except Exception as e:
                    print_colors(f'Try again {e}',is_error=True)
                    break

                finally:
                    print_colors("No more crawled websites to review, exiting.")
                    break

            case 0:
                print_colors("[-] Exiting", bold=True)
                break


if __name__ == '__main__':
    main()