import os, pwd, re, pandas as pd, requests, shutil
from PIL import Image
import urllib
import socks, socket, glob
from utils import *


def main():
    proxies = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050'
    }
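    # NOTE: the socks5h:// scheme makes requests resolve hostnames through the Tor
    # SOCKS proxy on 127.0.0.1:9050 as well, which is what allows the .onion URLs
    # used below to be reached.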

    rootpath='/srv/darknet-lantern/'
    urlpath=pwd.getpwuid(os.getuid()).pw_dir+"/.darknet_participant_url"
    #print_colors(urlpath)
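    # (pwd.getpwuid(os.getuid()).pw_dir resolves to the current user's home
    # directory, so urlpath points at ~/.darknet_participant_url)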

    # check if ~/.darknet_participant_url exists,
    # if it exists, instance = the content of ~/.darknet_participant_url (which is the url, such as lantern.nowherejez...onion)
    print_colors("""
;
ED.
E#Wi G: L. ,;
E###G. j. E#, :EW: ,ft f#i
E#fD#W; .. EW, E#t .GEE##; t#E .E#t GEEEEEEEL
E#t t##L ;W, E##j E#t j#K;E###t t#E i#W, ,;;L#K;;.
E#t .E#K, j##, E###D. E#GK#f E#fE#f t#E L#D. t#E
E#t j##f G###, E#jG#W; E##D. E#t D#G t#E :K#Wfff; t#E
E#t :E#K: :E####, E#t t##f E##Wi E#t f#E. t#E i##WLLLLt t#E
E#t t##L ;W#DG##, E#t :K#E: E#jL#D: E#t t#K: t#E .E#L t#E
E#t .D#W; j###DW##, E#KDDDD###iE#t ,K#jE#t ;#W,t#E f#E: t#E
E#tiW#G. G##i,,G##, E#f,t#Wi,,,E#t jDE#t :K#D#E ,WW; t#E
E#K##i :K#K: L##, E#t ;#W: j#t E#t .E##E .D#; t#E
E##D. ;##D. L##, DWi ,KK: ,; .. G#E tt fE
E#t ,,, .,, fE :
L: ,

L. ,; L.
i EW: ,ft f#i j. EW: ,ft
LE .. E##; t#E GEEEEEEEL .E#t EW, E##; t#E
L#E ;W, E###t t#E ,;;L#K;;. i#W, E##j E###t t#E
G#W. j##, E#fE#f t#E t#E L#D. E###D. E#fE#f t#E
D#K. G###, E#t D#G t#E t#E :K#Wfff; E#jG#W; E#t D#G t#E
E#K. :E####, E#t f#E. t#E t#E i##WLLLLt E#t t##f E#t f#E. t#E
.E#E. ;W#DG##, E#t t#K: t#E t#E .E#L E#t :K#E: E#t t#K: t#E
.K#E j###DW##, E#t ;#W,t#E t#E f#E: E#KDDDD###iE#t ;#W,t#E
.K#D G##i,,G##, E#t :K#D#E t#E ,WW; E#f,t#Wi,,,E#t :K#D#E
.W#G :K#K: L##, E#t .E##E t#E .D#; E#t ;#W: E#t .E##E
:W##########Wt ;##D. L##, .. G#E fE tt DWi ,KK: .. G#E
:,,,,,,,,,,,,,.,,, .,, fE : fE
, ,
""", bold=True)

    isitvalid="n"
    while isitvalid != "y":
        if os.path.isfile(urlpath):
            with open(urlpath) as f:
                instance = f.read().rstrip()
            # check if the instance URL domain is valid
            #print_colors(urlpath,instance)
            if IsOnionValid(instance):
                print_colors(f'[+] Instance Name: {instance} {IsOnionValid(instance)}')
                isitvalid="y"
            else:
                print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True)
                return False
        else:
            print_colors("[+] Instance Path doesn't exist yet")
            # and ask for the instance URL domain
            instance = input("What is your Instance domain ? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ")
            instancepath=rootpath+'www/participants/'+instance
            # check if the instance URL domain is valid
            if IsOnionValid(instance):
                print_colors(f'[+] Instance Name: {instance} {IsUrlValid(instance)}')
            else:
                print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True)
                return False

            # ask the user if the instance URL is correct
            print_colors()
            print_colors(instance)
            isitvalid=input("Is this your instance domain ? (y/n) ")
            # if yes, then write it into ~/.darknet_participant_url
            if isitvalid == "y" :
                print_colors("OK writing the instance url to ~/.darknet_participant_url")
                with open(urlpath, "w") as file:
                    file.write(instance)
                print_colors("[+] file written, let's read it")
                f = open(urlpath,"r")
                print_colors(f.read())
                print_colors("[+] Initial Setup Completed!",highlight=True)

    myinstance = instance

    instancepath=rootpath+'www/participants/'+instance
    templatepath=rootpath+'templates/'
    verifiedcsvfile=instancepath+'/verified.csv'
    unverifiedcsvfile=instancepath+'/unverified.csv'
    blcsvfile=instancepath+'/blacklist.csv'
    secsvfile=instancepath+'/sensitive.csv'
    webpcsvfile=instancepath+'/webring-participants.csv'
    # check if instancepath exists, if not then create the directory
    if not os.path.exists(instancepath):
        os.makedirs(instancepath)
    # check if all the required csv files exist in it, otherwise copy them from the templates directory
    # NOTE: the template files are EMPTY by default, because each peer is expected to review lists of links (and the links themselves) manually; this avoids letting malicious links slip through without an intentional edit from the peer.
    for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv','banner.png']:
        filepath=instancepath+'/'+i
        if not os.path.isfile(filepath):
            # copy templates/FILE.CSV to instancepath/FILE.CSV
            src=templatepath+i
            shutil.copyfile(src, filepath)
    # now that they exist, get vdf and uvdf and the rest
    vdf = pd.read_csv(verifiedcsvfile)
    uvdf = pd.read_csv(unverifiedcsvfile)
    bldf = pd.read_csv(blcsvfile)
    sedf = pd.read_csv(secsvfile)
    webpdf = pd.read_csv(webpcsvfile)
    print_colors("[+] file exists, your Webring URL is ", instance)
    isitvalid = "y"
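    # (verified.csv and unverified.csv are expected to carry the columns used
    # throughout this script: Instance, Category, Name, URL, Sensitive,
    # Description, Status, Score)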

    while True:
        print_colors("""

[+] Welcome to your own Darknet Lantern Instance, where you can explore the Darknet and help others do the same.

        """,highlight=True)

        print_colors("""
        Managing Websites:
        1) Add a new Website entry (into unverified.csv)
        2) Trust a Website entry (move an entry from unverified to verified.csv)
        3) Untrust a Website entry (move an entry from verified to unverified.csv)

        Managing Webring Participants:
        4) Synchronize new links from existing webring participants, into your unverified.csv file
        5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
        6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)

        Managing Wordlists:
        7) Add/Remove Words/URLs in the sensitive list (ex: drug)
        8) Add/Remove Words/URLs or links in the blacklist (ex: porn)

        Maintenance:
        9) Remove the duplicate URLs for your own instance
        10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)

        0) Exit
        """)
        option = input("Select Option? (0-10): ")
        print_colors(option)
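        # NOTE: the match/case statement below requires Python 3.10 or newer.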
        match option:

            ########## MANAGING WEBSITE ENTRIES #################
            #Websites:
            # 1) Add a new Website entry (into unverified.csv)
            # 2) Trust a Website entry (move an entry from unverified to verified.csv)
            # 3) Untrust a Website entry (move an entry from verified to unverified.csv)
            #####################################################
            case "1":
                done = False
                while done == False:
                    print_colors("\n[+] Add a new Website entry (into unverified.csv)")
                    name=''
                    while(IsNameValid(name) is not True):
                        name = input("What is the Website name ? ")
                    category=''
                    while(IsCategoryValid(category) is not True):
                        category = input("What is the website Category ? ")
                    # the url of the website (required) + check if it's valid
                    url=''
                    while(IsUrlValid(url) is not True):
                        url=input("What is the website URL ? ")

                    # a quick description (optional) + check if it's valid
                    desc='DEFAULT'
                    while(IsDescriptionValid(desc) is not True):
                        desc=input("Description for the website ? (Optional) ")
                    # sensitive ? (y/n) + check if it's valid
                    #entry_sensi = input("is it a sensitive website ? (ex: website related to drugs) (y/n)")

                    choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
                    if choice == "n":
                        sensi = '❌'
                    else:
                        sensi = '✔️'

                    newrow=[instance,category,name,url,sensi,desc,'','']
                    print_colors("[+] NEWROW=",newrow)
                    # (the rest is automatic: Status and Score are left empty, Instance is your own instance)
                    # TODO check that the entry doesn't already exist in verified.csv and in unverified.csv
                    # if it doesn't exist, add it into unverified.csv
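                    # A minimal sketch for the TODO above (assuming the 'URL' column naming
                    # used elsewhere in this script); kept commented out so that behaviour
                    # is unchanged:
                    #
                    # if uvdf['URL'].eq(url).any() or vdf['URL'].eq(url).any():
                    #     print_colors("[-] This URL is already listed, skipping.", is_error=True)
                    #     continue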
                    uvdf.loc[-1] = newrow # adding a row
                    uvdf.index = uvdf.index + 1 # shifting index
                    uvdf = uvdf.sort_index() # sorting by index
                    uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
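                    # (the loc[-1] / index + 1 / sort_index sequence inserts the new row and
                    # renumbers the index from 0, before the Category/Score sort above)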
                    print_colors("[+] New row added! now writing the csv file:")
                    uvdf.to_csv(unverifiedcsvfile, index=False)
                    choice=input("\n[+] Want to add another website ? (y/n) ")
                    if choice == "n":
                        done = True

            case "2":
                print_colors("[+] Trust a Website entry (move an entry from unverified to verified.csv)")
                done = False
                while done == False:
                    vdf = pd.read_csv(verifiedcsvfile)
                    uvdf = pd.read_csv(unverifiedcsvfile)
                    # search for a word
                    print_colors(uvdf[['Name','URL']])
                    name=''
                    while(IsNameValid(name) is not True):
                        name = input("What is the Website name you want to trust ? (ex: Nowhere)")
                    filter_uvdf = uvdf[uvdf.Name.str.contains(name)]
                    # NOTE display only the matching entries in unverified.csv in an array format (display it in CLI).
                    print_colors(filter_uvdf[['Name','URL']])
                    # check if there are no results, dont proceed if there are none!
                    if filter_uvdf.size == 0:
                        print_colors("ERROR no results, skipping.")
                    else:
                        # Each of the rows has an index,
                        index=-1
                        while (index not in filter_uvdf.index):
                            # prompt the user to ask which row they want to move to verified.csv
                            index = int(input("What is the index of the entry that you want to move to verified.csv ? (ex: 3) "))
                        # once selected, it must be able to SAVE and print_colors that row:
                        print_colors(uvdf.iloc[index].values)
                        newrow=uvdf.iloc[index].values

                        # append it into verified.csv
                        vdf.loc[-1] = newrow # adding a row
                        vdf.index = vdf.index + 1 # shifting index
                        vdf = vdf.sort_index() # sorting by index
                        vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                        vdf.to_csv(verifiedcsvfile, index=False)
                        print_colors("[+] New row added to verified.csv! now writing to the csv")

                        # remove it from unverified.csv
                        uvdf.drop(index, inplace= True)
                        uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
                        uvdf.to_csv(unverifiedcsvfile, index=False)
                        print_colors("[+] Link is now moved to verified.csv!")
                    choice=input("\n[+] Want to trust another website ? (y/n) ")
                    if choice == "n":
                        done = True

            case "3":
                print_colors("[+] Untrust a Website entry (move an entry from verified to unverified.csv)")
                print_colors(vdf[['Name','URL']])
                # search for a word
                name=''
                while(IsNameValid(name) is not True):
                    name = input("What is the Website name you want to untrust ? (ex: BreachForums)")
                filter_vdf = vdf[vdf.Name.str.contains(name)]
                # and display only the matching entries in verified.csv in an array format (display it in CLI).
                print_colors(filter_vdf[['Name','URL']])
                # check if there are no results, dont proceed if there are none!
                if filter_vdf.size == 0:
                    print_colors("ERROR no results, skipping.")
                else:
                    # Each of the rows has an index,
                    index=-1
                    while (index not in filter_vdf.index):
                        # prompt the user to ask which row they want to move to unverified.csv
                        index = int(input("What is the index of the entry that you want to move to unverified.csv ? (ex: 3) "))
                    # once selected, it must be able to SAVE and print_colors that row:
                    print_colors(vdf.iloc[index].values)
                    newrow=vdf.iloc[index].values

                    # append it into unverified.csv
                    uvdf.loc[-1] = newrow # adding a row
                    uvdf.index = uvdf.index + 1 # shifting index
                    uvdf = uvdf.sort_index() # sorting by index
                    uvdf.to_csv(unverifiedcsvfile, index=False)
                    print_colors("[+] New row added to unverified.csv!")

                    # remove it from verified.csv
                    vdf.drop(index, inplace= True)
                    vdf.to_csv(verifiedcsvfile, index=False)
                    print_colors("[+] Link is now moved to unverified.csv!")

            ####### MANAGING WEBRING PARTICIPANTS ###########
            # 4) Synchronize new links from webring participants, into your unverified.csv file
            # 5) Add a new webring participant (and download their files into their directory (without trusting them yet!))
            # 6) Trust/UnTrust/Blacklist a webring participant
            #####################################################

            # check if it works when you have a second webring participant
            case "4":
                print_colors("4) Synchronize new links from existing webring participants, into your unverified.csv file")
                # iterate through each existing directory in www/participants/* to get each webring participant
                participantsdir=rootpath+'www/participants/'
                #print_colors(os.listdir(participantsdir))
                name=''
                desc=''
                trusted=''
                status=''
                score=''
                webringcsvfile=instancepath+'/'+'webring-participants.csv'
                wdf = pd.read_csv(webringcsvfile)
                for participant in os.listdir(participantsdir):
                    participantdir=participantsdir+participant
                    #print_colors(participant)

                    # NOTE check if the webring participant is yourself, if it is, then skip it
                    if participant != myinstance: # prod: dont use your own instance
                    #if participant == myinstance: # preprod testing only on your own instance
                        # overwrite the existing files in the participant's directory with their version (download all the csv files from them again)
                        basewurl='http://'+participant+'/participants/'+participant+'/'
                        print_colors(basewurl)
                        print_colors('[+] Downloading the files of ',participant, ": ")
                        w_vcsv=basewurl+'verified.csv'
                        w_uvcsv=basewurl+'unverified.csv'
                        #print_colors(CheckUrl(w_uvcsv))
                        w_blcsv=basewurl+'blacklist.csv'
                        #print_colors(CheckUrl(w_blcsv))
                        w_scsv=basewurl+'sensitive.csv'
                        #print_colors(CheckUrl(w_scsv))
                        w_webcsv=basewurl+'webring-participants.csv'
                        #print_colors(CheckUrl(w_webcsv))
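                        # (CheckUrl comes from utils.py; presumably it returns True when the
                        # given URL responds over the Tor proxy, which is what the reachability
                        # test below relies on)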

                        # verify that their verified.csv csv file exists at basewurl+'verified.csv'
                        if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
                            print_colors("[-] Webring Participant isn't reachable, skipping")
                            #return False #dont do anything if the webring participant isnt reachable.
                        else: #if the webring participant is reachable, proceed
                            print_colors("[+] Webring Participant is reachable, updating their csv files:")
                            for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
                                # FOR EACH CSV FILE TO GET:
                                # URL: basewurl / FILE.CSV
                                # PATH: participantdir / FILE.CSV
                                #print_colors('[+] DOWNLOADING ',basewurl+i)
                                # download the external csv file and save it into the "text" variable:
                                #response = urllib.request.urlopen(basewurl+i)
                                response = requests.get(basewurl+i, proxies=proxies)
                                #data = response.read() # a `bytes` object
                                #text = data.decode('utf-8')
                                text = response.text
                                # save the text variable into the destination file:
                                #print_colors('[+] SAVING IT INTO ',participantdir+'/'+i)
                                csvfilepath=participantdir+'/'+i
                                with open(csvfilepath, "w") as file:
                                    file.write(text)
                                #print_colors("[+] file written, let's read it")
                                f = open(csvfilepath,"r")
                                #print_colors(f.read())

                            # download the banner.png image:
                            bannerurl=basewurl+'banner.png'
                            bannerpath=participantdir+'/banner.png'
                            r = requests.get(bannerurl, stream=True, proxies=proxies)
                            with open(bannerpath, 'wb') as f:
                                r.raw.decode_content = True
                                shutil.copyfileobj(r.raw, f)
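                            # (stream=True defers the download so the body can be copied straight
                            # from r.raw; decode_content=True lets urllib3 decompress it on the
                            # fly. Note that requests has no default timeout, so an unresponsive
                            # onion can block here.)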

                            # SANITY CHECK ON THE BANNER PNG IMAGE:
                            if IsBannerValid(bannerpath):
                                print_colors('[+] Banner is valid')
                                pass
                            else:
                                # if false, overwrite it with the template banner png file
                                print_colors('[-] Banner is not valid, replacing it with the default banner')
                                os.remove(bannerpath)
                                # copy templates/banner.png to bannerpath
                                bannertemplatepath=templatepath+'banner.png'
                                shutil.copyfile(bannertemplatepath, bannerpath)

                            #print_colors("[+] Webring Participant is valid, adding it if it's not already added.")
                            #print_colors('[+] PARTICIPANT=',participant)
                            # check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
                            # and display only the matching entries in an array format (display it in CLI).
                            filter_wdf = wdf[wdf.URL.str.contains(participant)]
                            #print_colors(filter_wdf[['Name','URL']])
                            # check if there are no results, dont proceed if there are none!
                            if filter_wdf.size == 0: #if the webring participant is not already listed, add it, otherwise skip
                                newrow=[name,participant,desc,trusted,status,score]
                                #print_colors("[+] NEWROW=",newrow)
                                wdf.loc[-1] = newrow # adding a row
                                wdf.index = wdf.index + 1 # shifting index
                                wdf = wdf.sort_index() # sorting by index
                                #print_colors("[+] New row added! now writing the csv file:",webringcsvfile)
                                wdf.to_csv(webringcsvfile, index=False)
                            else:
                                pass
                                #print_colors('[+] Webring participant is already listed in your own webring-participants.csv file!')
                            # iterate through the participant's verified.csv and unverified.csv files
                            for w in ['verified.csv','unverified.csv']:
                                csvfilepath=participantdir+'/'+w
                                print_colors(csvfilepath)
                                csvdf = pd.read_csv(csvfilepath)
                                #print_colors(bldf[['blacklisted-words']])
                                bldf[['blacklisted-words']].iterrows()
                                rows2delete= [] # it is an empty list at first
                                for i,j in csvdf.iterrows():
                                    #print_colors("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
                                    #print_colors("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
                                    #row=uvdf.iloc[[i]] #it displays the index
                                    row=csvdf.loc[i,:].values.tolist()
                                    print_colors(row)
                                    #print_colors(i,row)

                                    ################################ SANITY CHECKS ####################################
                                    ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
                                    #print_colors("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
                                    if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
                                        # mark the row for deletion as it has invalid inputs
                                        if i not in rows2delete:
                                            print_colors("Marking row", i,"for deletion, as it has invalid inputs")
                                            rows2delete.append(i) #mark the row for deletion if not already done

                                    ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                                    for k,l in bldf.iterrows():
                                        #print_colors("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
                                        blword=bldf.at[k, 'blacklisted-words']
                                        if any(blword in str(x) for x in row) == True:
                                            #print_colors("found blacklisted word! marking row for deletion")
                                            if i not in rows2delete:
                                                print_colors("Marking row", i,"for deletion, as it matches with a blacklisted word")
                                                rows2delete.append(i) #mark the row for deletion if not already done
                                        else:
                                            if i not in rows2delete:
                                                # not a blacklisted link, therefore it is suitable to be added to your own csv files:
                                                ################################ CHECKING FOR DUPLICATES! #########################
                                                # for each link in the participant's verified/unverified csv files,
                                                # check if the link is already listed in your own verified.csv or unverified.csv
                                                filterterm=csvdf.at[i, 'URL']
                                                filter_vdf= vdf[vdf.URL.str.contains(filterterm)]
                                                filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm)]
                                                if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
                                                    # if the link doesnt exist in either of your verified/unverified csv files,
                                                    # then add it to your own unverified.csv file:
                                                    newrow=row
                                                    uvdf.loc[-1] = newrow # adding a row
                                                    uvdf.index = uvdf.index + 1 # shifting index
                                                    uvdf = uvdf.sort_index() # sorting by index
                                                    uvdf.to_csv(unverifiedcsvfile, index=False)
                                                    print_colors("[+] New row added to your own unverified.csv file!")
                                                else:
                                                    #print_colors('[-] Skipping row as it is already added in', w, row, is_error=True )
                                                    print_colors(f'[-] Skipping row as it is already added in {w}, {row}', is_error=True)

                                    ###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE ###################

                                    ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
                                    for k,l in sedf.iterrows():
                                        #print_colors("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
                                        seword=sedf.at[k, 'sensitive-words']
                                        if any(seword in str(x) for x in row) == True:
                                            if csvdf.at[i, 'Sensitive'] != '✔️':
                                                print_colors("Marking row", i,"as sensitive, as it matches with a sensitive word")
                                                csvdf.at[i, 'Sensitive']='✔️'

                                print_colors(f'[-] Rows to delete: {rows2delete}')
                                # only delete rows after you've gone through all of the unverified.csv OR verified.csv rows
                                for i in rows2delete:
                                    row=csvdf.loc[i,:].values.tolist()
                                    print_colors('[+] REMOVING ROW :',i,row)
                                    csvdf.drop(i, inplace= True)
                                    csvdf.to_csv(csvfilepath, index=False)
                                rows2delete= [] # reset the list for the next csv file
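                                # A possible vectorized alternative to the per-row blacklist scan
                                # above (a sketch only, assuming the same 'blacklisted-words'
                                # column; kept commented out so that behaviour is unchanged):
                                #
                                # blwords = bldf['blacklisted-words'].dropna().astype(str).tolist()
                                # if blwords:
                                #     pattern = '|'.join(re.escape(word) for word in blwords)
                                #     mask = csvdf.astype(str).apply(lambda col: col.str.contains(pattern)).any(axis=1)
                                #     csvdf = csvdf[~mask]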

            case "5":
                print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
                webring_participant_url = ''
                while(IsOnionValid(webring_participant_url) is not True):
                    # ask for the url of the other webring participant and check if the (onion only) url is valid or not:
                    webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
                # check if the directory exists locally or not,
                participantdir=rootpath+'www/participants/'+webring_participant_url
                #if not os.path.isdir(participantdir): # to test on your own instance
                if os.path.isdir(participantdir):
                    # if it does, it means that the webring participant is ALREADY added
                    print_colors("[-] Webring Participant is already listed, skipping.")
                    return False
                else:
                    # if not, then proceed:
                    # print_colors the URL to the csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
                    basewurl='http://'+webring_participant_url+'/participants/'+webring_participant_url+'/'
                    print_colors(basewurl)
                    print_colors('[+] Checking if all of the required csv files exist for the new webring participant ',webring_participant_url, ": ")
                    w_vcsv=basewurl+'verified.csv'
                    w_uvcsv=basewurl+'unverified.csv'
                    #print_colors(CheckUrl(w_uvcsv))
                    w_blcsv=basewurl+'blacklist.csv'
                    #print_colors(CheckUrl(w_blcsv))
                    w_scsv=basewurl+'sensitive.csv'
                    #print_colors(CheckUrl(w_scsv))
                    w_webcsv=basewurl+'webring-participants.csv'
                    #print_colors(CheckUrl(w_webcsv))

                    # verify that their verified.csv csv file exists at basewurl+'verified.csv'
                    if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
                        print_colors("[-] Webring Participant is invalid, exiting.")
                        return False
                    else:
                        print_colors("[+] Webring Participant is valid, adding it.")
                        name=''
                        while(IsNameValid(name) is not True):
                            name = input("What is the Webring instance name ? ")
                        desc='DEFAULT'
                        while(IsDescriptionValid(desc) is not True):
                            desc=input("Description for the webring participant ? (Optional) ")
                        # if OK then add it to your own webring-participants.csv
                        trusted=''
                        status=''
                        score=''
                        newrow=[name,webring_participant_url,desc,trusted,status,score]
                        webringcsvfile=instancepath+'/'+'webring-participants.csv'
                        wdf = pd.read_csv(webringcsvfile)
                        #print_colors("[+] NEWROW=",newrow)
                        wdf.loc[-1] = newrow # adding a row
                        wdf.index = wdf.index + 1 # shifting index
                        wdf = wdf.sort_index() # sorting by index
                        print_colors("[+] New row added! now writing the csv file:",webringcsvfile)
                        wdf.to_csv(webringcsvfile, index=False)

                        # create the directory in www/participants/PARTICIPANTURL/ if it's not there already
                        if not os.path.exists(participantdir):
                            os.makedirs(participantdir)
                        # then download their csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
                        # then save the csv file contents into a variable, then write it where it belongs:
                        # for loop with each csv file you want:
                        for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
                            # FOR EACH CSV FILE TO GET:
                            # URL: basewurl / FILE.CSV
                            # PATH: participantdir / FILE.CSV
                            print_colors('[+] DOWNLOADING ',basewurl+i)
                            # download the external csv file and save it into the "text" variable:
                            #response = urllib.request.urlopen(basewurl+i)
                            response = requests.get(basewurl+i, proxies=proxies)
                            #data = response.read() # a `bytes` object
                            #text = data.decode('utf-8')
                            text = response.text
                            # save the text variable into the destination file:
                            print_colors('[+] SAVING IT INTO ',participantdir+'/'+i)
                            csvfilepath=participantdir+'/'+i
                            with open(csvfilepath, "w") as file:
                                file.write(text)
                            print_colors("[+] file written, let's read it")
                            f = open(csvfilepath,"r")
                            print_colors(f.read())

                        # download the banner.png image:
                        bannerurl=basewurl+'banner.png'
                        bannerpath=participantdir+'/banner.png'
                        r = requests.get(bannerurl, stream=True, proxies=proxies)
                        with open(bannerpath, 'wb') as f:
                            r.raw.decode_content = True
                            shutil.copyfileobj(r.raw, f)

                        # SANITY CHECK ON THE BANNER PNG IMAGE:
                        if IsBannerValid(bannerpath):
                            print_colors('[+] Banner is valid')
                        else:
                            # if false, overwrite it with the template banner png file
                            print_colors('[-] Banner is not valid, replacing it with the default banner')
                            os.remove(bannerpath)
                            # copy templates/banner.png to bannerpath
                            bannertemplatepath=templatepath+'banner.png'
                            shutil.copyfile(bannertemplatepath, bannerpath)

                        ########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
                        for w in ['verified.csv','unverified.csv']:
                            csvfilepath=participantdir+'/'+w
                            csvdf = pd.read_csv(csvfilepath)

                            #print_colors(bldf[['blacklisted-words']])
                            bldf[['blacklisted-words']].iterrows()
                            rows2delete= [] # it is an empty list at first
                            for i,j in csvdf.iterrows():
                                #print_colors("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
                                #print_colors("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
                                #row=uvdf.iloc[[i]] #it displays the index
                                row=csvdf.loc[i,:].values.tolist()
                                #print_colors(i,row)

                                ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
                                #print_colors("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsUrlValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
                                if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
                                    # mark the row for deletion as it has invalid inputs
                                    if i not in rows2delete:
                                        print_colors("Marking row", i,"for deletion, as it has invalid inputs")
                                        rows2delete.append(i) #mark the row for deletion if not already done

                                ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                                for k,l in bldf.iterrows():
                                    #print_colors("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
                                    blword=bldf.at[k, 'blacklisted-words']
                                    if any(blword in str(x) for x in row) == True:
                                        #print_colors("found blacklisted word! marking row for deletion")
                                        if i not in rows2delete:
                                            print_colors("Marking row", i,"for deletion, as it matches with a blacklisted word")
                                            rows2delete.append(i) #mark the row for deletion if not already done

                                ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
                                for k,l in sedf.iterrows():
                                    #print_colors("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
                                    seword=sedf.at[k, 'sensitive-words']
                                    if any(seword in str(x) for x in row) == True:
                                        if csvdf.at[i, 'Sensitive'] != '✔️':
                                            print_colors("Marking row", i,"as sensitive, as it matches with a sensitive word")
                                            csvdf.at[i, 'Sensitive']='✔️'

                            print_colors(f'[-] Rows to delete: {rows2delete}')

                            for i in rows2delete:
                                row=csvdf.loc[i,:].values.tolist()
                                print_colors('[+] REMOVING ROW :',i,row)
                                csvdf.drop(i, inplace= True)
                                csvdf.to_csv(csvfilepath, index=False)
                            ##############################################

            case "6":
                print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
                webringcsvfile=instancepath+'/'+'webring-participants.csv'
                wdf = pd.read_csv(webringcsvfile)
                # list each webring participant in your webring-participants.csv file
                print_colors(wdf[["URL","Trusted"]])
                # ask the user to pick an index
                index=""
                while (index not in wdf.index):
                    # prompt the user to ask which participant they want to edit
                    index = int(input("What is the index of the webring participant that you want to edit ? (ex: 3) "))
                # once a valid index is picked, ask if the user wants to 1) trust the webring participant, or 2) untrust them, or 3) blacklist them
                choice=""
                while (choice not in ["1","2","3"]):
                    choice = input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant ?")
                if choice == "1":
                    # trust the webring participant
                    # ask the user if they want to proceed, as this is potentially risky if the webring participant lists malicious links in the future
                    choice2=input("You're about to trust another peer, which means you will automatically trust all of the links in their verified.csv file! If this is a malicious peer, you may end up trusting malicious links, so this is potentially risky! Do you want to continue ? (y/n) ")
                    if choice2 == "y":
                        # if the user wants to proceed, mark the "Trusted" column with '✔️'
                        print_colors("[+] Trusting webring participant", wdf.at[index,"URL"])
                        wdf.at[index,"Trusted"]='✔️'
                        wdf.to_csv(webringcsvfile, index=False)
                    else:
                        print_colors("[-] not trusting webring participant, skipping.")
                if choice == "2":
                    print_colors("[+] UnTrusting webring participant", wdf.at[index,"URL"])
                    wdf.at[index,"Trusted"]=''
                    wdf.to_csv(webringcsvfile, index=False)
                    # untrust the webring participant
                    # if 2: mark the "Trusted" column as empty
                if choice == "3":
                    print_colors("[+] Blacklisting webring participant", wdf.at[index,"URL"])
                    # blacklist the webring participant
                    # add its URL to your own blacklist.csv
                    instance2blacklist=wdf.at[index,"URL"]
                    newrow=[instance2blacklist]
                    print_colors("[+] NEWROW=",newrow)
                    bldf.loc[-1] = newrow # adding a row
                    bldf.index = bldf.index + 1 # shifting index
                    bldf = bldf.sort_index() # sorting by index
                    print_colors("[+] New row added! now writing the csv file:")
                    bldf.to_csv(blcsvfile, index=False)

                    # remove all of the entries that came from that participant (drop the lines in your own verified.csv and unverified.csv that match a blacklisted word)

                    #vdf
                    rows2delete= [] # it is an empty list at first
                    for i,j in vdf.iterrows():
                        row=vdf.loc[i,:].values.tolist()
                        for k,l in bldf.iterrows():
                            #print_colors("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
                            blword=bldf.at[k, 'blacklisted-words']
                            if any(blword in str(x) for x in row) == True:
                                #print_colors("found blacklisted word! marking row for deletion")
                                if i not in rows2delete:
                                    print_colors("Marking row", i,"for deletion, as it matches with a blacklisted word")
                                    rows2delete.append(i) #mark the row for deletion if not already done
                    for i in rows2delete:
                        row=vdf.loc[i,:].values.tolist()
                        print_colors('[+] REMOVING ROW :',i,row)
                        vdf.drop(i, inplace= True)
                        vdf.to_csv(verifiedcsvfile, index=False)
                    print_colors(vdf)
                    rows2delete= [] # reset the list

                    #uvdf
                    rows2delete= [] # it is an empty list at first
                    for i,j in uvdf.iterrows():
                        row=uvdf.loc[i,:].values.tolist()
                        for k,l in bldf.iterrows():
                            #print_colors("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
                            blword=bldf.at[k, 'blacklisted-words']
                            if any(blword in str(x) for x in row) == True:
                                #print_colors("found blacklisted word! marking row for deletion")
                                if i not in rows2delete:
                                    print_colors("Marking row", i,"for deletion, as it matches with a blacklisted word")
                                    rows2delete.append(i) #mark the row for deletion if not already done
                    for i in rows2delete:
                        row=uvdf.loc[i,:].values.tolist()
                        print_colors('[+] REMOVING ROW :',i,row)
                        uvdf.drop(i, inplace= True)
                        uvdf.to_csv(unverifiedcsvfile, index=False)
                    print_colors(uvdf)
                    rows2delete= [] # reset the list

                    # find all rows that match with the instance name in wdf as well, to remove them
                    for i,j in wdf.iterrows():
                        row=wdf.loc[i,:].values.tolist()
                        for k,l in bldf.iterrows():
                            #print_colors("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
                            blword=bldf.at[k, 'blacklisted-words']
                            if any(blword in str(x) for x in row) == True:
                                #print_colors("found blacklisted word! marking row for deletion")
                                if i not in rows2delete:
                                    print_colors("Marking row", i,"for deletion, as it matches with a blacklisted word")
                                    rows2delete.append(i) #mark the row for deletion if not already done
                    for i in rows2delete:
                        row=wdf.loc[i,:].values.tolist()
                        print_colors('[+] REMOVING ROW :',i,row)
                        wdf.drop(i, inplace= True)
                        wdf.to_csv(webringcsvfile, index=False)
                    print_colors(wdf)
                    rows2delete= [] # reset the list

                    # remove the entire directory in www/participants/INSTANCENAME as well, to get rid of it
                    instance2blacklistpath=rootpath+'www/participants/'+instance2blacklist
                    print_colors("[+] removing the participant's directory at ",instance2blacklistpath)
                    shutil.rmtree(instance2blacklistpath)

            ################### MANAGING WORDLISTS #################
            #Managing Wordlists:
            # 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
            # 8) Add/Remove words or links in the blacklist (ex: porn)

            #Maintenance:
            # 9) remove the duplicate URLs for your own instance
            # 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
            #########################################################

            case "7":
                print_colors("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
                #secsvfile=instancepath+'/sensitive.csv' #fyi
                #sedf = pd.read_csv(secsvfile) #fyi
                option="0"

                done = False
                while(done == False):
                    while option != "1" and option != "2" and option != "-1":
                        option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type -1 to exit) ")
                    if option == "-1":
                        done = True
                    if option == "1":
                        word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
                        if word == "-1":
                            done = True
                            #True to get out of the while loop
                        else:
                            print_colors("[+] Checking if the Word/URL is valid: ")
                            if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
                                print_colors(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
                                print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
                                # add it to the sensitive wordlist
                                newrow=[word]
                                print_colors("[+] NEWROW=",newrow)
                                sedf.loc[-1] = newrow # adding a row
                                sedf.index = sedf.index + 1 # shifting index
                                sedf = sedf.sort_index() # sorting by index
                                print_colors("[+] New row added! now writing the csv file: ")
                                sedf.to_csv(secsvfile, index=False)

                    if option == "2":
                        print_colors(sedf)
                        index=""
                        while (index not in sedf.index) and index != -1:
                            index=int(input("which word do you want to remove? (index 0 to max index) (write -1 to exit) "))
                        if index == -1:
                            done = True
                            #True to get out of the while loop
                        else:
                            if (index in sedf.index):
                                #if the index exists, remove it
                                print_colors("[+] removing selected index: ")
                                sedf.drop(index, inplace= True)
                                sedf.to_csv(secsvfile, index=False)
                            else:
                                print_colors('[-] Error, invalid index')
                    else:
                        pass

            case "8":
                print_colors("[+] Add/Remove words in the blacklist (ex: porn)")
                #blcsvfile=instancepath+'/blacklist.csv' #fyi
                #bldf = pd.read_csv(blcsvfile) #fyi
                option="0"

                done = False
                while(done == False):
                    while option != "1" and option != "2" and option != "-1":
                        option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type -1 to exit) ")
                    if option == "-1":
                        done = True
                    if option == "1":
                        word=input("[+] Which Word/URL do you want to add to the blacklist? (write -1 to exit) ")
                        if word == "-1":
                            done = True
                            #True to get out of the while loop
                        else:
                            print_colors("[+] Checking if the Word/URL is valid: ")
                            if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
                                print_colors(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
                                print_colors('[+] Word/URL is valid, adding the word into the blacklist ')
                                # add it to the blacklist wordlist
                                newrow=[word]
                                print_colors("[+] NEWROW=",newrow)
                                bldf.loc[-1] = newrow # adding a row
                                bldf.index = bldf.index + 1 # shifting index
                                bldf = bldf.sort_index() # sorting by index
                                print_colors("[+] New row added! now writing the csv file: ")
                                bldf.to_csv(blcsvfile, index=False)

                    if option == "2":
                        print_colors(bldf)
                        index=""
                        while (index not in bldf.index) and index != -1:
                            index=int(input("which word do you want to remove? (index 0 to max index) (write -1 to exit) "))
                        if index == -1:
                            done = True
                            #True to get out of the while loop
                        else:
                            if (index in bldf.index):
                                #if the index exists, remove it
                                print_colors("[+] removing selected index: ")
                                bldf.drop(index, inplace= True)
                                bldf.to_csv(blcsvfile, index=False)
                            else:
                                print_colors('[-] Error, invalid index')
                    else:
                        pass

            # CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
            case "9":
                print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
                # ignore it if the index is "indextocheck" and if the index is already listed in rows2delete
                # else: add the index to "rows2delete"
                # go drop the rows by their index listed in "rows2delete"
                ################################ CHECKING FOR DUPLICATES! #########################
                # for unverified.csv, and verified.csv
                for w in ['verified.csv','unverified.csv']:
                    #instancepath=rootpath+'www/participants/'+instance # fyi
                    csvfilepath=instancepath+'/'+w

                    print_colors(csvfilepath)
                    csvdf = pd.read_csv(csvfilepath)
                    print_colors("REMOVING DUPLICATES IN", csvfilepath)
                    csvdf = csvdf.drop_duplicates(subset=['URL'])
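                    # (drop_duplicates keeps the first occurrence of each URL and drops the rest)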
                    csvdf.to_csv(csvfilepath, index=False)
                    print_colors(csvdf[['URL']])

            case "10":
                print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
                participantspath = rootpath+'www/participants/'
                for participant in os.listdir(participantspath):
                    print_colors("Participant:",participant)
                    participantdir= participantspath+participant
                    a=0
                    if a == 0:
                        if a == 0:
                            ################ BEGIN SANITY CHECKS FOR EACH PARTICIPANT ##############
                            # iterate through the participant's verified.csv and unverified.csv files
                            for w in ['verified.csv','unverified.csv']:
                                csvfilepath=participantdir+'/'+w
                                print_colors(csvfilepath)
                                csvdf = pd.read_csv(csvfilepath)
                                #print_colors(bldf[['blacklisted-words']])
                                bldf[['blacklisted-words']].iterrows()
                                rows2delete= [] # it is an empty list at first
                                for i,j in csvdf.iterrows():
                                    #print_colors("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
                                    #print_colors("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
                                    #row=uvdf.iloc[[i]] #it displays the index
                                    row=csvdf.loc[i,:].values.tolist()
                                    print_colors(row)
                                    #print_colors(i,row)

                                    ################################ SANITY CHECKS ####################################
                                    ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
                                    #print_colors("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
                                    if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
                                        # mark the row for deletion as it has invalid inputs
                                        if i not in rows2delete:
                                            print_colors("Marking row", i,"for deletion, as it has invalid inputs")
                                            rows2delete.append(i) #mark the row for deletion if not already done

                                    ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
                                    for k,l in bldf.iterrows():
                                        #print_colors("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
                                        blword=bldf.at[k, 'blacklisted-words']
                                        if any(blword in str(x) for x in row) == True:
                                            #print_colors("found blacklisted word! marking row for deletion")
                                            if i not in rows2delete:
                                                print_colors("Marking row", i,"for deletion, as it matches with a blacklisted word")
                                                rows2delete.append(i) #mark the row for deletion if not already done

                                for i in rows2delete:
                                    row=csvdf.loc[i,:].values.tolist()
                                    print_colors('[+] REMOVING ROW :',i,row)
                                    csvdf.drop(i, inplace= True)
                                    csvdf.to_csv(csvfilepath, index=False)

                # TODO find the list of all csv files (in www/participants/*/*.csv) (templates should remain empty by default)
                # copy what was done in option 4, to :
                # delete the ones that have invalid entries
                # mark the sensitive rows as sensitive
                # delete the rows that match with blacklisted words
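                # A minimal sketch for the TODO above, using the glob module that is already
                # imported (a sketch only; kept commented out so that behaviour is unchanged):
                #
                # for csvfilepath in glob.glob(rootpath+'www/participants/*/*.csv'):
                #     print_colors(csvfilepath)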

            case _:
                print_colors("[-] Exiting")
                return True


if __name__ == '__main__':
    main()