darknet-lantern/scripts/darknet_exploration.py

import os, pwd, re, pandas as pd, requests, shutil
#apt install python3-pandas python3-requests python3-socks
def main():
	proxies = {
		'http': 'socks5h://127.0.0.1:9050',
		'https': 'socks5h://127.0.0.1:9050'
	}

	rootpath='/srv/darknet-onion-webring/'
	urlpath=pwd.getpwuid(os.getuid()).pw_dir+"/.darknet_participant_url"
	#print(urlpath)


	# check if ~/.darknet_participant_url exists,
	# if exists, instance= the content of ~/.darknet_participant_url (which is the url: such as uptime.nowherejez...onion)
	isitvalid="n"
	while isitvalid != "y":
		if os.path.isfile(urlpath):
			with open(urlpath) as f:
				instance = f.read().rstrip()
			# check if the instance URL domain is valid
			#print(urlpath,instance)
			if IsOnionValid(instance):
				print("[+] Instance Name:",instance,IsOnionValid(instance))
			else:
				print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
				return False
			instancepath=rootpath+'www/participants/'+instance
			templatepath=rootpath+'templates/'
			verifiedcsvfile=instancepath+'/verified.csv'
			unverifiedcsvfile=instancepath+'/unverified.csv'
			# check if instancepath exists, if not then create the directory
			if not os.path.exists(instancepath):
				os.makedirs(instancepath)
			# check if all the required csv files exist in it, otherwise copy them from the templates directory
			# NOTE : the templates files are EMPTY by default, this is because i want each peer to manually review lists of links, and links themselves manually, do not allow malicious links to slip through without intentional edits from the peer themselves.
			for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
				filepath=instancepath+'/'+i
				if not os.path.isfile(filepath):
					# copy templates/ FILE.CSV to instancepath/ FILE.CSV
					src=templatepath+i
					shutil.copyfile(src, filepath)
			# now that they exist, get vdf and uvdf
			vdf = pd.read_csv(verifiedcsvfile)
			uvdf = pd.read_csv(unverifiedcsvfile)
			print("[+] file exists, your Webring URL is", instance)
			isitvalid = "y"
		else:
			print("[+] Instance Path doesn't exist yet")
			# and ask for the instance URL domain
			instance = input("What is your Instance domain ? (ex: uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion):")
			instancepath=rootpath+'www/participants/'+instance
			# check if the instance URL domain is valid
			if IsOnionValid(instance):
				print("[+] Instance Name: ",instance,IsUrlValid(instance))
			else:
				print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
				return False


			# ask the user if the instance URL is valid ?
			print()
			print(instance)
			isitvalid=input("Is this your this your instance domain ? (y/n)")
			# if yes, then write it into ~/.darknet_participant_url
			if isitvalid == "y" :
				print("OK writing the instance url to ~/.darknet_participants_url")
				with open(urlpath, "w") as file:
					file.write(instance)
				print("[+] file written, let's read it")
				f = open(urlpath,"r")
				print(f.read())
				print("[+] Initial Setup Completed!")
	while True:
		print("[+] Welcome to the Darknet Onion Webring, where you are exploring the Darknet and helping others do the same.")
		print("""
Managing Websites:
 1) Add a new Website entry (into unverified.csv)
 2) Trust a Website entry (move an entry from unverified to verified.csv)
 3) Untrust a Website entry (move an entry from unverified to verified.csv)

Managing Webring Participants:
 4) Add a new webring participant (and download their files into their directory (without trusting them yet!))
 5) Trust a webring participant (Potentially dangerous)
 6) Untrust a webring participant
 7) Remove a webring participant

Managing Wordlists:
 8) Add/Remove words or links in the sensitive list (ex: drug)
 9) Add/Remove words or links in the blacklist (ex: porn)
 0) Exit
		""")
		option = input("Select Option? (0-6): ")
		print(option)
		match option:


########## MANAGING WEBSITE ENTRIES #################
#Websites:
# 1) Add a new Website entry (into unverified.csv)
# 2) Trust a Website entry (move an entry from unverified to verified.csv)
# 3) Untrust a Website entry (move an entry from unverified to verified.csv)
#####################################################

			case "1":
				print("\n[+] Add a new Website entry (into unverified.csv)")
				name=''
				while(IsNameValid(name) is not True):
				    name = input("What is the Website name ? ")
				category=''
				while(IsCategoryValid(category) is not True):
				    category = input("What is the website Category ? ")
			    # the url of the website (required) + check if its valid
			    #entry_url = input("What is URL of the Website ? (ex: https://torproject.org or http://2gzyxa5ihm7nsggfxnu52rck2vv4rvmdlkiu3zzui5du4xyclen53wid.onion)")
				url=''
				while(IsUrlValid(url) is not True):
					url=input("What is the website URL ? ")

			    # a quick description (optional) + check if its valid
			    #entry_desc = input("(Optional) Description of the website ? (max 256 characters) (press enter to skip)")
				#desc="This is a new website that we add, it has this description"
				desc='DEFAULT'
				while(IsDescriptionValid(desc) is not True):
					desc=input("Description for the website ? (Optional)")
			    # sensitive ? (y/n) + check if its valid
			    #entry_sensi = input("is it a sensitive website ? (ex: website related to drugs) (y/n)")
				sensi = ''
				while(IsStatusValid(sensi) is not True):
					sensi=input("Is this website sensitive (ex: related to drugs) ? (y/n)")

				newrow=[instance,category,name,url,sensi,desc,'','']
				print("[+] NEWROW=",newrow)
				# (rest is automatic: status, score, instance is = '' because it is your own instance)
				# check if the entry doesn't already exist in verified.csv and in unverified.csv
				# if it doesnt exist, add it into unverified.csv
				uvdf.loc[-1] = newrow  # adding a row
				uvdf.index = uvdf.index + 1  # shifting index
				uvdf = uvdf.sort_index()  # sorting by index
				print("[+] New row added! now writing the csv file:")
				uvdf.to_csv(unverifiedcsvfile, index=False)

			case "2":
				print("[+] Trust a Website entry (move an entry from unverified to verified.csv)")
				# search for a word
				name=''
				while(IsNameValid(name) is not True):
				    name = input("What is the Website name you want to trust ? (ex: Nowhere)")
				filter_uvdf = uvdf[uvdf.Name.str.contains(name)]
				# and display only the matching entries in unverified.csv in an array format (display it in CLI).
				print(filter_uvdf[['Name','URL']])
				# check if there are no results, dont proceed if there are none!
				if filter_uvdf.size == 0:
					print("ERROR no results, skipping.")
				else:
					# Each of the rows has an index,
					index=-1
					while (index not in filter_uvdf.index):
						# prompt the user to ask for with row they want to move to verified.csv
						index = int(input("What is the index of the entry that you want to move to verified.csv ? (ex: 3) "))
					#  once selected, it must be able to SAVE and print that row:
					print(uvdf.iloc[index].values)
					newrow=uvdf.iloc[index].values


					#  append it into verified.csv
					vdf.loc[-1] = newrow  # adding a row
					vdf.index = vdf.index + 1  # shifting index
					vdf = vdf.sort_index()  # sorting by index
					vdf.to_csv(verifiedcsvfile, index=False)
					print("[+] New row added to verified.csv! now writing to the csv")


					#  remove it from unverified.csv
					uvdf.drop(index, inplace= True)
					uvdf.to_csv(unverifiedcsvfile, index=False)
					print("[+] Link is now moved to verified.csv!")

			case "3":
				print("[+] Untrust a Website entry (move an entry from verified to unverified.csv)")
				# search for a word
				name=''
				while(IsNameValid(name) is not True):
				    name = input("What is the Website name you want to untrust ? (ex: BreachForums)")
				filter_vdf = vdf[vdf.Name.str.contains(name)]
				# and display only the matching entries in unverified.csv in an array format (display it in CLI).
				print(filter_vdf[['Name','URL']])
				# check if there are no results, dont proceed if there are none!
				if filter_vdf.size == 0:
					print("ERROR no results, skipping.")
				else:
					# Each of the rows has an index,
					index=-1
					while (index not in filter_vdf.index):
						# prompt the user to ask for with row they want to move to unverified.csv
						index = int(input("What is the index of the entry that you want to move to unverified.csv ? (ex: 3) "))
					#  once selected, it must be able to SAVE and print that row:
					print(vdf.iloc[index].values)
					newrow=vdf.iloc[index].values


					#  append it into unverified.csv
					uvdf.loc[-1] = newrow  # adding a row
					uvdf.index = uvdf.index + 1  # shifting index
					uvdf = uvdf.sort_index()  # sorting by index
					uvdf.to_csv(unverifiedcsvfile, index=False)
					print("[+] New row added to unverified.csv!")


					#  remove it from verified.csv
					vdf.drop(index, inplace= True)
					vdf.to_csv(verifiedcsvfile, index=False)
					print("[+] Link is now moved to unverified.csv!")


####### MANAGING WEBRING PARTICIPANTS ###########
# 4) Add a new webring participant (and download their files into their directory (without trusting them yet!))
# 5) Trust a webring participant (Potentially dangerous)
# 6) Untrust a webring participant
# 7) Remove a webring participant
#####################################################
			case "4":
				print("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
				webring_participant_url = ''
				while(IsOnionValid(webring_participant_url) is not True):
					# ask for the url to the other webring participant and check if the (onion only) url is valid or not:
					webring_participant_url = input("What is the onion domain of the new webring participant? (ex: uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion)  ")
				# check if the directory exists locally or not,
				participantdir=rootpath+'www/participants/'+webring_participant_url
				if not os.path.isdir(participantdir): # to test on your own instance
				#if os.path.isdir(participantdir):
					# if it does, it means that the webring is ALREADY added
					print("[-] Webring Participant is already listed, skipping.")
					return False
				else:
					# if not, then proceed:
					#  print the URL to the csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
					basewurl='http://'+webring_participant_url+'/participants/'+webring_participant_url+'/'
					print(basewurl)
					print('[+] Checking if all of the required csv files exists for new webring participant ',webring_participant_url, ": ")
					w_vcsv=basewurl+'verified.csv'
					w_uvcsv=basewurl+'unverified.csv'
					#print(CheckUrl(w_uvcsv))
					w_blcsv=basewurl+'blacklist.csv'
					#print(CheckUrl(w_blcsv))
					w_scsv=basewurl+'sensitive.csv'
					#print(CheckUrl(w_scsv))
					w_webcsv=basewurl+'webring-participants.csv'
					#print(CheckUrl(w_webcsv))

					# verify that their verified.csv csv file exists at basewurl+'verified.csv'
					if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
						print("[-] Webring Participant is invalid, exiting.")
						return False
					else:
						print("[+] Webring Participant is valid, adding it.")
						# TODO if OK then add it to your own webring-participants.csv
						# TODO also check if the line doesn't exist yet the templates csv file !!! (search for it and if length = 0 then add it)
						# create the directory in www/participants/PARTICIPANTURL/ if it's not there already
						if not os.path.exists(participantdir):
							os.makedirs(participantdir)
						#  then download their csv files at http://URL.onion/participants/URL.onion/{verified.csv,unverified.csv,sensitive.csv,blacklist.csv,webring-participants.csv}
						# then save the csv file contents into a variable, then write it where it belongs:
						# for loop with each csv file you want:
						for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
							# FOR EACH CSV FILE TO GET:
							# URL: basewurl / FILE.CSV
							# PATH: participantdir / FILE.CSV
							print('[+] DOWNLOADING ',basewurl+i)
							# download the external csv file and save it into the "text" variable:
							#response = urllib.request.urlopen(basewurl+i)
							response = requests.get(basewurl+i, proxies=proxies)
							#data = response.read()      # a `bytes` object
							#text = data.decode('utf-8')
							text = response.text
							# save the text variable into the destination file:
							print('[+] SAVING IT INTO ',participantdir+'/'+i)
							csvfilepath=participantdir+'/'+i
							with open(csvfilepath, "w") as file:
								file.write(text)
							print("[+] file written, let's read it")
							f = open(csvfilepath,"r")
							print(f.read())
							#  TODO and remove all of the invalid entries !!!
					#######################################################################
					#newrow=[instance,category,name,url,sensi,desc,'','']
					#print("[+] NEWROW=",newrow)
					#uvdf.loc[-1] = newrow  # adding a row
					#uvdf.index = uvdf.index + 1  # shifting index
					#uvdf = uvdf.sort_index()  # sorting by index
					#print("[+] New row added! now writing the csv file:")
					#uvdf.to_csv(unverifiedcsvfile, index=False)


			case "5":
				print("[+] Trust a webring participant (Potentially dangerous)")
				#  TODO print a warning to tell the user that you're about to trust all of the links that are trusted by another peer, if that peer is malicious they may start trusting links that you don't trust! proceed? (answer is y otherwise skip)
				#  TODO list the existing webring participants from your own webring-participants.csv and
				#  TODO ask the user to pick the index (0-9?) of the instance to trust
				#  TODO if index is valid, then mark the instance as trusted in webring-participants.csv
				searchterm = input("What is the index of the entry that you want to trust ?")
			case "6":
				# TODO
				print("[+] Untrust a webring participant (safer)")
			case "7":
				# TODO
				print("[+] Remove a webring participant (in case of abuses)")
				# TODO add it to blacklist.csv
				# TODO remove it from your own webring-participants.csv
				# TODO remove it from the template webring-participants.csv


################### MANAGING WORDLISTS #################
#Wordlists:
# 8) Add/Remove words in the sensitive list (ex: drug)
# 9) Add/Remove words in the blacklist (ex: porn)
# 0) Exit
#########################################################

			case "8":
				print("[+] Add/Remove words in the sensitive list (ex: drug)")
				print("do you want to 1) add words  or 2) remove words ?")
				option="0"

				done = False
				while(done == False):
					while option != "1" and option != "2" and option != "exit":
						option=input("do you want to 1) add words  or 2) remove words ? (type exit to exit)")
					#  TODO display the contents of sensitive.csv file
					if option == 1:
						word=input("which word do you want to add? (write 0 to exit")
						if word == "0":
							done = True
							#True to get out of the while loop
						else:
							print("checking if word is valid")
							# TODO check if word is valid in a while loop (dont check if
							#  TODO if invalid! remove word at index
					else:
						index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
						if index == "exit":
							done = True
							#True to get out of the while loop
						else:
							print("checking if index is valid")
							# TODO check if index is valid or not
								# TODO  if valid! remove word at index
								# TODO  if invalid! just pass to ask for another word

			case "9":
				print("[+] Add/Remove words in the blacklist list (ex: porn)")
				# TODO  copy option 5
				# TODO  print("do you want to 1) add words  or 2) remove words ?")
					# TODO  display the contents of blacklist.csv file

			case _:
				print("[-] Exiting")
				return True


#### Checking Functions to validate that links are legit ####

def CheckUrl(url):
	"""
	Checks if URL is actually reachable via Tor
	"""
	proxies = {
		'http': 'socks5h://127.0.0.1:9050',
		'https': 'socks5h://127.0.0.1:9050'
	}
	try:
		status = requests.get(url,proxies=proxies, timeout=5).status_code
		print('[+]',url,status)
		if status != 502:
			print(url,"✔️")
			return True
		else:
			print(url,"❌")
			return False
	except requests.ConnectionError as e:
		print(url,"❌")
		return False
	except requests.exceptions.ReadTimeout as e:
		print(url,"❌")
		return False

#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####

def IsOnionValid(url: str)-> bool:
    """
    Checks if the domain(param) is a valid onion domain and return True else False.
    """
        # check if the characters are only [a-zA-Z0-9.] with maximum 128 chars max?
                # check that it is only url.onion or subdomain.url.onion,
                        #  if OK return True
        #if not : return False
    try:
        pattern = re.compile("^[A-Za-z0-9.]+(\.onion)?$")
        url = url.strip().removesuffix('/')
        if url.startswith('http://'):
            print('URL starts with http')
            # Removes the http://
            domain = url.split('/')[2]
            if pattern.fullmatch(domain) is not None:
                if len(domain.split('.')) > 3:
                    n_subdomians = len(domain.split('.'))
                    # Checks if there is more than 1 subdomain. "subdomain.url.onion" only
                    print(f"This domain have more than one subdomain. There are {n_subdomians} subdomains")
                    return False
                else:
                    if len(domain) < 62:
                        print("Domain length is less than 62.")
                        return False
                    return True
            elif pattern.fullmatch(domain) is None:
                print("Domain contains invalid character.")
                print(domain)
                return False
            else:
                print("Domain not valid")
                return False
        else:
            #print("URL doesn't start http")
            if pattern.fullmatch(url) is not None:
                if len(url.split('.')) > 3:
                    n_subdomians = len(url.split('.'))
                    # Checks if there is more than 1 subdomain. "subdomain.url.onion" only
                    print(f"This domain have more than one subdomain. There are {n_subdomians - 1} subdomains")
                    return False
                else:
                    if len(url) < 62:
                        print("Domain length is less than 62.")
                        return False
                    return True
            elif pattern.fullmatch(url) is None:
                print("Domain contains invalid character.")
                print(url)
                return False
            else:
                print("Domain not valid")
                return False
    except Exception as e:
        print(f"Error: {e}")


def IsUrlValid(url:str)->bool:
        """
        Check if url is valid both dark net end clearnet.
        """
        # check if the characters are only [a-zA-Z0-9.:/] with maximum 128 chars max?
                # check that it is only http(s)://wordA.wordB or http(s)://WordC.WordB.WordC, (onion or not), clearnet is fine too (double check if those are fine!)
                        #  if OK return True
        #if not : return False
        pattern  = re.compile("^[A-Za-z0-9:/.]+$")
        if url.endswith('.onion'):
                return IsOnionValid(url)
        else:
                if not url.__contains__('.'):
                        print("No (DOT) in clearnet url")
                        return False
                if pattern.fullmatch(url) is None:
                        print('Url contains invalid chars')
                        return False
                return True

def IsStatusValid(status: str)-> bool:
        """
        Checks if status contains only [v,w]. Verbose only if False is returned
        """
        # check if the characters are only [vx] with maximum 1 chars max
                #  if OK return True
        #if not : return False
        pattern = ['y','n']
        if len(status) != 1:
                print("Got more than one character or nothing.")
                return False
        elif (status not in pattern):
                print("Got an invalid character it must be either y or n")
                return False

        return True

def IsDescriptionValid(desc:str)->bool:
        """
        Check the categories are only [a-zA-Z0-9,.' ] with 256 max chars.
        """
        # check if the characters are only [a-zA-Z0-9.,' ] with maximum 256 chars max
                #(careful with the ' and , make sure you test if it fucks the csv up or else)
                #  if OK return True
        #if not : return False
        pattern = re.compile("^[A-Za-z0-9-.,' ]+$")
        desc.strip()
		# empty description is fine as it's optional
        if desc == "DEFAULT":
                return False
        elif len(desc) > 256:
                print("desc is greater than 256 chars")
                return False
        return True

def IsCategoryValid(categories: list)-> bool:
        """
        Check the categories are only [a-zA-Z0-9 ] with 64 max chars.
        """
        # check if the characters are only [a-zA-Z0-9 ] with maximum 64 chars max
                #(careful with the ' and , make sure you test if it fucks the csv up or else)
                #  if OK return True
        #if not : return False
        pattern = re.compile("^[A-Za-z0-9 ]+$")
        for category in categories:
                category.strip()
                if pattern.fullmatch(category) is None:
                        #print('Got an empty list or invalid chars')
                        return False
                elif len(category) > 64:
                        print('Category is too long')
                else:
                       return True

def IsNameValid(name: str)->bool:
        """
        Check the parameter name only contains [a-zA-Z0-9 ] and is 64 chars long.
        """
        # check if the characters are only [a-zA-Z0-9 ] with maximum 64 chars max
                #(careful with the ' and , make sure you test if it fucks the csv up or else)
                #  if OK return True
        #if not : return False
        pattern = re.compile("^[A-Za-z0-9 ]+$")
        name = name.strip()
        if (pattern.fullmatch(name) is None):
                #print("Got an invalid character or nothing")
                return False
        elif len(name) > 64:
                print(f'Got a name lenght greater than 64. {len(name)}')
                return False
        return True


if __name__ == '__main__':
	main()