darknet-lantern/scripts/uptimechecker.py

import os
import pwd
import re
import requests
import pandas as pd
# Dependencies: apt install python3-pandas python3-requests python3-socks

def main():
    print('[+] ONION UPTIME CHECKER')
    # TODO: take the instance name as an argument and exit if it is missing
    rootpath = '/srv/darknet-lantern/'
    urlpath = pwd.getpwuid(os.getuid()).pw_dir + "/.darknet_participant_url"
    # Check that ~/.darknet_participant_url exists; if it does, it contains the
    # instance URL (e.g. uptime.nowherejez...onion).
    isitvalid = "n"
    while isitvalid != "y":
        if os.path.isfile(urlpath):
            with open(urlpath) as f:
                instance = f.read().rstrip()
            # Check that the instance URL is a valid onion domain.
            if IsOnionValid(instance):
                print("[+] Instance Name:", instance)
                isitvalid = "y"
            else:
                print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
                return False
        else:
            print("[-] Instance path doesn't exist yet, run darknet_exploration.py to set it up")
            return False
    # Route every request through the local Tor SOCKS proxy.
    proxies = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050'
    }
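    # NOTE: the socks5h:// scheme (unlike socks5://) makes requests resolve
    # hostnames through the proxy itself, so .onion names are resolved inside
    # Tor and never leak to the local DNS resolver.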
    instancepath = rootpath + 'www/participants/' + instance + '/'
    csvfiles2check = ['verified.csv', 'unverified.csv', 'webring-participants.csv']
    csvfiles2sortcat = ['verified.csv', 'unverified.csv']
    for csvfilename in csvfiles2check:
        csvfile = instancepath + csvfilename
        print('[+] Reading the CSV file:', csvfile)
        df = pd.read_csv(csvfile)
        print(df[['Name', 'URL']])
        print('[+] Checking if each .onion link is reachable:')
        for i in df.index:
print("[+] Editing the uptime score")
#if empty, set to 100
if pd.isnull(df.at[i,"Score"]):
df.at[i,"Score"] = 100
print(i)
#print(df.at[i,"URL"])
url=df.at[i,"URL"]
try:
index1 = url.find("http://")
index2 = url.find("https://")
if index1 == -1 and index2 == -1:
url = "http://"+url
status = requests.get(url,proxies=proxies, timeout=5).status_code
print('[+]',url,status)
if status != 502:
print(url,"✔️")
df.at[i,"Status"]="✔️"
#if uptime <100 do +1 to the value
if df.at[i,"Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
print(url,"")
df.at[i,"Status"]=""
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
except requests.ConnectionError as e:
#print(e)
print(url,"")
df.at[i,"Status"]=""
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
except requests.exceptions.ReadTimeout as e:
#print(e)
print(url,"")
df.at[i,"Status"]=""
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
        # verified.csv and unverified.csv are sorted by category then name;
        # the other files are sorted by descending uptime score.
        if csvfilename in csvfiles2sortcat:
            df2 = df.sort_values(by=["Category", "Name"], ascending=[True, True])
        else:
            df2 = df.sort_values(by="Score", ascending=False)
        df2.to_csv(csvfile, index=False)
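
# A minimal sketch (hypothetical helper, not called by main() above) of the
# bounded uptime-score update that the loop applies inline: a reachable link
# gains one point up to a cap of 100, an unreachable one loses one point down
# to a floor of 0.
def update_score(score: float, reachable: bool) -> float:
    """Equivalent to the inline Score arithmetic in main()."""
    if reachable:
        return min(score + 1, 100)
    return max(score - 1, 0)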

def IsUrlValid(url: str) -> bool:
    """
    Check if a URL is valid, for both darknet and clearnet addresses.
    Allowed characters are [A-Za-z0-9:/.] only; .onion URLs are delegated to
    IsOnionValid(), and clearnet URLs must contain at least one dot.
    """
    pattern = re.compile(r"^[A-Za-z0-9:/.]+$")
    url = str(url)
    if url.endswith('.onion'):
        return IsOnionValid(url)
    if '.' not in url:
        # A clearnet URL needs at least one dot.
        return False
    if pattern.fullmatch(url) is None:
        # URL contains invalid characters.
        return False
    return True
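
# Illustrative behaviour of IsUrlValid (the inputs are made-up examples):
#   IsUrlValid("http://example.com")  -> True
#   IsUrlValid("nodots")              -> False (clearnet URLs need a dot)
#   IsUrlValid("whatever.onion")      -> delegated to IsOnionValid()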

def IsOnionValid(url: str) -> bool:
    """
    Check whether url is a valid onion domain; return True if so, else False.
    Accepts "domain.onion" or "subdomain.domain.onion" (at most one
    subdomain), with or without a leading http://.
    """
    try:
        pattern = re.compile(r"^[A-Za-z0-9.]+(\.onion)?$")
        url = url.strip().removesuffix('/')
        if url.startswith('http://'):
            # Strip the http:// scheme to get the bare domain.
            domain = url.split('/')[2]
        else:
            # TODO: prepend http:// when it is missing instead of validating
            # the bare string (awkward while the function only returns a bool).
            domain = url
        if pattern.fullmatch(domain) is None:
            # Domain contains an invalid character.
            return False
        if len(domain.split('.')) > 3:
            # More than one subdomain; only "subdomain.domain.onion" is allowed.
            return False
        if len(domain) < 62:
            # Too short to be a v3 onion address (56 chars + ".onion").
            return False
        return True
    except Exception as e:
        print(f"Error: {e}")
        return False
if __name__ == '__main__':
    main()