import random
import re
import os
import requests
from PIL import Image
import json
#from SimpleX.utils import IsUrlValid
import urllib.parse
from websockets.sync.client import connect

PURPLE = '\033[35;40m'
BOLD_PURPLE = '\033[35;40;1m'
RED = '\033[31;40m'
BOLD_RED = '\033[31;40;1m'
RESET = '\033[m'

# name should contain only up to 64 alphanumeric characters
VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")

# pattern for regular urls
# TODO: this is a very simplified pattern
URL_PATTERN = re.compile(r"^[A-Za-z0-9:\/\._%\-=#?&@]+$")

# pattern for onion urls (56 base32 characters + .onion)
ONION_URL_PATTERN = re.compile(r"^(\w+:)?(?:\/\/)?(\w+\.)?[a-z2-7]{56}\.onion")


def print_colors(s: str = ' ', bold: bool = False, is_error: bool = False, default: bool = False):
    """
    Helper function to print with colors
    """
    if is_error and bold:
        print(f"{BOLD_RED}{s}{RESET}")
    elif is_error:
        print(f"{RED}{s}{RESET}")
    elif bold:
        print(f"{BOLD_PURPLE}{s}{RESET}")
    elif default:
        print(f'{s}')
    else:
        print(f"{PURPLE}{s}{RESET}")


#### Checking Functions to validate that links are legit ####

def CheckUrl(url):
    """
    Checks if URL is actually reachable via Tor
    """
    proxies = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050'
    }
    try:
        status = requests.get(url, proxies=proxies, timeout=5).status_code
        return bool(status == 200)
    except requests.ConnectionError:
        return False
    except requests.exceptions.ReadTimeout:
        return False


#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####

def IsBannerValid(path: str) -> bool:
    """
    Checks if the banner.png file has the correct dimensions (240x60)
    """
    try:
        im = Image.open(path)
    except Exception:
        print("ERROR, EXCEPTION")
        return False
    width, height = im.size
    if width != 240 or height != 60:
        print("INVALID BANNER DIMENSIONS, HEIGHT=", height, " WIDTH=", width)
        return False
    filesizeMB = os.path.getsize(path) / 1024 / 1024
    if filesizeMB > 5:
        print("Banner filesize too large (>5MB): ", filesizeMB, "MB")
        return False
    return True
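# Illustrative usage sketch, kept as a comment so importing this module has no side
# effects; the onion URL and banner path below are made-up placeholders:
#
#   if CheckUrl("http://example.onion/") and IsBannerValid("banners/example.png"):
#       print_colors("link reachable over Tor and banner well-formed", bold=True)
#   else:
#       print_colors("rejecting this listing", is_error=True)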
""" try: # make sure the protocol is there if not url.startswith(('http://', 'https://')): url = 'http://' + url.strip().removesuffix('/') domain = url.split('/')[2] if ONION_URL_PATTERN.fullmatch(domain): parts_count = len(domain.split('.')) # TODO: we probably don't really need to check 62 char length # regex does that beforehand return (len(domain) == 62) and (parts_count <= 3) except Exception: return False def IsSimpleXChatroomValid(url: str) -> bool: """Validate the SimpleX chatroom URL.""" REQUIRED_SUBSTRING = "#/?v=2-7&smp=smp%3A%2F" # Step 1: Check if it starts with http://, https://, or simplex:/ if url.startswith(('http://', 'https://', 'simplex:/')): # Step 1.5: If http:// or https://, check for valid clearnet or onion domain if url.startswith(('http://', 'https://')) \ and RecognizeUrlOnionClear(url) != 'invalid': return False elif not url.startswith('simplex:/'): return False # Must start with one of the valid protocols # Step 2: Check for the presence of the required substring if REQUIRED_SUBSTRING not in url: return False # Required substring not found # Step 3: Extract the part after "smp=smp%3A%2F" smp_start = url.find("smp=smp%3A%2F") if smp_start == -1: return False # Required substring not found smp_start += len("smp=smp%3A%2F") smp_end = url.find("&", smp_start) if smp_end == -1: smp_end = len(url) # Take until the end if no "&" is found smp_value = urllib.parse.unquote(url[smp_start:smp_end]) # Decode the URL-encoded string # Step 3.5: Check if the smp_value contains a valid hostname if '@' not in smp_value: return False # Must contain '@' to separate fingerprint and hostname fingerprint, hostname = smp_value.split('@', 1) if RecognizeUrlOnionClear(hostname) != 'invalid': return False # Invalid hostname # Step 4: Check for the presence of "%2F" in the original URL if "%2F" not in url: return False # Required substring not found # If all checks pass, return True return True def RecognizeUrlOnionClear(url: str) -> str: """ Recognize if the URL is invalid, onion or clearnet. """ # early terminate preconditions if len(url) < 4 or (';' in url) or ('.' not in url): return 'invalid' # check if possibly onion url, here just perliminary check # IsOnionValid checks it against regex expression if '.onion' in url: if IsOnionValid(url): return 'onion' if URL_PATTERN.fullmatch(url): return 'clearnet' return 'invalid' def RecognizeUrlFull(url: str) -> str: """ Recognize if URL is smp, xftp, simplex groupchat, onion, clearnet or just invalid Depends on RecognizeUrlOnionClear """ if IsSimpleXChatroomValid(url): return 'chatroom' if url.startswith(('http://', 'https://')): return RecognizeUrlOnionClear(url) if url.startswith('xftp://'): if IsSimpleXServerValid(url): return 'xftp' if url.startswith('smp://'): if IsSimpleXServerValid(url): return 'smp' return 'invalid' #def IsUrlValid(url:str)->bool: # """ # Check if url is valid both dark net end clearnet. # """ # pattern = re.compile("^[A-Za-z0-9:/.-]+$") # url = str(url) # if len(url) < 4: # return False # if url.endswith('.onion'): # return IsOnionValid(url) # else: # if not url.__contains__('.'): # return False # if pattern.fullmatch(url) is None: # return False # return True def IsStatusValid(status: str) -> bool: """ Checks if status contains only ['YES','NO']. Verbose only if False is returned """ pattern = ['YES','NO','✔️','❌',''] status = status.strip() if status not in pattern: return False return True def IsScoreValid(score: str) -> bool: """ Check the Score is only "^[0-9.,]+$" with 8 max chars. 
""" pattern = re.compile("^[0-9.,]+$") score = str(score) score.strip() if score in ['','nan']: return True if pattern.fullmatch(score) is None: return False if len(score) > 8: return False return True def IsDescriptionValid(desc:str)->bool: """ Check the categories are only [a-zA-Z0-9.' ] with 256 max chars. """ if desc == "": return True pattern = re.compile(r"^[A-Za-z0-9-.,' \"\(\)\/]+$") desc = str(desc) desc.strip() if pattern.fullmatch(desc) is None: return False if desc == "DEFAULT": return False elif len(desc) > 256: return False return True def IsCategoryValid(categories: list[str]) -> bool: """ Check the categories are only [a-zA-Z0-9 ] with 64 max chars. """ pattern = re.compile("^[A-Za-z0-9 ]+$") for category in categories: category.strip() if pattern.fullmatch(category) is None: return False elif len(category) > 64: return False else: return True def IsSimpleXServerValid(url: str) -> bool: pattern = re.compile('[0-9A-Za-z-_]*') url = url.strip() try: if url.startswith(('smp://', 'xftp://')): # Remove the protocol part proless = url.split('//', 1)[-1] # Split the fingerprint and hostname parts = proless.split('@') if len(parts) != 2: return False # Must have exactly one '@' character fingerprint = parts[0] hostname = parts[1].split(',')[0] # Get the hostname before any comma # Check fingerprint length and pattern if len(fingerprint) == 44 and pattern.match(fingerprint): # Validate the hostname if RecognizeUrlOnionClear(hostname) != 'invalid': # Check for an optional comma and a valid onion domain if ',' in proless: onion_part = proless.split(',')[1].strip() if RecognizeUrlOnionClear(onion_part) != 'invalid': return False return True return False except Exception as e: print(e) # Any error will be a false return False def IsNameValid(name: str) -> bool: """ Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long. """ try: return bool(VALID_NAME_PATTERN.fullmatch(name.strip())) except Exception: return False def send_server_checks(url: str) -> tuple[str, str, str]: """ Sends requests to sxc websocket and retuns response, response type and testFailure or None. """ with connect(f"ws://localhost:3030") as websocket: query = f"/_server test 1 {url}" command = { 'corrId': f"id{random.randint(0,999999)}", 'cmd': query, } websocket.send(json.dumps(command)) message = websocket.recv() response = json.loads(message) resp_type = response["resp"]["type"] failed_response = response['resp'].get('testFailure') return (response, resp_type, failed_response)