Merge pull request 'Better Check URL function' (#74) from better-check-url into main

Reviewed-on: http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern/pulls/74
This commit is contained in:
oxeo0 2025-05-30 17:19:29 +02:00
commit 4962684603
5 changed files with 300 additions and 498 deletions

1
.gitignore vendored
View file

@ -2,6 +2,7 @@
www/participants/** www/participants/**
crawler/** crawler/**
scripts/__pycache__/** scripts/__pycache__/**
scripts/_*.py
.env .env
env/ env/
submissions/submission.csv submissions/submission.csv

View file

@ -11,8 +11,10 @@ import re
import sys import sys
sys.path.append("..") sys.path.append("..")
from utils import print_colors, IsUrlValid from utils import (
from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid print_colors, IsURLValid, IsSimplexChatroomValid, RecognizeURLType
)
#from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
from dotenv import load_dotenv from dotenv import load_dotenv
# Make default parameters for arguments # Make default parameters for arguments
@ -107,12 +109,12 @@ def add_urls(urls):
global output_file global output_file
for url in urls: for url in urls:
parsed_url = urllib.parse.urlparse(url) parsed_url = urllib.parse.urlparse(url)
if IsSimpleXChatroomValid(url) and not (output_file['URL'] == url).any(): if IsSimplexChatroomValid(url) and not (output_file['URL'] == url).any():
output_file.loc[-1] = ["", url, "", "SimpleX Chatroom"] output_file.loc[-1] = ["", url, "", "SimpleX Chatroom"]
output_file.index += 1 output_file.index += 1
output_file = output_file.sort_index() output_file = output_file.sort_index()
continue continue
elif IsSimpleXServerValid(url) and not (output_file['URL'] == url).any(): elif RecognizeURLType(url) in ('smp', 'xftp') and not (output_file['URL'] == url).any():
output_file.loc[-1] = ["", url, "", "SimpleX Server"] output_file.loc[-1] = ["", url, "", "SimpleX Server"]
output_file.index += 1 output_file.index += 1
output_file = output_file.sort_index() output_file = output_file.sort_index()
@ -164,13 +166,13 @@ def extract_urls_html(url, text):
print_colors(f'[D] Joined URL: {joined_url}') print_colors(f'[D] Joined URL: {joined_url}')
# Capture SimpleX URLs # Capture SimpleX URLs
if IsSimpleXChatroomValid(joined_url) or IsSimpleXServerValid(joined_url): if RecognizeURLType(joined_url) in ('smp', 'xftp', 'chatroom'):
if url not in result.thirdp_urls: if url not in result.thirdp_urls:
result.thirdp_urls.append(joined_url) result.thirdp_urls.append(joined_url)
continue continue
# Check if the URL is a .onion link or not even a web link # Check if the URL is a .onion link or not even a web link
if not IsUrlValid(joined_url): if not IsURLValid(joined_url):
continue continue
print_colors(f'[+] Found url: {joined_url}') print_colors(f'[+] Found url: {joined_url}')
@ -266,4 +268,3 @@ for i, url in enumerate(vcsv_urls):
crawl_url(url) crawl_url(url)
crawler_file.to_csv(args.crawler_file, index=False) crawler_file.to_csv(args.crawler_file, index=False)
output_file.to_csv(args.output, index=False) output_file.to_csv(args.output, index=False)

View file

@ -50,9 +50,6 @@ def main():
os.makedirs(participantdir) os.makedirs(participantdir)
print_colors(""" print_colors("""
; ;
ED. ED.
@ -94,8 +91,8 @@ def main():
if os.path.isfile(urlpath): if os.path.isfile(urlpath):
with open(urlpath) as f: with open(urlpath) as f:
instance = f.read().rstrip() instance = f.read().rstrip()
if IsOnionValid(instance): if IsOnionLinkValid(instance):
print_colors(f"[+] Instance Name: {instance}. Valid:{IsOnionValid(instance)}") print_colors(f"[+] Instance Name: {instance}. Valid:{IsOnionLinkValid(instance)}")
break break
else: else:
print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}',is_error=True ) print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}',is_error=True )
@ -104,8 +101,8 @@ def main():
print_colors("[+] Instance Path doesn't exist yet") print_colors("[+] Instance Path doesn't exist yet")
print_colors(f"Your url will be saved here {urlpath}") print_colors(f"Your url will be saved here {urlpath}")
instance = input("What is your Instance domain?(ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ") instance = input("What is your Instance domain?(ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ")
if IsOnionValid(instance): if IsOnionLinkValid(instance):
print_colors(f"[+] Instance Name: {instance}. Valid: {IsUrlValid(instance)}") print_colors(f"[+] Instance Name: {instance}. Valid: {IsOnionLinkValid(instance)}")
instancepath=rootpath+'www/participants/'+instance instancepath=rootpath+'www/participants/'+instance
else: else:
print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True ) print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True )
@ -212,9 +209,9 @@ Maintenance:
while(IsCategoryValid(category) is not True): while(IsCategoryValid(category) is not True):
category = input("What is the website Category? ") category = input("What is the website Category? ")
# the url of the website (required) + check if its valid # the url of the website (required) + check if its valid
url='' url = ''
while(IsUrlValid(url) is not True and IsSimpleXChatroomValid(url) is not True): while not IsURLValid(url):
url=input("What is the website URL ? ") url = input("What is the website URL ? ")
# a quick description (optional) + check if its valid # a quick description (optional) + check if its valid
desc='DEFAULT' desc='DEFAULT'
@ -247,7 +244,7 @@ Maintenance:
uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
print_colors("[+] New row added! now writing the csv file") print_colors("[+] New row added! now writing the csv file")
else: else:
print("Adding new row in verified.csv since descriptioln is not empty") print("Adding new row in verified.csv since description is not empty")
vdf.loc[-1] = newrow # adding a row vdf.loc[-1] = newrow # adding a row
vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories vdf = vdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories
print_colors("[+] New row added! now writing the csv file") print_colors("[+] New row added! now writing the csv file")
@ -458,7 +455,7 @@ Maintenance:
value = input("What is the new name of the website? ") value = input("What is the new name of the website? ")
vdf.at[index,'Name']=value vdf.at[index,'Name']=value
elif i == 3: # column URL elif i == 3: # column URL
while(IsUrlValid(value) is not True or value == ''): while(IsURLValid(value) is not True or value == ''):
value = input("What is the new URL of the website? ") value = input("What is the new URL of the website? ")
vdf.at[index,'URL']=value vdf.at[index,'URL']=value
elif i == 4: # column Sensitive elif i == 4: # column Sensitive
@ -504,7 +501,7 @@ Maintenance:
value = input("What is the new name of the website? ") value = input("What is the new name of the website? ")
uvdf.at[index,'Name']=value uvdf.at[index,'Name']=value
elif i == 3: # column URL elif i == 3: # column URL
while(IsUrlValid(value) is not True or value == ''): while(IsURLValid(value) is not True or value == ''):
value = input("What is the new URL of the website? ") value = input("What is the new URL of the website? ")
uvdf.at[index,'URL']=value uvdf.at[index,'URL']=value
elif i == 4: # column Sensitive elif i == 4: # column Sensitive
@ -655,8 +652,9 @@ Maintenance:
csvdf.at[i, 'Sensitive'] = "NO" csvdf.at[i, 'Sensitive'] = "NO"
csvdf.to_csv(csvfilepath, index=False) csvdf.to_csv(csvfilepath, index=False)
print('sync:::', csvdf.at[i, 'Instance'])
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion### ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False: if IsURLValid(str(csvdf.at[i, 'Instance'])) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs #mark the row for deletion as it has invalid inputs
if i not in rows2delete: if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs") print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
@ -788,7 +786,7 @@ Maintenance:
case 5: case 5:
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))") print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
webring_participant_url = '' webring_participant_url = ''
while(IsOnionValid(webring_participant_url) is not True): while(IsOnionLinkValid(webring_participant_url) is not True):
webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ") webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
participantdir=rootpath+'www/participants/'+webring_participant_url participantdir=rootpath+'www/participants/'+webring_participant_url
if os.path.isdir(participantdir): if os.path.isdir(participantdir):
@ -892,7 +890,7 @@ Maintenance:
csvdf.to_csv(csvfilepath, index=False) csvdf.to_csv(csvfilepath, index=False)
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion### ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False: if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs #mark the row for deletion as it has invalid inputs
if i not in rows2delete: if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs") print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
@ -1072,7 +1070,7 @@ Maintenance:
break break
else: else:
print_colors("[+] checking if the Word/URL is valid: ") print_colors("[+] checking if the Word/URL is valid: ")
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word): if IsURLValid(word) or IsDescriptionValid(word):
print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist') print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist')
newrow=[word] newrow=[word]
print_colors(f"[+] NEWROW= {newrow}") print_colors(f"[+] NEWROW= {newrow}")
@ -1141,7 +1139,7 @@ Maintenance:
break break
else: else:
print_colors("[+] Checking if the Word/URL is valid: ") print_colors("[+] Checking if the Word/URL is valid: ")
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word): if IsURLValid(word) or IsDescriptionValid(word):
print_colors('[+] Word/URL is valid, adding the word into the blacklist') print_colors('[+] Word/URL is valid, adding the word into the blacklist')
newrow=[word] newrow=[word]
print_colors(f"[+] NEWROW= {newrow}") print_colors(f"[+] NEWROW= {newrow}")
@ -1254,11 +1252,11 @@ Maintenance:
csvdf.to_csv(csvfilepath, index=False) csvdf.to_csv(csvfilepath, index=False)
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion### ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False: if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
if i not in rows2delete: if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs") print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
#print_colors(f"{row}") #print_colors(f"{row}")
print(IsUrlValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsUrlValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score'])) print(IsURLValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsURLValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
rows2delete.append(i) rows2delete.append(i)
read=input("Continue?") read=input("Continue?")
@ -1360,9 +1358,6 @@ Maintenance:
print_colors("Invalid Number",is_error=True) print_colors("Invalid Number",is_error=True)
continue continue
except Exception as e: except Exception as e:
print_colors(f'Try again {e}',is_error=True) print_colors(f'Try again {e}',is_error=True)
break break
@ -1371,7 +1366,6 @@ Maintenance:
print_colors("No more submissions to review, exiting.") print_colors("No more submissions to review, exiting.")
break break
case 12: case 12:
# review the crawled websites # review the crawled websites
try: try:
@ -1482,11 +1476,6 @@ Maintenance:
print_colors("Invalid Number",is_error=True) print_colors("Invalid Number",is_error=True)
continue continue
except Exception as e: except Exception as e:
print_colors(f'Try again {e}',is_error=True) print_colors(f'Try again {e}',is_error=True)
break break

View file

@ -8,7 +8,7 @@ import requests
import json import json
import pandas as pd import pandas as pd
import glob import glob
from utils import IsSimpleXServerValid, send_server_checks from utils import RecognizeURLType, IsOnionLinkValid, send_server_checks
@ -46,8 +46,8 @@ def main():
with open(urlpath) as f: with open(urlpath) as f:
instance = f.read().rstrip() instance = f.read().rstrip()
# check if the instance URL domain is valid # check if the instance URL domain is valid
if IsOnionValid(instance): if IsOnionLinkValid(instance):
print("[+] Instance Name:",instance,IsOnionValid(instance)) print("[+] Instance Name:",instance,IsOnionLinkValid(instance))
isitvalid="y" isitvalid="y"
else: else:
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance) print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
@ -87,44 +87,42 @@ def main():
index1 = url.find("http://") index1 = url.find("http://")
index2 = url.find("https://") index2 = url.find("https://")
if url.startswith("smp://") or url.startswith("xftp://"): urltype = RecognizeURLType(url)
if IsSimpleXServerValid(url): if urltype == 'smp':
if url.startswith("smp"): resp,resp_type,failed_response = send_server_checks(url)
resp,resp_type,failed_response = send_server_checks(url)
if resp_type in ["chatError", "contactSubSummary"]: if resp_type in ["chatError", "contactSubSummary"]:
resp, resp_type,failed_response = send_server_checks(url) resp, resp_type,failed_response = send_server_checks(url)
if failed_response is None: if failed_response is None:
print(url, "✔️") print(url, "✔️")
df.at[i, "Status"]="YES" df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100: if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1 df.at[i,"Score"] = df.at[i,"Score"] + 1
else: else:
print(url,"") print(url,"")
df.at[i,"Status"]="NO" df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value #if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0: if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1 df.at[i,"Score"] = df.at[i,"Score"] - 1
else: elif urltype == 'xftp':
resp,resp_type,failed_response = send_server_checks(url) resp,resp_type,failed_response = send_server_checks(url)
if resp_type in ["chatError", "contactSubSummary"]: if resp_type in ["chatError", "contactSubSummary"]:
resp, resp_type,failed_response = send_server_checks(url) resp, resp_type,failed_response = send_server_checks(url)
if failed_response is None:
print(url, "✔️")
df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
print(url,"")
df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
if failed_response is None:
print(url, "✔️")
df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
print(url,"")
df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
else: else:
if index1 == -1 and index2 == -1: if index1 == -1 and index2 == -1:
@ -145,14 +143,14 @@ def main():
if df.at[i,"Score"] > 0: if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1 df.at[i,"Score"] = df.at[i,"Score"] - 1
except requests.ConnectionError as e: except requests.ConnectionError:
#print(e) #print(e)
print(url,"") print(url,"")
df.at[i,"Status"]="NO" df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value #if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0: if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1 df.at[i,"Score"] = df.at[i,"Score"] - 1
except requests.exceptions.ReadTimeout as e: except requests.exceptions.ReadTimeout:
#print(e) #print(e)
print(url,"") print(url,"")
df.at[i,"Status"]="NO" df.at[i,"Status"]="NO"
@ -174,79 +172,5 @@ def main():
df2.to_csv(csvfile, index=False) df2.to_csv(csvfile, index=False)
def IsUrlValid(url: str) -> bool:
    """
    Check if url is valid for both darknet and clearnet use.

    Only the characters [A-Za-z0-9:/.] are accepted. Addresses ending in
    ".onion" are delegated to IsOnionValid() for stricter checking.

    Returns True when the URL passes all checks, False otherwise.
    """
    # Conservative allow-list of characters; anything else is rejected.
    pattern = re.compile(r"^[A-Za-z0-9:/.]+$")
    url = str(url)
    if url.endswith('.onion'):
        # Onion addresses have their own, stricter validation rules.
        return IsOnionValid(url)
    # A clearnet URL must contain at least one dot (host.tld).
    if '.' not in url:
        return False
    # Reject URLs containing characters outside the allowed set.
    if pattern.fullmatch(url) is None:
        return False
    return True
def IsOnionValid(url: str) -> bool:
    """
    Checks if the domain(param) is a valid onion domain, returns True else False.

    Accepts "domain.onion" or "subdomain.domain.onion" (optionally behind
    http://) built only from [A-Za-z0-9.]. At most one subdomain is allowed
    and the domain must be at least 62 characters long (56-char v3 onion
    address plus ".onion").

    Returns False on any unexpected error instead of raising.
    """
    pattern = re.compile(r"^[A-Za-z0-9.]+(\.onion)?$")
    try:
        url = url.strip().removesuffix('/')
        if url.startswith('http://'):
            # Strip the scheme: "http://host/..." -> "host"
            domain = url.split('/')[2]
        else:
            # TODO: normalize by prepending http:// instead of accepting
            # bare hostnames? (kept as-is: bare hostnames are validated too)
            domain = url
        if pattern.fullmatch(domain) is None:
            # Domain contains characters outside [A-Za-z0-9.]
            return False
        if len(domain.split('.')) > 3:
            # More than one subdomain ("a.b.domain.onion") is not allowed.
            return False
        if len(domain) < 62:
            # Too short to be a v3 onion address (56 chars + ".onion").
            return False
        return True
    except Exception as e:
        # BUGFIX: the handler previously only printed and fell through,
        # implicitly returning None instead of a boolean.
        print(f"Error: {e}")
        return False
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View file

@ -9,377 +9,264 @@ import urllib.parse
from websockets.sync.client import connect from websockets.sync.client import connect
PURPLE = '\033[35;40m' PURPLE = '\033[35;40m'
BOLD_PURPLE = '\033[35;40;1m' BOLD_PURPLE = '\033[35;40;1m'
RED = '\033[31;40m' RED = '\033[31;40m'
BOLD_RED = '\033[31;40;1m' BOLD_RED = '\033[31;40;1m'
RESET = '\033[m' RESET = '\033[m'
# name should contain only up to 64 alphanumeric characters
VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")

# pattern for regular urls (https://stackoverflow.com/a/3809435)
CLEARNET_URL_PATTERN = re.compile(
    r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]"
    r"{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
)

# pattern for onion urls (56 bytes of base32 alphabet + .onion)
# it works also without http(s)://, so just the hostname will also go through
ONION_URL_PATTERN = re.compile(
    r"^(https?:\/\/)?([a-zA-Z0-9-]+\.)*[a-z2-7-]{56}\.onion[^\s]*$"
)

# pattern for simplex chatroom links
SIMPLEX_CHATROOM_PATTERN = re.compile(
    r"(?:https?:\/\/(?:simplex\.chat|[^\/]+)|simplex:)\/(?:contact|invitation)#\/\?v=[\d-]+"
    r"&smp=[^&]+(?:&[^=]+=[^&]*)*(?:&data=\{[^}]*\})?"
)

# pattern for smp or xftp simplex server ((smp|xftp):// 44 byte key @ url [:port])
SIMPLEX_SERVER_PATTERN = re.compile(
    r"^(smp|xftp):\/\/([a-zA-Z0-9\-_+=]{44})@([a-z2-7]{56}\.onion|"
    r"([a-zA-Z0-9\-\.]+\.[a-zA-Z0-9\-\.]+))"
    r"{1,}(?::[1-9][0-9]{0,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|"
    r"65[0-4][0-9]{2}|655[0-3][0-9]|6553[0-5])?$"
)


def IsSimplexChatroomValid(url: str) -> bool:
    """
    Recognizes a SimpleX Chatroom link.
    Returns True if URL is a SimpleX chatroom, False otherwise.
    """
    return bool(SIMPLEX_CHATROOM_PATTERN.match(url))


def RecognizeSimplexType(url: str) -> str:
    """
    Recognizes a SimpleX Server URL, returns 'smp', 'xftp' or 'invalid'.
    """
    match = SIMPLEX_SERVER_PATTERN.match(url)
    if match:
        # group(1) is the protocol captured by the leading (smp|xftp)
        return match.group(1)
    return 'invalid'


# stub function
def IsXFTPServerValid(url: str) -> bool:
    """
    Returns True if URL is a valid SimpleX XFTP Server URL, False otherwise.
    """
    return RecognizeSimplexType(url) == 'xftp'


# stub function
def IsSMPServerValid(url: str) -> bool:
    """
    Returns True if URL is a valid SimpleX SMP Server URL, False otherwise.
    """
    return RecognizeSimplexType(url) == 'smp'


def IsClearnetLinkValid(url: str) -> bool:
    """
    Returns True if URL is a valid clearnet URL, False otherwise.
    """
    return bool(CLEARNET_URL_PATTERN.match(url))


def IsOnionLinkValid(url: str) -> bool:
    """
    Returns True if URL is a valid onion URL, False otherwise.
    """
    return bool(ONION_URL_PATTERN.match(url))


def RecognizeURLType(url: str) -> str:
    """
    Recognizes URL type, can return:
    - chatroom - SimpleX chatroom
    - xftp - XFTP SimpleX server
    - smp - SMP SimpleX server
    - onion - onion URL
    - clearnet - valid clearnet url
    - invalid - none of the above (probably invalid)
    """
    # BUGFIX: coerce to str so non-string values coming straight out of
    # pandas cells (e.g. NaN floats, None) are classified as 'invalid'
    # instead of raising TypeError inside re.Pattern.match().
    url = str(url)
    # order is important here
    # (ex. simplex chatroom is also valid clearnet link)
    if IsSimplexChatroomValid(url):
        return 'chatroom'
    if IsXFTPServerValid(url):
        return 'xftp'
    if IsSMPServerValid(url):
        return 'smp'
    if IsOnionLinkValid(url):
        return 'onion'
    if IsClearnetLinkValid(url):
        return 'clearnet'
    return 'invalid'


def IsURLValid(url: str) -> bool:
    """
    Checks if given URL is valid (RecognizeURLType recognizes it).
    """
    return RecognizeURLType(url) != 'invalid'
#### Checking Functions to validate that links are legit #### #### Checking Functions to validate that links are legit ####
def CheckUrl(url): def CheckUrl(url):
""" """
Checks if URL is actually reachable via Tor Checks if URL is actually reachable via Tor
""" """
proxies = { proxies = {
'http': 'socks5h://127.0.0.1:9050', 'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050' 'https': 'socks5h://127.0.0.1:9050'
} }
try: try:
status = requests.get(url,proxies=proxies, timeout=5).status_code status = requests.get(url, proxies=proxies, timeout=5).status_code
if status != 502: return status == 200
return True except requests.ConnectionError:
else: return False
return False except requests.exceptions.ReadTimeout:
except requests.ConnectionError as e: return False
return False
except requests.exceptions.ReadTimeout as e:
return False
#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS #### #### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
def IsBannerValid(path: str) -> bool: def IsBannerValid(path: str) -> bool:
"""
Checks if the banner.png file has the correct dimensions (240x60)
"""
try:
im = Image.open(path)
except Exception as e:
print("ERROR, EXCEPTION")
return False
width, height = im.size
if width != 240 or height != 60:
print("INVALID BANNER DIMENSIONS, HEIGHT=",height," WIDTH=",width)
return False
filesizeMB=os.path.getsize(path)/1024/1024
if filesizeMB > 5:
print("Banner filesize too large (>5Mb): ",os.path.getsize(path)/1024/1024,"MB")
return False
return True
def IsOnionValid(url: str)-> bool:
""" """
Checks if the domain(param) is a valid onion domain and return True else False. Checks if the banner.png file has the correct dimensions (240x60)
""" """
try: try:
pattern = re.compile("^[A-Za-z0-9.]+(.onion)?$") im = Image.open(path)
url = url.strip().removesuffix('/') except Exception:
if url.startswith('http://'): print("ERROR, EXCEPTION")
domain = url.split('/')[2] return False
if pattern.fullmatch(domain) is not None: width, height = im.size
if len(domain.split('.')) > 3: if width != 240 or height != 60:
return False print("INVALID BANNER DIMENSIONS, HEIGHT=", height, " WIDTH=", width)
else: return False
if len(domain) < 62: filesizeMB = os.path.getsize(path)/1024/1024
return False if filesizeMB > 5:
return True print("Banner filesize too large (>5Mb): ",os.path.getsize(path)/1024/1024,"MB")
elif pattern.fullmatch(domain) is None:
return False
else:
return False
else:
#TODO : edit the url to make sure it has http:// at the beginning, in case if it's missing? (problem is that it only returns true or false)
if pattern.fullmatch(url) is not None:
if len(url.split('.')) > 3:
return False
else:
if len(url) < 62:
return False
return True
elif pattern.fullmatch(url) is None:
return False
else:
return False
except Exception as e:
return False return False
def IsSimpleXChatroomValid(url: str) -> bool:
"""Validate the SimpleX chatroom URL."""
REQUIRED_SUBSTRING = "#/?v=2-7&smp=smp%3A%2F"
# Step 1: Check if it starts with http://, https://, or simplex:/
if url.startswith(('http://', 'https://', 'simplex:/')):
# Step 1.5: If http:// or https://, check for valid clearnet or onion domain
if url.startswith(('http://', 'https://')) and not IsUrlValid(url):
return False
elif not url.startswith('simplex:/'):
return False # Must start with one of the valid protocols
# Step 2: Check for the presence of the required substring
if REQUIRED_SUBSTRING not in url:
return False # Required substring not found
# Step 3: Extract the part after "smp=smp%3A%2F"
smp_start = url.find("smp=smp%3A%2F")
if smp_start == -1:
return False # Required substring not found
smp_start += len("smp=smp%3A%2F")
smp_end = url.find("&", smp_start)
if smp_end == -1:
smp_end = len(url) # Take until the end if no "&" is found
smp_value = urllib.parse.unquote(url[smp_start:smp_end]) # Decode the URL-encoded string
# Step 3.5: Check if the smp_value contains a valid hostname
if '@' not in smp_value:
return False # Must contain '@' to separate fingerprint and hostname
fingerprint, hostname = smp_value.split('@', 1)
if not IsUrlValid(hostname):
return False # Invalid hostname
# Step 4: Check for the presence of "%2F" in the original URL
if "%2F" not in url:
return False # Required substring not found
# If all checks pass, return True
return True return True
def IsUrlValid(url:str)->bool:
"""
Check if url is valid both dark net end clearnet.
""" def IsStatusValid(status: str) -> bool:
pattern = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+$") """
onion_pattern = re.compile(r"^(\w+:)?(?://)?(\w+\.)?[a-z2-7]{56}\.onion") Checks if status contains only ['YES','NO']. Verbose only if False is returned
url = str(url) """
if len(url) < 4: pattern = ['YES','NO','']
return False status = status.strip()
if onion_pattern.match(url) is not None: if status not in pattern:
return IsOnionValid(url) return False
return True
def IsScoreValid(score: str) -> bool:
    """
    Check the Score is only "^[0-9.,]+$" with 8 max chars.

    Empty and 'nan' values (empty pandas cells) are accepted as valid.
    """
    pattern = re.compile("^[0-9.,]+$")
    # BUGFIX: str.strip() returns a new string; previously its result was
    # discarded, so surrounding whitespace was never actually removed.
    score = str(score).strip()
    if score in ['', 'nan']:
        return True
    if pattern.fullmatch(score) is None:
        return False
    if len(score) > 8:
        return False
    return True
def IsDescriptionValid(desc: str) -> bool:
    """
    Check the description contains only [a-zA-Z0-9-.,'"()/ ] with 256 max chars.

    An empty description is valid; the placeholder "DEFAULT" is not.
    """
    if desc == "":
        return True
    pattern = re.compile(r"^[A-Za-z0-9-.,' \"\(\)\/]+$")
    # BUGFIX: str.strip() returns a new string; previously its result was
    # discarded, so surrounding whitespace was never actually removed.
    desc = str(desc).strip()
    if pattern.fullmatch(desc) is None:
        return False
    if desc == "DEFAULT":
        return False
    if len(desc) > 256:
        return False
    return True
def IsCategoryValid(categories: list[str]) -> bool:
"""
Check the categories are only [a-zA-Z0-9 ] with 64 max chars.
"""
pattern = re.compile("^[A-Za-z0-9 ]+$")
for category in categories:
category.strip()
if pattern.fullmatch(category) is None:
return False
elif len(category) > 64:
return False
else: else:
if not url.__contains__('.'):
return False
if url.__contains__(';'):
return False #required otherwise lantern thinks there are extra columns
if pattern.fullmatch(url) is None:
return False
return True
#def IsUrlValid(url:str)->bool:
# """
# Check if url is valid both dark net end clearnet.
# """
# pattern = re.compile("^[A-Za-z0-9:/.-]+$")
# url = str(url)
# if len(url) < 4:
# return False
# if url.endswith('.onion'):
# return IsOnionValid(url)
# else:
# if not url.__contains__('.'):
# return False
# if pattern.fullmatch(url) is None:
# return False
# return True
def IsStatusValid(status: str) -> bool:
    """
    Checks if status contains only ['YES','NO'] or the allowed status
    marks / empty value. Verbose only if False is returned.
    """
    # NOTE(review): the non-ASCII entries mirror the marks printed by the
    # uptime checker — confirm the exact set against the writer side.
    pattern = ['YES','NO','✔️','','']
    # BUGFIX: str.strip() returns a new string; previously its result was
    # discarded, so surrounding whitespace was never actually removed.
    status = str(status).strip()
    if status not in pattern:
        return False
    return True
def IsScoreValid(score: str) -> bool:
    """
    Check the Score is only "^[0-9.,]+$" with 8 max chars.

    Empty and 'nan' values (empty pandas cells) are accepted as valid.
    """
    pattern = re.compile("^[0-9.,]+$")
    # BUGFIX: str.strip() returns a new string; previously its result was
    # discarded, so surrounding whitespace was never actually removed.
    score = str(score).strip()
    if score in ['', 'nan']:
        return True
    if pattern.fullmatch(score) is None:
        return False
    if len(score) > 8:
        return False
    return True
def IsDescriptionValid(desc: str) -> bool:
    """
    Validate a listing description: only [A-Za-z0-9-.,'"()/ ] characters,
    at most 256 chars. Empty is valid; the placeholder "DEFAULT" is not.
    """
    if desc == "":
        return True
    pattern = re.compile(r"^[A-Za-z0-9-.,' \"\(\)\/]+$")
    # str() guards against non-string values (e.g. NaN from pandas).
    # Keep the stripped result: the original called .strip() and discarded
    # it, so padded values like "  DEFAULT  " slipped past the check below.
    desc = str(desc).strip()
    if pattern.fullmatch(desc) is None:
        return False
    if desc == "DEFAULT":
        return False
    if len(desc) > 256:
        return False
    return True
def IsCategoryValid(categories: list) -> bool:
    """
    Validate a list of category names: each only [A-Za-z0-9 ] with at
    most 64 chars. Returns True only when every entry is valid (an
    empty list is vacuously valid).
    """
    pattern = re.compile("^[A-Za-z0-9 ]+$")
    for category in categories:
        # Keep the stripped value (the original discarded .strip()'s result).
        category = category.strip()
        if pattern.fullmatch(category) is None:
            return False
        if len(category) > 64:
            return False
    # The original returned True inside the loop after checking only the
    # first category; accept only after all entries pass.
    return True
def send_server_checks(url: str) -> tuple:
    """
    Send a '/_server test' command for url to the local SimpleX chat
    websocket (ws://localhost:3030) and return
    (response, response type, testFailure payload or None).
    """
    with connect("ws://localhost:3030") as websocket:
        query = f"/_server test 1 {url}"
        command = {
            'corrId': f"id{random.randint(0, 999999)}",
            'cmd': query,
        }
        websocket.send(json.dumps(command))
        message = websocket.recv()
        response = json.loads(message)
        resp_type = response["resp"]["type"]
        failed_response = response['resp'].get('testFailure')
    return (response, resp_type, failed_response)

def IsSimpleXServerValid(url: str) -> bool:
    """
    Check that url is a SimpleX SMP/XFTP server address:
    smp://|xftp:// + 44-char base64url fingerprint + '@' + hostname,
    with an optional ',<onion>' fallback after the hostname.
    """
    # fullmatch, not match: the original pattern '[0-9A-Za-z-_]*' with
    # .match() matched the empty prefix of any string, so every 44-char
    # fingerprint passed regardless of content.
    fingerprint_pattern = re.compile('[0-9A-Za-z-_]*')
    url = url.strip()
    try:
        if url.startswith(('smp://', 'xftp://')):
            # Remove the protocol part
            proless = url.split('//', 1)[-1]
            # Split the fingerprint and hostname
            parts = proless.split('@')
            if len(parts) != 2:
                return False  # Must have exactly one '@' character
            fingerprint = parts[0]
            hostname = parts[1].split(',')[0]  # hostname before any comma
            # Check fingerprint length and pattern
            if len(fingerprint) == 44 and fingerprint_pattern.fullmatch(fingerprint):
                # Validate the hostname
                if IsSimpleXUrlValid(hostname):
                    # Optional comma followed by an onion fallback address.
                    # (The original referenced an undefined hostname_pattern
                    # here, which raised NameError and returned False.)
                    if ',' in proless:
                        onion_part = proless.split(',')[1].strip()
                        if not IsSimpleXOnionValid(onion_part):
                            return False
                    return True
        return False
    except Exception as e:
        print(e)
        # Any error will be a false
        return False
def IsNameValid(name: str) -> bool:
    """
    Check the parameter name only contains [a-zA-Z0-9 ] and is at most
    64 chars long.
    """
    try:
        text = str(name)
    except Exception:
        return False
    text = text.strip()
    allowed = re.compile("^[A-Za-z0-9 ]+$")
    if allowed.fullmatch(text) is None:
        return False
    return len(text) <= 64
def print_colors(s: str = ' ', bold: bool = False, is_error: bool = False, default: bool = False):
    """
    Helper function to print with colors.

    Precedence: bold error > error > bold > plain (default) > purple.
    """
    # Test the combined flag first: the original checked plain is_error
    # before 'is_error and bold', making the BOLD_RED branch unreachable.
    if is_error and bold:
        print(f"{BOLD_RED}{s}{RESET}")
    elif is_error:
        print(f"{RED}{s}{RESET}")
    elif bold:
        print(f"{BOLD_PURPLE}{s}{RESET}")
    elif default:
        print(f'{s}')
    else:
        print(f"{PURPLE}{s}{RESET}")
def IsSimpleXOnionValid(url: str) -> bool:
    """
    Checks whether url is a valid v3 .onion address for SimpleX use,
    with or without a leading http://. Returns True else False.
    """
    try:
        # \.onion is escaped: the original wrote (.onion), where the dot
        # matched any character, so endings like 'xonion' were accepted.
        pattern = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+(\.onion)$")
        url_pattern = re.compile(r"^(\w+:)?(?://)?(\w+\.)?[a-z2-7]{56}\.onion")
        url = url.strip().removesuffix('/')
        if url.startswith('http://'):
            domain = url.split('/')[2]
            if pattern.fullmatch(domain) is None:
                return False
            if len(domain.split('.')) > 3:
                return False  # too many subdomain levels
            if len(domain) < 62:
                return False  # shorter than v3 onion: 56 chars + '.onion'
            return True
        # TODO: normalize by prepending http:// when it is missing?
        # (problem is that this function only returns True or False)
        if url_pattern.match(url) is None:
            return False
        if len(url.split('.')) > 3:
            return False
        if len(url) < 62:
            return False
        return True
    except Exception:
        return False
def IsSimpleXUrlValid(url: str) -> bool:
    """
    Check if url is valid, both darknet and clearnet.
    """
    allowed_chars = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+$")
    onion_pattern = re.compile(r"^(\w+:)?(?://)?(\w+\.)?[a-z2-7]{56}\.onion")
    url = str(url)
    if len(url) < 4:
        return False
    # v3 onion addresses get the dedicated onion validator.
    if onion_pattern.match(url) is not None:
        return IsSimpleXOnionValid(url)
    if '.' not in url:
        return False
    return allowed_chars.fullmatch(url) is not None
def send_server_checks(url: str) -> tuple:
    """
    Sends a '/_server test' command for url to the local SimpleX chat
    websocket (ws://localhost:3030) and returns
    (response, response type, testFailure payload or None).
    """
    # Annotation fixed: '-> ()' evaluated to an empty-tuple *instance*,
    # not a type; 'tuple' is what callers actually receive.
    with connect("ws://localhost:3030") as websocket:
        query = f"/_server test 1 {url}"
        command = {
            'corrId': f"id{random.randint(0, 999999)}",
            'cmd': query,
        }
        websocket.send(json.dumps(command))
        message = websocket.recv()
        response = json.loads(message)
        resp_type = response["resp"]["type"]
        failed_response = response['resp'].get('testFailure')
        return (response, resp_type, failed_response)