[untested] RecognizeURLType and IsURLValid functions are working

2025-07-01 22:16:41 +00:00 · 2025-05-30 00:21:20 +02:00 · 2025-05-30 00:21:20 +02:00 · c9a2fbcfdd
commit c9a2fbcfdd
parent 08697f5c40
5 changed files with 179 additions and 320 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,6 +2,7 @@
 www/participants/**
 crawler/**
 scripts/__pycache__/**
+scripts/_*.py
 .env
 env/
 submissions/submission.csv
--- a/scripts/crawler.py
+++ b/scripts/crawler.py
@ -11,8 +11,10 @@ import re
 import sys
 sys.path.append("..")

-from utils import print_colors, IsUrlValid
-from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
+from utils import (
+    print_colors, IsURLValid, IsSimplexChatroomValid, RecognizeURLType
+)
+#from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
 from dotenv import load_dotenv

 # Make default parameters for arguments
@ -107,12 +109,12 @@ def add_urls(urls):
    global output_file
    for url in urls:
        parsed_url = urllib.parse.urlparse(url)
-        if IsSimpleXChatroomValid(url) and not (output_file['URL'] == url).any():
+        if IsSimplexChatroomValid(url) and not (output_file['URL'] == url).any():
            output_file.loc[-1] = ["", url, "", "SimpleX Chatroom"]
            output_file.index += 1
            output_file = output_file.sort_index()
            continue
-        elif IsSimpleXServerValid(url) and not (output_file['URL'] == url).any():
+        elif RecognizeURLType(url) in ('smp', 'xftp') and not (output_file['URL'] == url).any():
            output_file.loc[-1] = ["", url, "", "SimpleX Server"]
            output_file.index += 1
            output_file = output_file.sort_index()
@ -164,13 +166,13 @@ def extract_urls_html(url, text):
        print_colors(f'[D] Joined URL: {joined_url}')

        # Capture SimpleX URLs
-        if IsSimpleXChatroomValid(joined_url) or IsSimpleXServerValid(joined_url):
+        if RecognizeURLType(joined_url) in ('smp', 'xftp', 'chatroom'):
            if url not in result.thirdp_urls:
                result.thirdp_urls.append(joined_url)
            continue

        # Check if the URL is a .onion link or not even a web link
-        if not IsUrlValid(joined_url):
+        if not IsURLValid(joined_url):
            continue

        print_colors(f'[+] Found url: {joined_url}')
@ -266,4 +268,3 @@ for i, url in enumerate(vcsv_urls):
    crawl_url(url)
    crawler_file.to_csv(args.crawler_file, index=False)
    output_file.to_csv(args.output, index=False)
-
--- a/scripts/lantern.py
+++ b/scripts/lantern.py
@ -50,9 +50,6 @@ def main():
                os.makedirs(participantdir)


-
-
-     
    print_colors("""
 ;
 ED.
@ -94,8 +91,8 @@ def main():
        if os.path.isfile(urlpath):
            with open(urlpath) as f:
                instance = f.read().rstrip()
-            if IsOnionValid(instance):
-                print_colors(f"[+] Instance Name: {instance}. Valid:{IsOnionValid(instance)}")
+            if IsOnionLinkValid(instance):
+                print_colors(f"[+] Instance Name: {instance}. Valid:{IsOnionLinkValid(instance)}")
                break
            else:
                print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}',is_error=True )
@ -104,8 +101,8 @@ def main():
            print_colors("[+] Instance Path doesn't exist yet")
            print_colors(f"Your url will be saved here {urlpath}")
            instance = input("What is your Instance domain?(ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ")
-            if IsOnionValid(instance):
-                print_colors(f"[+] Instance Name: {instance}. Valid: {IsUrlValid(instance)}")
+            if IsOnionLinkValid(instance):
+                print_colors(f"[+] Instance Name: {instance}. Valid: {IsOnionLinkValid(instance)}")
                instancepath=rootpath+'www/participants/'+instance
            else:
                print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True )
@ -212,9 +209,9 @@ Maintenance:
                    while(IsCategoryValid(category) is not True):
                        category = input("What is the website Category? ")
                    # the url of the website (required) + check if its valid
-                    url=''
-                    while(IsUrlValid(url) is not True and IsSimpleXChatroomValid(url) is not True):
-                        url=input("What is the website URL ? ")
+                    url = ''
+                    while not IsURLValid(url):
+                        url = input("What is the website URL ? ")

                    # a quick description (optional) + check if its valid
                    desc='DEFAULT'
@ -458,7 +455,7 @@ Maintenance:
                                                value = input("What is the new name of the website? ")
                                            vdf.at[index,'Name']=value
                                        elif i == 3: # column URL
-                                            while(IsUrlValid(value) is not True or value == ''):
+                                            while(IsURLValid(value) is not True or value == ''):
                                                value = input("What is the new URL of the website? ")
                                            vdf.at[index,'URL']=value
                                        elif i == 4: # column Sensitive
@ -504,7 +501,7 @@ Maintenance:
                                                value = input("What is the new name of the website? ")
                                            uvdf.at[index,'Name']=value
                                        elif i == 3: # column URL
-                                            while(IsUrlValid(value) is not True or value == ''):
+                                            while(IsURLValid(value) is not True or value == ''):
                                                value = input("What is the new URL of the website? ")
                                            uvdf.at[index,'URL']=value
                                        elif i == 4: # column Sensitive
@ -656,7 +653,7 @@ Maintenance:
                                        csvdf.to_csv(csvfilepath, index=False)
 	
                                    ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
-                                    if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or  IsDescriptionValid(csvdf.at[i, 'Description']) is False or  IsStatusValid(csvdf.at[i, 'Status']) is False or  IsScoreValid(csvdf.at[i, 'Score']) is False:
+                                    if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or  IsDescriptionValid(csvdf.at[i, 'Description']) is False or  IsStatusValid(csvdf.at[i, 'Status']) is False or  IsScoreValid(csvdf.at[i, 'Score']) is False:
                                        #mark the row for deletion as it has invalid inputs
                                        if i not in rows2delete:
                                            print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
@ -788,7 +785,7 @@ Maintenance:
            case 5:
                print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
                webring_participant_url = ''
-                while(IsOnionValid(webring_participant_url) is not True):
+                while(IsOnionLinkValid(webring_participant_url) is not True):
                    webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion)  ")
                participantdir=rootpath+'www/participants/'+webring_participant_url
                if os.path.isdir(participantdir):
@ -892,7 +889,7 @@ Maintenance:
                                    csvdf.to_csv(csvfilepath, index=False)
 	
                                ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
-                                if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or  IsDescriptionValid(csvdf.at[i, 'Description']) is False or  IsStatusValid(csvdf.at[i, 'Status']) is False or  IsScoreValid(csvdf.at[i, 'Score']) is False:
+                                if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or  IsDescriptionValid(csvdf.at[i, 'Description']) is False or  IsStatusValid(csvdf.at[i, 'Status']) is False or  IsScoreValid(csvdf.at[i, 'Score']) is False:
                                    #mark the row for deletion as it has invalid inputs
                                    if i not in rows2delete:
                                        print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
@ -1072,7 +1069,7 @@ Maintenance:
                                    break
                                else:
                                    print_colors("[+] checking if the Word/URL is valid: ")
-                                    if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
+                                    if IsURLValid(word) or IsDescriptionValid(word):
                                        print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist')
                                        newrow=[word]
                                        print_colors(f"[+] NEWROW= {newrow}")
@ -1141,7 +1138,7 @@ Maintenance:
                                    break
                                else:
                                    print_colors("[+] Checking if the Word/URL is valid: ")
-                                    if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
+                                    if IsURLValid(word) or IsDescriptionValid(word):
                                        print_colors('[+] Word/URL is valid, adding the word into the blacklist')
                                        newrow=[word]
                                        print_colors(f"[+] NEWROW= {newrow}")
@ -1254,11 +1251,11 @@ Maintenance:
                                csvdf.to_csv(csvfilepath, index=False)
 							
                            ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
-                            if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or  IsDescriptionValid(csvdf.at[i, 'Description']) is False or  IsStatusValid(csvdf.at[i, 'Status']) is False or  IsScoreValid(csvdf.at[i, 'Score']) is False:
+                            if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or  IsDescriptionValid(csvdf.at[i, 'Description']) is False or  IsStatusValid(csvdf.at[i, 'Status']) is False or  IsScoreValid(csvdf.at[i, 'Score']) is False:
                                if i not in rows2delete:
                                    print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
                                    #print_colors(f"{row}")
-                                    print(IsUrlValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsUrlValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']),  IsScoreValid(csvdf.at[i, 'Score']))
+                                    print(IsURLValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsURLValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']),  IsScoreValid(csvdf.at[i, 'Score']))
                                    rows2delete.append(i)
                                    read=input("Continue?")

@ -1360,9 +1357,6 @@ Maintenance:
                                print_colors("Invalid Number",is_error=True)
                                continue

-
-                       
-                        
                except Exception as e:
                    print_colors(f'Try again {e}',is_error=True)
                    break 
@ -1371,7 +1365,6 @@ Maintenance:
                    print_colors("No more submissions to review, exiting.")
                    break 

-
            case 12:
                # review the crawled websites 
                try:       
@ -1459,12 +1452,12 @@ Maintenance:
                                crawled_df.to_csv(crawled_file_abs_path, index=False)
                            
                            elif number == 3:
-                                # Delete from crawled_onion.csv 
+                                # Delete from crawled_onion.csv
                                crawled_df.drop(index=i,inplace=True)
                                crawled_df.to_csv(crawled_file_abs_path, index=False)

                            elif number == 4:
-                                # Add to blacklist.csv 
+                                # Add to blacklist.csv
                                newrow=[link]

                                blacklist_df.loc[-1] = newrow  # adding a row
@ -1482,15 +1475,10 @@ Maintenance:
                                print_colors("Invalid Number",is_error=True)
                                continue

-
-
- 
-
-                        
                except Exception as e:
                    print_colors(f'Try again {e}',is_error=True)
-                    break 
-                
+                    break
+
                finally:
                    print_colors("No more crawled websites to review, exiting.")
                    break 
--- a/scripts/uptimechecker.py
+++ b/scripts/uptimechecker.py
@ -8,7 +8,7 @@ import requests
 import json
 import pandas as pd
 import glob
-from utils import IsSimpleXServerValid, send_server_checks
+from utils import RecognizeURLType, IsOnionLinkValid, send_server_checks



@ -46,8 +46,8 @@ def main():
                    with open(urlpath) as f:
                            instance = f.read().rstrip()
                    # check if the instance URL domain is valid
-                    if IsOnionValid(instance):
-                            print("[+] Instance Name:",instance,IsOnionValid(instance))
+                    if IsOnionLinkValid(instance):
+                            print("[+] Instance Name:",instance,IsOnionLinkValid(instance))
                            isitvalid="y"
                    else:
                            print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
@ -88,42 +88,41 @@ def main():
                        index2 = url.find("https://")

                        if url.startswith("smp://") or url.startswith("xftp://"):
-                            if IsSimpleXServerValid(url):
-                                if url.startswith("smp"): 
-                                    resp,resp_type,failed_response = send_server_checks(url)
-                                       
-                                    if resp_type in ["chatError", "contactSubSummary"]:
-                                        resp, resp_type,failed_response = send_server_checks(url)
+                            if RecognizeURLType(url) == 'smp': 
+                                resp,resp_type,failed_response = send_server_checks(url)
                                    
-                                    if failed_response is None:
-                                        print(url, "✔️")
-                                        df.at[i, "Status"]="YES"
-                                        if df.at[i, "Score"] < 100:
-                                            df.at[i,"Score"] = df.at[i,"Score"] + 1
-                                    else:
-                                        print(url,"❌")
-                                        df.at[i,"Status"]="NO"
-                                        #if uptime >0  do -1 to the value
-                                        if df.at[i,"Score"] > 0:
-                                            df.at[i,"Score"] = df.at[i,"Score"] - 1
-
+                                if resp_type in ["chatError", "contactSubSummary"]:
+                                    resp, resp_type,failed_response = send_server_checks(url)
+                                
+                                if failed_response is None:
+                                    print(url, "✔️")
+                                    df.at[i, "Status"]="YES"
+                                    if df.at[i, "Score"] < 100:
+                                        df.at[i,"Score"] = df.at[i,"Score"] + 1
                                else:
-                                    resp,resp_type,failed_response = send_server_checks(url)
-                                    
-                                    if resp_type in ["chatError", "contactSubSummary"]:
-                                        resp, resp_type,failed_response = send_server_checks(url)
-                                    
-                                    if failed_response is None:
-                                        print(url, "✔️")
-                                        df.at[i, "Status"]="YES"
-                                        if df.at[i, "Score"] < 100:
-                                            df.at[i,"Score"] = df.at[i,"Score"] + 1
-                                    else:
-                                        print(url,"❌")
-                                        df.at[i,"Status"]="NO"
-                                        #if uptime >0  do -1 to the value
-                                        if df.at[i,"Score"] > 0:
-                                            df.at[i,"Score"] = df.at[i,"Score"] - 1
+                                    print(url,"❌")
+                                    df.at[i,"Status"]="NO"
+                                    #if uptime >0  do -1 to the value
+                                    if df.at[i,"Score"] > 0:
+                                        df.at[i,"Score"] = df.at[i,"Score"] - 1
+
+                            elif RecognizeURLType(url) == 'xftp':
+                                resp,resp_type,failed_response = send_server_checks(url)
+                                
+                                if resp_type in ["chatError", "contactSubSummary"]:
+                                    resp, resp_type,failed_response = send_server_checks(url)
+                                
+                                if failed_response is None:
+                                    print(url, "✔️")
+                                    df.at[i, "Status"]="YES"
+                                    if df.at[i, "Score"] < 100:
+                                        df.at[i,"Score"] = df.at[i,"Score"] + 1
+                                else:
+                                    print(url,"❌")
+                                    df.at[i,"Status"]="NO"
+                                    #if uptime >0  do -1 to the value
+                                    if df.at[i,"Score"] > 0:
+                                        df.at[i,"Score"] = df.at[i,"Score"] - 1


                        else:
@ -173,80 +172,6 @@ def main():
                    #print(df2)
                    df2.to_csv(csvfile, index=False)

-
-def IsUrlValid(url:str)->bool:
-        """
-        Check if url is valid both dark net end clearnet.
-        """
-        # check if the characters are only [a-zA-Z0-9.:/] with maximum 128 chars max?
-                # check that it is only http(s)://wordA.wordB or http(s)://WordC.WordB.WordC, (onion or not), clearnet is fine too (double check if those are fine!)
-                        #  if OK return True
-        #if not : return False
-        pattern  = re.compile("^[A-Za-z0-9:/.]+$")
-        url = str(url)
-        if url.endswith('.onion'):
-                return IsOnionValid(url)
-        else:
-                if not url.__contains__('.'):
-                        #print("No (DOT) in clearnet url")
-                        return False
-                if pattern.fullmatch(url) is None:
-                        #print('Url contains invalid chars')
-                        return False
-                return True
-
-def IsOnionValid(url: str)-> bool:
-    """
-    Checks if the domain(param) is a valid onion domain and return True else False.
-    """
-        # check if the characters are only [a-zA-Z0-9.] with maximum 128 chars max?
-                # check that it is only url.onion or subdomain.url.onion,
-                        #  if OK return True
-        #if not : return False
-    try:
-        pattern = re.compile("^[A-Za-z0-9.]+(\.onion)?$")
-        url = url.strip().removesuffix('/')
-        if url.startswith('http://'):
-            #print('URL starts with http')
-            # Removes the http://
-            domain = url.split('/')[2]
-            if pattern.fullmatch(domain) is not None:
-                if len(domain.split('.')) > 3:
-                    n_subdomians = len(domain.split('.'))
-                    # Checks if there is more than 1 subdomain. "subdomain.url.onion" only
-                    #print(f"This domain have more than one subdomain. There are {n_subdomians} subdomains")
-                    return False
-                else:
-                    if len(domain) < 62:
-                        #print("Domain length is less than 62.")
-                        return False
-                    return True
-            elif pattern.fullmatch(domain) is None:
-                #print("Domain contains invalid character.")
-                #print(domain)
-                return False
-            else:
-                #print("Domain not valid")
-                return False
-        else:
-            #TODO : edit the url to make sure it has http:// at the beginning, in case if it's missing? (problem is that it only returns true or false)
-            #print("URL doesn't start http")
-            if pattern.fullmatch(url) is not None:
-                if len(url.split('.')) > 3:
-                    n_subdomians = len(url.split('.'))
-                    # Checks if there is more than 1 subdomain. "subdomain.url.onion" only
-                    return False
-                else:
-                    if len(url) < 62:
-                        return False
-                    return True
-            elif pattern.fullmatch(url) is None:
-                return False
-            else:
-                return False
-    except Exception as e:
-        print(f"Error: {e}")
-
        
 if __name__ == '__main__':
    main()
--- a/scripts/utils.py
+++ b/scripts/utils.py
@ -18,28 +18,108 @@ RESET = '\033[m'
 # name should contain only up to 64 alphanumeric characters
 VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")

-# pattern for regular urls
-# TODO: this is very simplified pattern
-URL_PATTERN = re.compile(r"^[A-Za-z0-9:\/\._%-=#?&@]+$")
+# pattern for regular urls (https://stackoverflow.com/a/3809435)
+# Requires an explicit http:// or https:// scheme. The expression carries
+# no ^/$ anchors, so when used with .match() only the beginning of the
+# string is pinned; text following a matching prefix is not rejected.
+CLEARNET_URL_PATTERN = re.compile(
+    r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]"
+    r"{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
+)

 # pattern for onion urls (56 bytes of base32 alphabet + .onion)
-ONION_URL_PATTERN = re.compile(r"^(\w+:)?(?:\/\/)?(\w+\.)?[a-z2-7]{56}\.onion")
+# Anchored on both ends: http(s):// scheme, any number of dot-terminated
+# subdomain labels, a 56-character label, ".onion", then any run of
+# non-whitespace characters (path, query, ...).
+# NOTE(review): a bare domain without scheme does not match this pattern.
+# NOTE(review): the 56-character class also admits '-', which is not
+# part of the base32 alphabet - confirm this is intended.
+ONION_URL_PATTERN = re.compile(
+    r"^https?:\/\/([a-zA-Z0-9-]+\.)*[a-z2-7-]{56}\.onion[^\s]*$"
+)

+# pattern for simplex chatroom links
+# Accepts either "http(s)://<host>" (simplex.chat or any other host) or
+# the "simplex:" scheme, followed by /contact or /invitation, then
+# "#/?v=<digits and dashes>&smp=<value>", optional further key=value
+# parameters and an optional trailing "&data={...}" part.
+# The expression has no ^/$ anchors.
+SIMPLEX_CHATROOM_PATTERN = re.compile(
+    r"(?:https?:\/\/(?:simplex\.chat|[^\/]+)|simplex:)\/(?:contact|invitation)#\/\?v=[\d-]+"
+    r"&smp=[^&]+(?:&[^=]+=[^&]*)*(?:&data=\{[^}]*\})?"
+)

-def print_colors(s:str=' ', bold:bool=False, is_error:bool = False, default:bool=False):
+# pattern for smp or xftp simplex server ((smp|xftp):// 44 byte key @ url [:port])
+# Group 1 captures the scheme ("smp" or "xftp"), group 2 the 44-character
+# key, group 3 the host (a 56-character .onion name or a dotted hostname).
+# NOTE(review): inside the optional port group the ':' belongs only to the
+# first alternative ([1-9][0-9]{0,4}); the following range alternatives
+# carry no ':' - presumably the intent was to cap the port at 65535,
+# confirm before relying on the port check.
+SIMPLEX_SERVER_PATTERN = re.compile(
+    r"^(smp|xftp):\/\/([a-zA-Z0-9\-_+=]{44})@([a-z2-7]{56}\.onion|"
+    r"([a-zA-Z0-9\-\.]+\.[a-zA-Z0-9\-\.]+))"
+    r"{1,}(?::[1-9][0-9]{0,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|"
+    r"65[0-4][0-9]{2}|655[0-3][0-9]|6553[0-5])?$"
+)
+
+def IsSimplexChatroomValid(url: str) -> bool:
    """
-    Helper function to print with colors
+    Recognizes Simplex Chatroom link.
+    Returns True if the start of the URL matches
+    SIMPLEX_CHATROOM_PATTERN, False otherwise.
    """
-    if is_error:
-        print(f"{RED}{s}{RESET}")
-    elif bold:
-        print(f"{BOLD_PURPLE}{s}{RESET}")
-    elif is_error and bold:
-        print(f"{BOLD_RED}{s}{RESET}")
-    elif default:
-        print(f'{s}')
+    # match() gives a Match object or None; convert it so the
+    # annotated bool return type actually holds
+    return SIMPLEX_CHATROOM_PATTERN.match(url) is not None
+
+def RecognizeSimplexType(url: str) -> str:
+    """
+    Recognizes Simplex Server URL, returns smp, xftp or invalid.
+
+    The URL is matched against SIMPLEX_SERVER_PATTERN; on success the
+    first capture group (the scheme, 'smp' or 'xftp') is returned,
+    otherwise the string 'invalid'.
+    """
+    match = SIMPLEX_SERVER_PATTERN.match(url)
+    if match:
+        return match.group(1)
    else:
-        print(f"{PURPLE}{s}{RESET}")
+        return 'invalid'
+
+# stub function
+def IsXFTPServerValid(url: str) -> bool:
+    """
+    True only when RecognizeSimplexType reports the URL as 'xftp'
+    """
+    server_kind = RecognizeSimplexType(url)
+    return server_kind == 'xftp'
+
+# stub function
+def IsSMPServerValid(url: str) -> bool:
+    """
+    True only when RecognizeSimplexType reports the URL as 'smp'
+    """
+    server_kind = RecognizeSimplexType(url)
+    return server_kind == 'smp'
+
+def IsClearnetLinkValid(url: str) -> bool:
+    """
+    Returns True if the start of the URL matches CLEARNET_URL_PATTERN,
+    False otherwise.
+    """
+    # match() gives a Match object or None; return a real bool so the
+    # annotation holds and identity checks against True/False work
+    return CLEARNET_URL_PATTERN.match(url) is not None
+
+def IsOnionLinkValid(url: str) -> bool:
+    """
+    Returns True if URL matches ONION_URL_PATTERN (a valid onion URL),
+    False otherwise.
+
+    The result is always a real bool, so callers that compare it with
+    `is True` / `is not True` behave as expected.
+    """
+    # match() gives a Match object or None, and a Match object is never
+    # identical to True - an `is not True` loop would never terminate
+    return ONION_URL_PATTERN.match(url) is not None
+
+def RecognizeURLType(url: str) -> str:
+    """
+    Classifies the URL and returns one of:
+    - chatroom - SimpleX chatroom
+    - xftp - XFTP SimpleX server
+    - smp - SMP SimpleX server
+    - onion - onion URL
+    - clearnet - valid clearnet url
+    - invalid - none of the above (probably invalid)
+    """
+    # order is important here
+    # (ex. simplex chatroom is also valid clearnet link)
+    ordered_checks = (
+        ('chatroom', IsSimplexChatroomValid),
+        ('xftp', IsXFTPServerValid),
+        ('smp', IsSMPServerValid),
+        ('onion', IsOnionLinkValid),
+        ('clearnet', IsClearnetLinkValid),
+    )
+    for kind, is_match in ordered_checks:
+        if is_match(url):
+            return kind
+    return 'invalid'
+
+def IsURLValid(url: str) -> bool:
+    """
+    Checks if given URL is valid (RecognizeURLType recognizes it).
+
+    Anything that is not a str (for example a non-string DataFrame
+    cell) is reported as invalid instead of raising inside the regex
+    matching.
+    """
+    return isinstance(url, str) and RecognizeURLType(url) != 'invalid'


 #### Checking Functions to validate that links are legit ####
@ -54,7 +134,7 @@ def CheckUrl(url):
    }
    try:
        status = requests.get(url, proxies=proxies, timeout=5).status_code
-        return bool(status == 200)
+        return status == 200
    except requests.ConnectionError:
        return False
    except requests.exceptions.ReadTimeout:
@ -82,131 +162,13 @@ def IsBannerValid(path: str) -> bool:
    return True


-def IsOnionValid(url: str) -> bool:
-    """
-    Checks if the domain(param) is a valid onion domain and return True else False.
-    """
-    try:
-        # make sure the protocol is there
-        if not url.startswith(('http://', 'https://')):
-            url = 'http://' + url.strip().removesuffix('/')
-
-        domain = url.split('/')[2]
-
-        if ONION_URL_PATTERN.fullmatch(domain):
-            parts_count = len(domain.split('.'))
-            # TODO: we probably don't really need to check 62 char length
-            # regex does that beforehand
-            return (len(domain) == 62) and (parts_count <= 3)
-    except Exception:
-        return False
-
-
-def IsSimpleXChatroomValid(url: str) -> bool:
-    """Validate the SimpleX chatroom URL."""
-    REQUIRED_SUBSTRING = "#/?v=2-7&smp=smp%3A%2F"
-
-    # Step 1: Check if it starts with http://, https://, or simplex:/
-    if url.startswith(('http://', 'https://', 'simplex:/')):
-        # Step 1.5: If http:// or https://, check for valid clearnet or onion domain
-        if url.startswith(('http://', 'https://')) \
-                and RecognizeUrlOnionClear(url) != 'invalid':
-            return False
-    elif not url.startswith('simplex:/'):
-        return False  # Must start with one of the valid protocols
-
-    # Step 2: Check for the presence of the required substring
-    if REQUIRED_SUBSTRING not in url:
-        return False  # Required substring not found
-
-    # Step 3: Extract the part after "smp=smp%3A%2F"
-    smp_start = url.find("smp=smp%3A%2F")
-    if smp_start == -1:
-        return False  # Required substring not found
-
-    smp_start += len("smp=smp%3A%2F")
-    smp_end = url.find("&", smp_start)
-    if smp_end == -1:
-        smp_end = len(url)  # Take until the end if no "&" is found
-
-    smp_value = urllib.parse.unquote(url[smp_start:smp_end])  # Decode the URL-encoded string
-
-    # Step 3.5: Check if the smp_value contains a valid hostname
-    if '@' not in smp_value:
-        return False  # Must contain '@' to separate fingerprint and hostname
-
-    fingerprint, hostname = smp_value.split('@', 1)
-    if RecognizeUrlOnionClear(hostname) != 'invalid':
-        return False  # Invalid hostname
-
-    # Step 4: Check for the presence of "%2F" in the original URL
-    if "%2F" not in url:
-        return False  # Required substring not found
-
-    # If all checks pass, return True
-    return True
-
-def RecognizeUrlOnionClear(url: str) -> str:
-    """
-    Recognize if the URL is invalid, onion or clearnet.
-    """
-
-    # early terminate preconditions
-    if len(url) < 4 or (';' in url) or ('.' not in url):
-        return 'invalid'
-
-    # check if possibly onion url, here just perliminary check
-    # IsOnionValid checks it against regex expression
-    if '.onion' in url:
-        if IsOnionValid(url):
-            return 'onion'
-
-    if URL_PATTERN.fullmatch(url):
-        return 'clearnet'
-
-    return 'invalid'
-
-
-def RecognizeUrlFull(url: str) -> str:
-    """
-    Recognize if URL is smp, xftp, simplex groupchat, onion, clearnet or just invalid
-    Depends on RecognizeUrlOnionClear
-    """
-    if IsSimpleXChatroomValid(url):
-        return 'chatroom'
-    if url.startswith(('http://', 'https://')):
-        return RecognizeUrlOnionClear(url)
-    if url.startswith('xftp://'):
-        if IsSimpleXServerValid(url):
-            return 'xftp'
-    if url.startswith('smp://'):
-        if IsSimpleXServerValid(url):
-            return 'smp'
-    return 'invalid'
-
-#def IsUrlValid(url:str)->bool:
-#        """
-#        Check if url is valid both dark net end clearnet.
-#        """
-#        pattern  = re.compile("^[A-Za-z0-9:/.-]+$")
-#        url = str(url)
-#        if len(url) < 4:
-#                return False
-#        if url.endswith('.onion'):
-#                return IsOnionValid(url)
-#        else:
-#                if not url.__contains__('.'):
-#                        return False
-#                if pattern.fullmatch(url) is None:
-#                        return False
-#                return True


 def IsStatusValid(status: str) -> bool:
    """
    Checks if status contains only ['YES','NO']. Verbose only if False is returned
    """
-    pattern = ['YES','NO','✔️','❌','']
+    pattern = ['YES','NO','']
    status = status.strip()
    if status not in pattern:
        return False
@ -230,7 +192,7 @@ def IsScoreValid(score: str) -> bool:
    return True


-def IsDescriptionValid(desc:str)->bool:
+def IsDescriptionValid(desc: str) -> bool:
    """
    Check the categories are only [a-zA-Z0-9.' ] with 256 max chars.
    """
@ -263,40 +225,6 @@ def IsCategoryValid(categories: list[str]) -> bool:
            return True


-def IsSimpleXServerValid(url: str) -> bool:
-    pattern = re.compile('[0-9A-Za-z-_]*')
-    url = url.strip()
-    try:
-
-        if url.startswith(('smp://', 'xftp://')):
-            # Remove the protocol part
-            proless = url.split('//', 1)[-1]
-            # Split the fingerprint and hostname
-            parts = proless.split('@')
-            if len(parts) != 2:
-                return False  # Must have exactly one '@' character
-
-            fingerprint = parts[0]
-            hostname = parts[1].split(',')[0]  # Get the hostname before any comma
-
-            # Check fingerprint length and pattern
-            if len(fingerprint) == 44 and pattern.match(fingerprint):
-                # Validate the hostname
-                if RecognizeUrlOnionClear(hostname) != 'invalid':
-                    # Check for an optional comma and a valid onion domain
-                    if ',' in proless:
-                        onion_part = proless.split(',')[1].strip()
-                        if RecognizeUrlOnionClear(onion_part) != 'invalid':
-                            return False
-                    return True
-            return False
-    except Exception as e:
-        print(e)
-        # Any error will be a false
-        return False
-        
-
-
 def IsNameValid(name: str) -> bool:
    """
    Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
@ -325,3 +253,19 @@ def send_server_checks(url: str) -> tuple[str, str, str]:
        failed_response = response['resp'].get('testFailure')

    return (response, resp_type, failed_response)
+
+
+def print_colors(s:str=' ', bold:bool=False, is_error:bool = False, default:bool=False):
+    """
+    Helper function to print with colors.
+
+    s        -- text to print
+    bold     -- wrap the text in the BOLD_* color code
+    is_error -- wrap the text in the RED color code instead of PURPLE
+    default  -- print the text without color codes (only used when
+                neither bold nor is_error is set)
+    """
+    # the combined case has to be tested first, otherwise the plain
+    # is_error branch always wins and BOLD_RED is never used
+    if is_error and bold:
+        print(f"{BOLD_RED}{s}{RESET}")
+    elif is_error:
+        print(f"{RED}{s}{RESET}")
+    elif bold:
+        print(f"{BOLD_PURPLE}{s}{RESET}")
+    elif default:
+        print(f'{s}')
+    else:
+        print(f"{PURPLE}{s}{RESET}")