fix simplex parsing

2025-07-01 12:46:41 +00:00 · 2025-04-14 23:26:46 +01:00 · 2025-04-14 23:26:46 +01:00 · 013673fffb
commit 013673fffb
parent 13d39dc5c2
2 changed files with 9 additions and 6 deletions
--- a/SimpleX/regex_simplexlinks.py
+++ b/SimpleX/regex_simplexlinks.py
@@ -20,17 +20,18 @@ hostname_pattern = re.compile(r'^(?:[a-zA-Z0-9.-]+|[0-9]{1,3}(?:\.[0-9]{1,3}){3}
 def IsSimpleXChatroomValid(url: str) -> bool:
    """Validate the SimpleX chatroom URL."""
    REQUIRED_SUBSTRING = "contact#/?v=2-7&smp=smp%3A%2F"
+    REQUIRED_SUBSTRING2 = "contact/#/?v=2-7&smp=smp%3A%2F"
    # Step 1: Check if it starts with http://, https://, or simplex:/
    if url.startswith(('http://', 'https://', 'simplex:/')):
        # Step 1.5: If http:// or https://, check for valid clearnet or onion domain
-        if url.startswith(('http://', 'https://')):
+        if url.startswith(('http://', 'https://')) and not IsUrlValid(url):
-            return IsUrlValid(url)
+            return False
    elif not url.startswith('simplex:/'):
        return False  # Must start with one of the valid protocols
    # Step 2: Check for the presence of the required substring
-    if REQUIRED_SUBSTRING not in url:
+    if REQUIRED_SUBSTRING not in url and REQUIRED_SUBSTRING2 not in url:
        return False  # Required substring not found
    # Step 3: Extract the part after "smp=smp%3A%2F"
--- a/SimpleX/utils.py
+++ b/SimpleX/utils.py
@@ -6,6 +6,7 @@ def IsOnionValid(url: str)-> bool:
    """
    try:
        pattern = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+(.onion)$")
+        url_pattern = re.compile(r"^(\w+:)?(?://)?(\w+\.)?[a-z2-7]{56}\.onion")
        url = url.strip().removesuffix('/')
        if url.startswith('http://'):
            domain = url.split('/')[2]
@@ -22,14 +23,14 @@ def IsOnionValid(url: str)-> bool:
                return False
        else:
                        #TODO : edit the url to make sure it has http:// at the beginning, in case if it's missing? (problem is that it only returns true or false)
-            if pattern.fullmatch(url) is not None:
+            if url_pattern.match(url) is not None:
                if len(url.split('.')) > 3:
                    return False
                else:
                    if len(url) < 62:
                        return False
                    return True
-            elif pattern.fullmatch(url) is None:
+            elif url_pattern.match(url) is None:
                return False
            else:
                return False
@@ -41,10 +42,11 @@ def IsUrlValid(url:str)->bool:
        Check if url is valid both dark net end clearnet.
        """
        pattern = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+$")
+        onion_pattern = re.compile(r"^(\w+:)?(?://)?(\w+\.)?[a-z2-7]{56}\.onion")
        url = str(url)
        if len(url) < 4:
                return False
-        if url.endswith('.onion'):
+        if onion_pattern.match(url) is not None:
                return IsOnionValid(url)
        else:
                if not url.__contains__('.'):