From 574a11fdc5218978313ce1a09911c634f321e4a9 Mon Sep 17 00:00:00 2001 From: SovereigntyIsNotFreedom Date: Mon, 7 Apr 2025 12:41:21 +0100 Subject: [PATCH] issue #25: all test checked and passed. the funcs in the utils file are specifically for this folder don't use elsewhere --- SimpleX/__pycache__/utils.cpython-313.pyc | Bin 2093 -> 2109 bytes SimpleX/regex_simplexlinks.py | 81 +++++++++------------- SimpleX/utils.py | 6 +- 3 files changed, 37 insertions(+), 50 deletions(-) diff --git a/SimpleX/__pycache__/utils.cpython-313.pyc b/SimpleX/__pycache__/utils.cpython-313.pyc index 789317461ba803507ab868960ac36ab2dc3e971e..d46565c244981a200d8bcf6b86ed89cc665f5e39 100644 GIT binary patch delta 115 zcmZ20uvdWhGcPX}0}ycT{G1-MkynaYQ86ysQ8y}4x5_}*(n?=1URBpt*34*8zV%9fJS> diff --git a/SimpleX/regex_simplexlinks.py b/SimpleX/regex_simplexlinks.py index a509a8e..5bdf3a6 100644 --- a/SimpleX/regex_simplexlinks.py +++ b/SimpleX/regex_simplexlinks.py @@ -17,61 +17,48 @@ pattern = re.compile('[0-9A-Za-z-_]*') # Regular expression pattern for a valid hostname (can be an IP, domain, or onion) hostname_pattern = re.compile(r'^(?:[a-zA-Z0-9.-]+|[0-9]{1,3}(?:\.[0-9]{1,3}){3}|[a-zA-Z0-9-]+\.onion)$') - - def IsSimpleXChatroomValid(url: str) -> bool: - # Step 1: Check if it starts with http://, https://, or simplex:/ - if url.startswith(('http://', 'https://', 'simplex:/')): - # Step 1.5: If http:// or https://, check for valid clearnet or onion domain - if url.startswith(('http://', 'https://')): - parsed_url = urllib.parse.urlparse(url) - print(parsed_url.hostname) - if not IsUrlValid(parsed_url.hostname): - return False # Invalid domain - elif not url.startswith('simplex:/'): - return False # Must start with one of the valid protocols + """Validate the SimpleX chatroom URL.""" + REQUIRED_SUBSTRING = "contact#/?v=2-7&smp=smp%3A%2F" + + # Step 1: Check if it starts with http://, https://, or simplex:/ + if url.startswith(('http://', 'https://', 'simplex:/')): + # Step 1.5: If http:// or https://, check for valid clearnet or onion domain + if url.startswith(('http://', 'https://')): + return IsUrlValid(url) + elif not url.startswith('simplex:/'): + return False # Must start with one of the valid protocols + # Step 2: Check for the presence of the required substring + if REQUIRED_SUBSTRING not in url: + return False # Required substring not found - # Step 2: Check for the presence of "contact#/?v=2-7&smp=smp%3A%2F" - if "contact#/?v=2-7&smp=smp%3A%2F" not in url: - return False # Required substring not found + # Step 3: Extract the part after "smp=smp%3A%2F" + smp_start = url.find("smp=smp%3A%2F") + if smp_start == -1: + return False # Required substring not found + smp_start += len("smp=smp%3A%2F") + smp_end = url.find("&", smp_start) + if smp_end == -1: + smp_end = len(url) # Take until the end if no "&" is found - # Step 3: Extract the part after "smp=smp%3A%2F" and before the next "&" or end of string - smp_start = url.find("smp=smp%3A%2F") - if smp_start == -1: - return False # Required substring not found + smp_value = urllib.parse.unquote(url[smp_start:smp_end]) # Decode the URL-encoded string + # Step 3.5: Check if the smp_value contains a valid hostname + if '@' not in smp_value: + return False # Must contain '@' to separate fingerprint and hostname - smp_start += len("smp=smp%3A%2F") - smp_end = url.find("&", smp_start) - if smp_end == -1: - smp_end = len(url) # Take until the end if no "&" is found + fingerprint, hostname = smp_value.split('@', 1) + if not IsUrlValid(hostname): + return False # Invalid hostname + # Step 4: Check for the presence of "%2F" in the original URL + if "%2F" not in url: + return False # Required substring not found - smp_value = urllib.parse.unquote(url[smp_start:smp_end]) # Decode the URL-encoded string - - - # Step 3.5: Check if the smp_value contains a valid hostname - if '@' not in smp_value: - return False # Must contain '@' to separate fingerprint and hostname - - - fingerprint, hostname = smp_value.split('@', 1) - if not IsUrlValid(hostname): - return False # Invalid hostname - - - # Step 4: Check for the presence of "%2F" in the original URL - if "%2F" not in url: - return False # Required substring not found - - - # If all checks pass, return True - return True - - - + # If all checks pass, return True + return True #xftp://fingerprint:password@host1,host2 @@ -139,6 +126,6 @@ if __name__ == '__main__': #link = 'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D' #link = 'simplex:/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D' #link = 'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' - link = 'https://simplex.hackliberty.org/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' + #link = 'https://simplex.hackliberty.org/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' #link = 'http://b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' print(IsSimpleXChatroomValid(link)) diff --git a/SimpleX/utils.py b/SimpleX/utils.py index c704dcb..152571b 100644 --- a/SimpleX/utils.py +++ b/SimpleX/utils.py @@ -5,7 +5,7 @@ def IsOnionValid(url: str)-> bool: Checks if the domain(param) is a valid onion domain and return True else False. """ try: - pattern = re.compile("^[A-Za-z0-9.]+(.onion)?$") + pattern = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+(.onion)$") url = url.strip().removesuffix('/') if url.startswith('http://'): domain = url.split('/')[2] @@ -40,7 +40,7 @@ def IsUrlValid(url:str)->bool: """ Check if url is valid both dark net end clearnet. """ - pattern = re.compile("^[A-Za-z0-9:/.-]+$") + pattern = re.compile(r"^[A-Za-z0-9:/._%-=#?&@]+$") url = str(url) if len(url) < 4: return False @@ -51,4 +51,4 @@ def IsUrlValid(url:str)->bool: return False if pattern.fullmatch(url) is None: return False - return True \ No newline at end of file + return True