From fb09b21684248baf694befe4ee0a5b1954d8c589 Mon Sep 17 00:00:00 2001 From: SovereigntyIsNotFreedom Date: Sun, 6 Apr 2025 08:19:33 +0100 Subject: [PATCH] issue #25: still WIP --- SimpleX/__pycache__/utils.cpython-313.pyc | Bin 0 -> 2093 bytes SimpleX/regex_simplexlinks.py | 131 +++++++++++++++++++--- SimpleX/utils.py | 54 +++++++++ 3 files changed, 167 insertions(+), 18 deletions(-) create mode 100644 SimpleX/__pycache__/utils.cpython-313.pyc create mode 100644 SimpleX/utils.py diff --git a/SimpleX/__pycache__/utils.cpython-313.pyc b/SimpleX/__pycache__/utils.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..789317461ba803507ab868960ac36ab2dc3e971e GIT binary patch literal 2093 zcmb7F-%s0C6uyr0iv$uv03%>vAcKS!LQ6|q#F$t+)NWF@VVKsXEi%DL42~ULI}BJi zReJ*Q0F+hmm`C<+to;M9{E*1wq0;ukTP8Yb+V-$>^TV)=YTcE5?)ACnd_MQ)d|yr* z8axQb_WZBOk6Z}-OkS$PURj)uz~VXL5J!ECFyd?vDUN<2n@O$zxAa z5$6d>zi-l6ESXL$Xjv@BsR|^S9PrTaoZht&_%0M9-qM%>PwnW1&tI?P%hVj2A^A1q zdgXlvj#4`~o0W|qyRLR0A&#y_ge#l;5+bqQ3t}xa&Q1!EZFAI2!@uL^thfcZ#hAYu zF=x9F)mg2eI&-e^?-AnR+)4NOY-_)6ys5gM^!!&1%}+lwzXQQ z+}qS@MOSO>L9xc_t+@~n?JCpVh;zvF6rFUJ=LD5N?}0Z#MZD{ud2XkK__E51306x9 z?4pw9MLC?|F`tgGqRR5@3NML^tRjnwT-j!Md66wQDLad^0xL+Wz)tgEhdt|Uk7H9q zU-CoiBSW`_A6*I$mr^6|hfFHE?n-G|W;_}_Ax(SVq=rvOS5Kczf}M@jnKmq#uDFuU zh>~DBR1J$6vmOg+Wkpc4iG;XndQ^?an);Qfr63?9iJDoL$VyV0*WxLYmIT?X`*1Zb zWHcx^0)zu()0N>hO~A70EFaLM1eGwBg<+0GQ%YKhCV4d@;OM7VSrX!!8eQb^vMgvr z5-*d~C?}>flJG?|tBI02oLMvL@2Q`VTlt(^5N-oF@-5W~ee)C*-Cd5B{=zQDG)DQ+$2ef_$xKc^bLOB-~->)W2(oIHr+y&>HjDzvoiKiGY6pzb}+ zP3bND8`A}UYu?Z5em2)?_`|yY+HtUTXJ!A%?vvbYg9#hK$l;U`94d6MzcB3wV|!oc zW(;Pa&>bptbQc0``9QB8=*^88fk8cRwZL?~VmgaX)W}pogNFZI-G7CEs{3oZYq{Xw zw??r4Fkl1+E108utGV92Z)zb&jlh5&h?XILKlV1CA_@-eZ~n7L!{QH>XyJDs`0q5J zADc?HuZgU0jDUR+CY~3!i05U4=S~m(veN8E zhPT;>8E4hU?fD*y-C!ZP3SF--fz%bBEuW zfSw|}nAn^+aOb@}y0_<;=_|GSQDgWO)1GGrbY|d?&NG*F<}zu_V0%6o(u1KK2ZYdr zSBQ`4IE7E5h@*VE4p^J^1x1lc=fR|~9}w<_sa&;tBCRZDC1Dcxf)9Qmq(;F&H&fJ4 lsIF-5rCN64MFi8q>}xU=?^1rMb7!=OU^;02jZ7uLzX6x&qV@m) literal 0 HcmV?d00001 diff --git a/SimpleX/regex_simplexlinks.py b/SimpleX/regex_simplexlinks.py index 7bf7c90..a509a8e 100644 --- a/SimpleX/regex_simplexlinks.py +++ b/SimpleX/regex_simplexlinks.py @@ -1,34 +1,128 @@ import re -def IsSimpleXChatroomValid(url: str) -> bool: - #simplex:/ contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40 b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion %2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D - #simplex:/ contact#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40 smp5.simplex.im %2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D - print("isSimplexChatroomValid", url) +from utils import IsUrlValid +import urllib.parse + + #simplex:/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40 b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion %2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D + #simplex:/contact#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40 smp5.simplex.im %2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D #TODO: 1) check if it starts with http:// or https:// OR simplex:/ #TODO: 1.5) if http:// or https:// check if it has a valid clearnet or onion domain (else if simplex:/ then no need for the domain check) #TODO: 2) check if AFTER it has the "contact#/?v=2-7&smp=smp%3A%2F%2F[...]%3D%40" string in it - #TODO: 3) check if AFTER it has a valid hostname (can be an ip, a clearnet domain or an onion domain) + #TODO: 3) check if AFTER it has a valid hostname (can be an ip, a clearnet domain or an onion domain) #TODO: 4) check if AFTER it has the %2F[...]%3D%3D%22%7D - #TODO: only if all is OK up until 4 (or 5) return True, else False + #TODO: only if all is OK up until 4 (or 5) return True, else False + # Regular expression pattern for a valid hostname (can be an IP, domain, or onion) + +pattern = re.compile('[0-9A-Za-z-_]*') + +# Regular expression pattern for a valid hostname (can be an IP, domain, or onion) +hostname_pattern = re.compile(r'^(?:[a-zA-Z0-9.-]+|[0-9]{1,3}(?:\.[0-9]{1,3}){3}|[a-zA-Z0-9-]+\.onion)$') + + + +def IsSimpleXChatroomValid(url: str) -> bool: + # Step 1: Check if it starts with http://, https://, or simplex:/ + if url.startswith(('http://', 'https://', 'simplex:/')): + # Step 1.5: If http:// or https://, check for valid clearnet or onion domain + if url.startswith(('http://', 'https://')): + parsed_url = urllib.parse.urlparse(url) + print(parsed_url.hostname) + if not IsUrlValid(parsed_url.hostname): + return False # Invalid domain + elif not url.startswith('simplex:/'): + return False # Must start with one of the valid protocols + + + # Step 2: Check for the presence of "contact#/?v=2-7&smp=smp%3A%2F" + if "contact#/?v=2-7&smp=smp%3A%2F" not in url: + return False # Required substring not found + + + # Step 3: Extract the part after "smp=smp%3A%2F" and before the next "&" or end of string + smp_start = url.find("smp=smp%3A%2F") + if smp_start == -1: + return False # Required substring not found + + + smp_start += len("smp=smp%3A%2F") + smp_end = url.find("&", smp_start) + if smp_end == -1: + smp_end = len(url) # Take until the end if no "&" is found + + + smp_value = urllib.parse.unquote(url[smp_start:smp_end]) # Decode the URL-encoded string + + + # Step 3.5: Check if the smp_value contains a valid hostname + if '@' not in smp_value: + return False # Must contain '@' to separate fingerprint and hostname + + + fingerprint, hostname = smp_value.split('@', 1) + if not IsUrlValid(hostname): + return False # Invalid hostname + + + # Step 4: Check for the presence of "%2F" in the original URL + if "%2F" not in url: + return False # Required substring not found + + + # If all checks pass, return True + return True + + -def IsSimpleXServerValid(url: str) -> bool: + #xftp://fingerprint:password@host1,host2 # format 1 : smp://[:]@[,] # format 2 : xftp://[:]@[,] #xftp://Rh19D5e4Eez37DEE9hAlXDB3gZa1BdFYJTPgJWPO9OI=@xftp5.simplexonflux.com - #TODO: 1) check if it starts with smp:// OR xftp:// - #TODO: 2) check if AFTER it has the 44 char fingerprint (with the following chars a-zA-Z0-9-_ (base64 format?)) - #TODO: 3) check if AFTER it has the @ string in it - #TODO: 4) check if AFTER it has a valid domain (can be an ip, a clearnet domain or an onion domain) - #TODO: 5) ONLY IF THERE IS A COMMA AFTER, check if it has a valid onion domain + #TODO: 2) check if AFTER it has the 44 char fingerprint (with the following chars a-zA-Z0-9-_ (base64 format?)) + #TODO: 3) check if AFTER it has the @ string in it + # the try block checks for that + #TODO: 4) check if AFTER it has a valid domain (can be an ip, a clearnet domain or an onion domain) + #TODO: 5) ONLY IF THERE IS A COMMA AFTER, check if it has a valid onion domain #TODO: only if all is OK up until 4 (or 5) return True, else False - print("IsSimplexServerValid", url) + + + + +def IsSimpleXServerValid(url: str) -> bool: + url = url.strip() + try: + if url.startswith(('smp://', 'xftp://')): + # Remove the protocol part + proless = url.split('//', 1)[-1] + # Split the fingerprint and hostname + parts = proless.split('@') + if len(parts) != 2: + return False # Must have exactly one '@' character + + fingerprint = parts[0] + hostname = parts[1].split(',')[0] # Get the hostname before any comma + + # Check fingerprint length and pattern + if len(fingerprint) == 44 and pattern.match(fingerprint): + # Validate the hostname + result = IsUrlValid(hostname) + if result: + # Check for an optional comma and a valid onion domain + if ',' in proless: + onion_part = proless.split(',')[1].strip() + if not hostname_pattern.match(onion_part): + return False + return True + return False + except Exception as e: + # Any error will be a false + return False if __name__ == '__main__': ### SHOULD RETURN TRUE: ### - link = 'smp://BD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI=@b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion' + #link = 'smp://BD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI=@b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion' #link = 'smp://KO7hwMqeal3RmpOmt_1xGRwWh723vbDMmuyYxC6tfKM=@smp1.taurix.net' #link = 'smp://PN7-uqLBToqlf1NxHEaiL35lV2vBpXq8Nj8BW11bU48=@smp6.simplexonflux.com' #link = 'smp://OQuEA1D0jtlUDVALyVGWMr8LcMDan6g1CN_d23y2cTI=@65.109.174.146' @@ -37,13 +131,14 @@ if __name__ == '__main__': #link = 'xftp://Rh19D5e4Eez37DEE9hAlXDB3gZa1BdFYJTPgJWPO9OI=@xftp5.simplexonflux.com' #link = 'xftp://__t00f17zicHnk2E8n5-AI-YYxQB5sWCY2oYw2m9ZUg=@xftp2.adminforge.de:4433' #link = 'xftp://9bkubN5akZbxDn48tXed-DoZd_fiSQEiIfn0u3M81LQ=@xftp2.asriyan.me:15510' - IsSimpleXServerValid(link) + ### ALL TESTS PASSED ### + #print(IsSimpleXServerValid(link)) ### SHOULD RETURN TRUE: ### - link = 'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D' + #link = 'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D' #link = 'simplex:/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D' #link = 'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' - #link = 'https://simplex.hackliberty.org/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' + link = 'https://simplex.hackliberty.org/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' #link = 'http://b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D' - IsSimpleXChatroomValid(link) + print(IsSimpleXChatroomValid(link)) diff --git a/SimpleX/utils.py b/SimpleX/utils.py new file mode 100644 index 0000000..c704dcb --- /dev/null +++ b/SimpleX/utils.py @@ -0,0 +1,54 @@ +import re + +def IsOnionValid(url: str)-> bool: + """ + Checks if the domain(param) is a valid onion domain and return True else False. + """ + try: + pattern = re.compile("^[A-Za-z0-9.]+(.onion)?$") + url = url.strip().removesuffix('/') + if url.startswith('http://'): + domain = url.split('/')[2] + if pattern.fullmatch(domain) is not None: + if len(domain.split('.')) > 3: + return False + else: + if len(domain) < 62: + return False + return True + elif pattern.fullmatch(domain) is None: + return False + else: + return False + else: + #TODO : edit the url to make sure it has http:// at the beginning, in case if it's missing? (problem is that it only returns true or false) + if pattern.fullmatch(url) is not None: + if len(url.split('.')) > 3: + return False + else: + if len(url) < 62: + return False + return True + elif pattern.fullmatch(url) is None: + return False + else: + return False + except Exception as e: + return False + +def IsUrlValid(url:str)->bool: + """ + Check if url is valid both dark net end clearnet. + """ + pattern = re.compile("^[A-Za-z0-9:/.-]+$") + url = str(url) + if len(url) < 4: + return False + if url.endswith('.onion'): + return IsOnionValid(url) + else: + if not url.__contains__('.'): + return False + if pattern.fullmatch(url) is None: + return False + return True \ No newline at end of file