# darknet-lantern/SimpleX/regex_simplexlinks.py (131 lines, 7.8 KiB, Python)

import re
from SimpleX.utils import IsUrlValid
import urllib.parse
#simplex:/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40 b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion %2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D
#simplex:/contact#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40 smp5.simplex.im %2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D
#TODO: 1) check if it starts with http:// or https:// OR simplex:/
#TODO: 1.5) if http:// or https:// check if it has a valid clearnet or onion domain (else if simplex:/ then no need for the domain check)
#TODO: 2) check if AFTER it has the "contact#/?v=2-7&smp=smp%3A%2F%2F[...]%3D%40" string in it
#TODO: 3) check if AFTER it has a valid hostname (can be an ip, a clearnet domain or an onion domain)
#TODO: 4) check if AFTER it has the %2F[...]%3D%3D%22%7D
#TODO: only if all is OK up until 4 (or 5) return True, else False
# Regular expression for a SimpleX server fingerprint: one or more base64url
# characters (A-Z, a-z, 0-9, '-', '_') followed by at most two '=' padding
# characters. It is anchored at both ends so that pattern.match() rejects a
# string containing any other character. The earlier unanchored '[...]*' form
# matched the empty prefix of every string and therefore never rejected anything.
pattern = re.compile(r'^[0-9A-Za-z_-]+={0,2}\Z')
# Regular expression for a bare hostname (IPv4 address, clearnet domain or
# .onion domain). The first alternative already accepts any non-empty run of
# letters, digits, dots and hyphens, so the IPv4 and .onion alternatives are
# subsumed by it; a ':port' suffix is not accepted.
hostname_pattern = re.compile(r'^(?:[a-zA-Z0-9.-]+|[0-9]{1,3}(?:\.[0-9]{1,3}){3}|[a-zA-Z0-9-]+\.onion)$')
def IsSimpleXChatroomValid(url: str) -> bool:
    """Validate a SimpleX chatroom (contact/group) invitation link.

    The link is accepted when all of the following hold:

    1. It starts with ``http://``, ``https://`` or ``simplex:/``. For the two
       web schemes the whole link must also pass ``IsUrlValid`` (imported from
       ``SimpleX.utils``; its exact rules are not visible in this module).
    2. It contains the literal ``#/?v=2-7&smp=smp%3A%2F``.
    3. The percent-decoded ``smp`` value (everything after the first
       ``smp=smp%3A%2F`` up to the next ``&``) contains an ``@``, and the text
       after the first ``@`` passes ``IsUrlValid``.

    Args:
        url: The candidate link.

    Returns:
        True when every check passes, False otherwise (including when ``url``
        is not a string).
    """
    # NOTE(review): only the literal version range "2-7" is accepted; a link
    # advertising any other range is rejected - confirm this is intended.
    REQUIRED_SUBSTRING = "#/?v=2-7&smp=smp%3A%2F"
    SMP_MARKER = "smp=smp%3A%2F"

    if not isinstance(url, str):
        return False

    # Step 1 / 1.5: scheme check; web links must additionally be valid URLs.
    if url.startswith(('http://', 'https://')):
        if not IsUrlValid(url):
            return False
    elif not url.startswith('simplex:/'):
        return False

    # Step 2: the required substring also guarantees that SMP_MARKER and
    # "%2F" are present, so neither needs a separate check afterwards.
    if REQUIRED_SUBSTRING not in url:
        return False

    # Step 3: take the text after the first marker occurrence, up to the next
    # "&" (or the end of the link), and percent-decode it once.
    after_marker = url.partition(SMP_MARKER)[2]
    smp_value = urllib.parse.unquote(after_marker.partition('&')[0])

    # Step 3.5: "<fingerprint>@<host...>" - only the host side is validated.
    _, at_sign, hostname = smp_value.partition('@')
    if not at_sign:
        return False
    return bool(IsUrlValid(hostname))
#xftp://fingerprint:password@host1,host2
# format 1 : smp://<fingerprint>[:<password>]@<public_hostname>[,<onion_hostname>]
# format 2 : xftp://<fingerprint>[:<password>]@<public_hostname>[,<onion_hostname>]
#xftp://Rh19D5e4Eez37DEE9hAlXDB3gZa1BdFYJTPgJWPO9OI=@xftp5.simplexonflux.com
#TODO: 1) check if it starts with smp:// OR xftp://
#TODO: 2) check if AFTER it has the 44 char fingerprint (with the following chars a-zA-Z0-9-_ (base64 format?))
#TODO: 3) check if AFTER it has the @ string in it
# the try block checks for that
#TODO: 4) check if AFTER it has a valid domain (can be an ip, a clearnet domain or an onion domain)
#TODO: 5) ONLY IF THERE IS A COMMA AFTER, check if it has a valid onion domain
#TODO: only if all is OK up until 4 (or 5) return True, else False
# Fingerprint alphabet: base64url characters with at most two trailing '='
# padding characters. Used with fullmatch() so every character is checked.
_FINGERPRINT_RE = re.compile(r'[0-9A-Za-z_-]+={0,2}')


def IsSimpleXServerValid(url: str) -> bool:
    """Validate a SimpleX SMP/XFTP server address.

    Expected shape: ``smp://<fingerprint>@<host>[,<second_host>]`` or the same
    with the ``xftp://`` scheme. The checks are:

    1. The address starts with ``smp://`` or ``xftp://``.
    2. There is exactly one ``@``.
    3. The part before ``@`` is exactly 44 characters long and consists only
       of base64url characters with optional trailing ``=`` padding. A
       ``fingerprint:password`` prefix therefore does not pass.
    4. The first host passes ``IsUrlValid`` (imported from ``SimpleX.utils``;
       its exact rules are not visible in this module).
    5. If a comma follows, the second host matches ``hostname_pattern``.

    Args:
        url: The candidate server address; surrounding whitespace is ignored.

    Returns:
        True when every check passes, False otherwise. Non-string input and
        any exception raised while validating the hosts also yield False.
    """
    if not isinstance(url, str):
        return False
    url = url.strip()
    if not url.startswith(('smp://', 'xftp://')):
        return False

    # Drop the scheme; what remains is "<fingerprint>@<host>[,<second_host>]".
    address = url.split('//', 1)[-1]
    parts = address.split('@')
    if len(parts) != 2:
        return False  # Must have exactly one '@' character
    fingerprint, host_list = parts

    # fullmatch() is required here: a plain match() on a '*'-quantified
    # pattern succeeds on any input and would let arbitrary text through.
    if len(fingerprint) != 44 or not _FINGERPRINT_RE.fullmatch(fingerprint):
        return False

    hosts = host_list.split(',')
    try:
        if not IsUrlValid(hosts[0]):
            return False
        # Optional second host after the comma.
        if len(hosts) > 1 and not hostname_pattern.match(hosts[1].strip()):
            return False
    except Exception:
        # Best effort: a failure inside host validation means "not valid".
        return False
    return True
if __name__ == '__main__':
    # Manual smoke test: every link below is expected to be reported as valid.
    # The links are kept in lists and iterated so that running the module as a
    # script no longer depends on a `link` variable that was never assigned.
    server_links = [
        'smp://BD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI=@b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion',
        'smp://KO7hwMqeal3RmpOmt_1xGRwWh723vbDMmuyYxC6tfKM=@smp1.taurix.net',
        'smp://PN7-uqLBToqlf1NxHEaiL35lV2vBpXq8Nj8BW11bU48=@smp6.simplexonflux.com',
        'smp://OQuEA1D0jtlUDVALyVGWMr8LcMDan6g1CN_d23y2cTI=@65.109.174.146',
        'xftp://emX7ForsbdpIscNiDZ6b0HTbfFUayn00C1wmeVTofYA=@wg54vc6p3dscshywvt2wninachqoarrodtunapds7t7p47sn5e3qonid.onion:5233',
        'xftp://Rh19D5e4Eez37DEE9hAlXDB3gZa1BdFYJTPgJWPO9OI=@xftp5.simplexonflux.com',
        'xftp://__t00f17zicHnk2E8n5-AI-YYxQB5sWCY2oYw2m9ZUg=@xftp2.adminforge.de:4433',
        'xftp://9bkubN5akZbxDn48tXed-DoZd_fiSQEiIfn0u3M81LQ=@xftp2.asriyan.me:15510',
    ]
    chatroom_links = [
        'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D',
        'simplex:/contact#/?v=2-7&smp=smp%3A%2F%2FBD4qkVq8lJUgjHt0kUaxeQBYsKaxDejeecxm6-2vOwI%3D%40b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion%2FOReK0M4-3C5NeZyQx_yFuTHSknVVS-3h%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEANi5VHx-Q1mIKmgZEg2ls47NGSlntttvcgLLbfKBpym4%253D&data=%7B%22groupLinkId%22%3A%22ndniy85i4DjITgVhB-MXnQ%3D%3D%22%7D',
        'https://simplex.chat/contact#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D',
        'https://simplex.hackliberty.org/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D',
        'http://b6geeakpwskovltbesvy3b6ah3ewxfmnhnshojndmpp7wcv2df7bnead.onion/contact/#/?v=2-7&smp=smp%3A%2F%2Fhpq7_4gGJiilmz5Rf-CswuU5kZGkm_zOIooSw6yALRg%3D%40smp5.simplex.im%2F2KNui9H8xxaPTuHAsQzJlfLmz_SOMsFk%23%2F%3Fv%3D1-3%26dh%3DMCowBQYDK2VuAyEA8BPETTg3ooyvQ1LiMGeCFbh2MeK9NyubT6NLE8EJzyA%253D%26srv%3Djjbyvoemxysm7qxap7m5d5m35jzv5qq6gnlv7s4rsn7tdwwmuqciwpid.onion&data=%7B%22type%22%3A%22group%22%2C%22groupLinkId%22%3A%22wNDKKQR3EW8Y045FsVofSg%3D%3D%22%7D',
    ]

    ### SHOULD RETURN TRUE: ###
    for link in server_links:
        print(IsSimpleXServerValid(link), link)

    ### SHOULD RETURN TRUE: ###
    for link in chatroom_links:
        print(IsSimpleXChatroomValid(link), link)