[untested] RecognizeURLType and IsURLValid functions are working

This commit is contained in:
oxeo0 2025-05-30 00:21:20 +02:00
parent 08697f5c40
commit c9a2fbcfdd
5 changed files with 179 additions and 320 deletions

View file

@ -8,7 +8,7 @@ import requests
import json
import pandas as pd
import glob
from utils import IsSimpleXServerValid, send_server_checks
from utils import RecognizeURLType, IsOnionLinkValid, send_server_checks
@ -46,8 +46,8 @@ def main():
with open(urlpath) as f:
instance = f.read().rstrip()
# check if the instance URL domain is valid
if IsOnionValid(instance):
print("[+] Instance Name:",instance,IsOnionValid(instance))
if IsOnionLinkValid(instance):
print("[+] Instance Name:",instance,IsOnionLinkValid(instance))
isitvalid="y"
else:
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
@ -88,42 +88,41 @@ def main():
index2 = url.find("https://")
if url.startswith("smp://") or url.startswith("xftp://"):
if IsSimpleXServerValid(url):
if url.startswith("smp"):
resp,resp_type,failed_response = send_server_checks(url)
if resp_type in ["chatError", "contactSubSummary"]:
resp, resp_type,failed_response = send_server_checks(url)
if RecognizeURLType(url) == 'smp':
resp,resp_type,failed_response = send_server_checks(url)
if failed_response is None:
print(url, "✔️")
df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
print(url,"")
df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
if resp_type in ["chatError", "contactSubSummary"]:
resp, resp_type,failed_response = send_server_checks(url)
if failed_response is None:
print(url, "✔️")
df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
resp,resp_type,failed_response = send_server_checks(url)
if resp_type in ["chatError", "contactSubSummary"]:
resp, resp_type,failed_response = send_server_checks(url)
if failed_response is None:
print(url, "✔️")
df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
print(url,"")
df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
print(url,"")
df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
elif RecognizeURLType(url) == 'xftp':
resp,resp_type,failed_response = send_server_checks(url)
if resp_type in ["chatError", "contactSubSummary"]:
resp, resp_type,failed_response = send_server_checks(url)
if failed_response is None:
print(url, "✔️")
df.at[i, "Status"]="YES"
if df.at[i, "Score"] < 100:
df.at[i,"Score"] = df.at[i,"Score"] + 1
else:
print(url,"")
df.at[i,"Status"]="NO"
#if uptime >0 do -1 to the value
if df.at[i,"Score"] > 0:
df.at[i,"Score"] = df.at[i,"Score"] - 1
else:
@ -173,80 +172,6 @@ def main():
#print(df2)
df2.to_csv(csvfile, index=False)
def IsUrlValid(url: str) -> bool:
    """
    Check whether a URL is acceptable, for both darknet (.onion) and clearnet.

    The argument is first coerced with ``str()``, so non-string values are
    validated by their string form rather than raising.

    * If the text ends with ``.onion`` the decision is delegated entirely to
      ``IsOnionValid``.
    * Otherwise (clearnet) the text must contain at least one dot and consist
      only of the characters ``A-Z a-z 0-9 : / .``.

    No maximum length is enforced here.

    NOTE(review): an onion URL with a trailing slash or a path does not end
    with ``.onion`` and is therefore checked with the clearnet rules only --
    confirm whether that is intended.

    Returns:
        True when the URL passes the checks above, False otherwise.
    """
    url = str(url)

    # Darknet addresses have their own, stricter validator.
    if url.endswith('.onion'):
        return IsOnionValid(url)

    # A clearnet host needs at least one dot (domain.tld).
    if '.' not in url:
        return False

    # Only letters, digits and the URL punctuation ":", "/" and "." are allowed.
    return re.fullmatch(r"^[A-Za-z0-9:/.]+$", url) is not None
def IsOnionValid(url: str) -> bool:
    """
    Check whether ``url`` names a valid onion domain.

    Accepted forms are a bare host (``<address>.onion`` or
    ``<subdomain>.<address>.onion``) or the same host prefixed with
    ``http://``; a trailing slash and, for ``http://`` URLs, any path after
    the host are ignored. Any other scheme (e.g. ``https://``) is rejected
    because ``:`` and ``/`` are not valid host characters.

    The host must:
      * contain only ``A-Z a-z 0-9 .``,
      * end with ``.onion``,
      * have at most three dot-separated labels (one optional subdomain),
      * be at least 62 characters long (a 56-character address plus
        ``.onion``).

    Args:
        url: The URL or host name to validate.

    Returns:
        True if every check passes, False otherwise. Non-string input
        yields False instead of raising.
    """
    # Values such as None or NaN (e.g. from an empty CSV cell) are never valid.
    if not isinstance(url, str):
        return False

    host = url.strip().removesuffix('/')

    # "http://host/path" splits into ['http:', '', 'host', ...]; keep the host.
    if host.startswith('http://'):
        host = host.split('/')[2]

    # Only letters, digits and dots may appear in the host name.
    if re.fullmatch(r"[A-Za-z0-9.]+", host) is None:
        return False

    # The suffix is mandatory: a long clearnet host is not an onion address.
    if not host.lower().endswith('.onion'):
        return False

    # Allow "address.onion" or "subdomain.address.onion", nothing deeper.
    if len(host.split('.')) > 3:
        return False

    # Anything shorter than 62 characters cannot hold a full onion address.
    return len(host) >= 62
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
main()