mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 22:16:41 +00:00
[untested] RecognizeURLType and IsURLValid functions are working
This commit is contained in:
parent
08697f5c40
commit
c9a2fbcfdd
5 changed files with 179 additions and 320 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -2,6 +2,7 @@
|
||||||
www/participants/**
|
www/participants/**
|
||||||
crawler/**
|
crawler/**
|
||||||
scripts/__pycache__/**
|
scripts/__pycache__/**
|
||||||
|
scripts/_*.py
|
||||||
.env
|
.env
|
||||||
env/
|
env/
|
||||||
submissions/submission.csv
|
submissions/submission.csv
|
||||||
|
|
|
@ -11,8 +11,10 @@ import re
|
||||||
import sys
|
import sys
|
||||||
sys.path.append("..")
|
sys.path.append("..")
|
||||||
|
|
||||||
from utils import print_colors, IsUrlValid
|
from utils import (
|
||||||
from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
|
print_colors, IsURLValid, IsSimplexChatroomValid, RecognizeURLType
|
||||||
|
)
|
||||||
|
#from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# Make default parameters for arguments
|
# Make default parameters for arguments
|
||||||
|
@ -107,12 +109,12 @@ def add_urls(urls):
|
||||||
global output_file
|
global output_file
|
||||||
for url in urls:
|
for url in urls:
|
||||||
parsed_url = urllib.parse.urlparse(url)
|
parsed_url = urllib.parse.urlparse(url)
|
||||||
if IsSimpleXChatroomValid(url) and not (output_file['URL'] == url).any():
|
if IsSimplexChatroomValid(url) and not (output_file['URL'] == url).any():
|
||||||
output_file.loc[-1] = ["", url, "", "SimpleX Chatroom"]
|
output_file.loc[-1] = ["", url, "", "SimpleX Chatroom"]
|
||||||
output_file.index += 1
|
output_file.index += 1
|
||||||
output_file = output_file.sort_index()
|
output_file = output_file.sort_index()
|
||||||
continue
|
continue
|
||||||
elif IsSimpleXServerValid(url) and not (output_file['URL'] == url).any():
|
elif RecognizeURLType(url) in ('smp', 'xftp') and not (output_file['URL'] == url).any():
|
||||||
output_file.loc[-1] = ["", url, "", "SimpleX Server"]
|
output_file.loc[-1] = ["", url, "", "SimpleX Server"]
|
||||||
output_file.index += 1
|
output_file.index += 1
|
||||||
output_file = output_file.sort_index()
|
output_file = output_file.sort_index()
|
||||||
|
@ -164,13 +166,13 @@ def extract_urls_html(url, text):
|
||||||
print_colors(f'[D] Joined URL: {joined_url}')
|
print_colors(f'[D] Joined URL: {joined_url}')
|
||||||
|
|
||||||
# Capture SimpleX URLs
|
# Capture SimpleX URLs
|
||||||
if IsSimpleXChatroomValid(joined_url) or IsSimpleXServerValid(joined_url):
|
if RecognizeURLType(joined_url) in ('smp', 'xftp', 'chatroom'):
|
||||||
if url not in result.thirdp_urls:
|
if url not in result.thirdp_urls:
|
||||||
result.thirdp_urls.append(joined_url)
|
result.thirdp_urls.append(joined_url)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check if the URL is a .onion link or not even a web link
|
# Check if the URL is a .onion link or not even a web link
|
||||||
if not IsUrlValid(joined_url):
|
if not IsURLValid(joined_url):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print_colors(f'[+] Found url: {joined_url}')
|
print_colors(f'[+] Found url: {joined_url}')
|
||||||
|
@ -266,4 +268,3 @@ for i, url in enumerate(vcsv_urls):
|
||||||
crawl_url(url)
|
crawl_url(url)
|
||||||
crawler_file.to_csv(args.crawler_file, index=False)
|
crawler_file.to_csv(args.crawler_file, index=False)
|
||||||
output_file.to_csv(args.output, index=False)
|
output_file.to_csv(args.output, index=False)
|
||||||
|
|
||||||
|
|
|
@ -50,9 +50,6 @@ def main():
|
||||||
os.makedirs(participantdir)
|
os.makedirs(participantdir)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print_colors("""
|
print_colors("""
|
||||||
;
|
;
|
||||||
ED.
|
ED.
|
||||||
|
@ -94,8 +91,8 @@ def main():
|
||||||
if os.path.isfile(urlpath):
|
if os.path.isfile(urlpath):
|
||||||
with open(urlpath) as f:
|
with open(urlpath) as f:
|
||||||
instance = f.read().rstrip()
|
instance = f.read().rstrip()
|
||||||
if IsOnionValid(instance):
|
if IsOnionLinkValid(instance):
|
||||||
print_colors(f"[+] Instance Name: {instance}. Valid:{IsOnionValid(instance)}")
|
print_colors(f"[+] Instance Name: {instance}. Valid:{IsOnionLinkValid(instance)}")
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}',is_error=True )
|
print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}',is_error=True )
|
||||||
|
@ -104,8 +101,8 @@ def main():
|
||||||
print_colors("[+] Instance Path doesn't exist yet")
|
print_colors("[+] Instance Path doesn't exist yet")
|
||||||
print_colors(f"Your url will be saved here {urlpath}")
|
print_colors(f"Your url will be saved here {urlpath}")
|
||||||
instance = input("What is your Instance domain?(ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ")
|
instance = input("What is your Instance domain?(ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion): ")
|
||||||
if IsOnionValid(instance):
|
if IsOnionLinkValid(instance):
|
||||||
print_colors(f"[+] Instance Name: {instance}. Valid: {IsUrlValid(instance)}")
|
print_colors(f"[+] Instance Name: {instance}. Valid: {IsOnionLinkValid(instance)}")
|
||||||
instancepath=rootpath+'www/participants/'+instance
|
instancepath=rootpath+'www/participants/'+instance
|
||||||
else:
|
else:
|
||||||
print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True )
|
print_colors(f'[-] Invalid instance name in ~/.darknet_participant_url: {instance}', is_error=True )
|
||||||
|
@ -212,9 +209,9 @@ Maintenance:
|
||||||
while(IsCategoryValid(category) is not True):
|
while(IsCategoryValid(category) is not True):
|
||||||
category = input("What is the website Category? ")
|
category = input("What is the website Category? ")
|
||||||
# the url of the website (required) + check if its valid
|
# the url of the website (required) + check if its valid
|
||||||
url=''
|
url = ''
|
||||||
while(IsUrlValid(url) is not True and IsSimpleXChatroomValid(url) is not True):
|
while not IsURLValid(url):
|
||||||
url=input("What is the website URL ? ")
|
url = input("What is the website URL ? ")
|
||||||
|
|
||||||
# a quick description (optional) + check if its valid
|
# a quick description (optional) + check if its valid
|
||||||
desc='DEFAULT'
|
desc='DEFAULT'
|
||||||
|
@ -458,7 +455,7 @@ Maintenance:
|
||||||
value = input("What is the new name of the website? ")
|
value = input("What is the new name of the website? ")
|
||||||
vdf.at[index,'Name']=value
|
vdf.at[index,'Name']=value
|
||||||
elif i == 3: # column URL
|
elif i == 3: # column URL
|
||||||
while(IsUrlValid(value) is not True or value == ''):
|
while(IsURLValid(value) is not True or value == ''):
|
||||||
value = input("What is the new URL of the website? ")
|
value = input("What is the new URL of the website? ")
|
||||||
vdf.at[index,'URL']=value
|
vdf.at[index,'URL']=value
|
||||||
elif i == 4: # column Sensitive
|
elif i == 4: # column Sensitive
|
||||||
|
@ -504,7 +501,7 @@ Maintenance:
|
||||||
value = input("What is the new name of the website? ")
|
value = input("What is the new name of the website? ")
|
||||||
uvdf.at[index,'Name']=value
|
uvdf.at[index,'Name']=value
|
||||||
elif i == 3: # column URL
|
elif i == 3: # column URL
|
||||||
while(IsUrlValid(value) is not True or value == ''):
|
while(IsURLValid(value) is not True or value == ''):
|
||||||
value = input("What is the new URL of the website? ")
|
value = input("What is the new URL of the website? ")
|
||||||
uvdf.at[index,'URL']=value
|
uvdf.at[index,'URL']=value
|
||||||
elif i == 4: # column Sensitive
|
elif i == 4: # column Sensitive
|
||||||
|
@ -656,7 +653,7 @@ Maintenance:
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
csvdf.to_csv(csvfilepath, index=False)
|
||||||
|
|
||||||
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
||||||
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
||||||
#mark the row for deletion as it has invalid inputs
|
#mark the row for deletion as it has invalid inputs
|
||||||
if i not in rows2delete:
|
if i not in rows2delete:
|
||||||
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
||||||
|
@ -788,7 +785,7 @@ Maintenance:
|
||||||
case 5:
|
case 5:
|
||||||
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
|
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
|
||||||
webring_participant_url = ''
|
webring_participant_url = ''
|
||||||
while(IsOnionValid(webring_participant_url) is not True):
|
while(IsOnionLinkValid(webring_participant_url) is not True):
|
||||||
webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
|
webring_participant_url = input("What is the onion domain of the new webring participant? (ex: lantern.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion) ")
|
||||||
participantdir=rootpath+'www/participants/'+webring_participant_url
|
participantdir=rootpath+'www/participants/'+webring_participant_url
|
||||||
if os.path.isdir(participantdir):
|
if os.path.isdir(participantdir):
|
||||||
|
@ -892,7 +889,7 @@ Maintenance:
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
csvdf.to_csv(csvfilepath, index=False)
|
||||||
|
|
||||||
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
||||||
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
||||||
#mark the row for deletion as it has invalid inputs
|
#mark the row for deletion as it has invalid inputs
|
||||||
if i not in rows2delete:
|
if i not in rows2delete:
|
||||||
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
||||||
|
@ -1072,7 +1069,7 @@ Maintenance:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print_colors("[+] checking if the Word/URL is valid: ")
|
print_colors("[+] checking if the Word/URL is valid: ")
|
||||||
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
|
if IsURLValid(word) or IsDescriptionValid(word):
|
||||||
print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist')
|
print_colors('[+] Word/URL is valid, adding the word into the sensitive wordlist')
|
||||||
newrow=[word]
|
newrow=[word]
|
||||||
print_colors(f"[+] NEWROW= {newrow}")
|
print_colors(f"[+] NEWROW= {newrow}")
|
||||||
|
@ -1141,7 +1138,7 @@ Maintenance:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print_colors("[+] Checking if the Word/URL is valid: ")
|
print_colors("[+] Checking if the Word/URL is valid: ")
|
||||||
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
|
if IsURLValid(word) or IsDescriptionValid(word):
|
||||||
print_colors('[+] Word/URL is valid, adding the word into the blacklist')
|
print_colors('[+] Word/URL is valid, adding the word into the blacklist')
|
||||||
newrow=[word]
|
newrow=[word]
|
||||||
print_colors(f"[+] NEWROW= {newrow}")
|
print_colors(f"[+] NEWROW= {newrow}")
|
||||||
|
@ -1254,11 +1251,11 @@ Maintenance:
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
csvdf.to_csv(csvfilepath, index=False)
|
||||||
|
|
||||||
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
||||||
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
||||||
if i not in rows2delete:
|
if i not in rows2delete:
|
||||||
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
||||||
#print_colors(f"{row}")
|
#print_colors(f"{row}")
|
||||||
print(IsUrlValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsUrlValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
|
print(IsURLValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsURLValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
|
||||||
rows2delete.append(i)
|
rows2delete.append(i)
|
||||||
read=input("Continue?")
|
read=input("Continue?")
|
||||||
|
|
||||||
|
@ -1360,9 +1357,6 @@ Maintenance:
|
||||||
print_colors("Invalid Number",is_error=True)
|
print_colors("Invalid Number",is_error=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_colors(f'Try again {e}',is_error=True)
|
print_colors(f'Try again {e}',is_error=True)
|
||||||
break
|
break
|
||||||
|
@ -1371,7 +1365,6 @@ Maintenance:
|
||||||
print_colors("No more submissions to review, exiting.")
|
print_colors("No more submissions to review, exiting.")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
case 12:
|
case 12:
|
||||||
# review the crawled websites
|
# review the crawled websites
|
||||||
try:
|
try:
|
||||||
|
@ -1459,12 +1452,12 @@ Maintenance:
|
||||||
crawled_df.to_csv(crawled_file_abs_path, index=False)
|
crawled_df.to_csv(crawled_file_abs_path, index=False)
|
||||||
|
|
||||||
elif number == 3:
|
elif number == 3:
|
||||||
# Delete from crawled_onion.csv
|
# Delete from crawled_onion.csv
|
||||||
crawled_df.drop(index=i,inplace=True)
|
crawled_df.drop(index=i,inplace=True)
|
||||||
crawled_df.to_csv(crawled_file_abs_path, index=False)
|
crawled_df.to_csv(crawled_file_abs_path, index=False)
|
||||||
|
|
||||||
elif number == 4:
|
elif number == 4:
|
||||||
# Add to blacklist.csv
|
# Add to blacklist.csv
|
||||||
newrow=[link]
|
newrow=[link]
|
||||||
|
|
||||||
blacklist_df.loc[-1] = newrow # adding a row
|
blacklist_df.loc[-1] = newrow # adding a row
|
||||||
|
@ -1482,15 +1475,10 @@ Maintenance:
|
||||||
print_colors("Invalid Number",is_error=True)
|
print_colors("Invalid Number",is_error=True)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_colors(f'Try again {e}',is_error=True)
|
print_colors(f'Try again {e}',is_error=True)
|
||||||
break
|
break
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
print_colors("No more crawled websites to review, exiting.")
|
print_colors("No more crawled websites to review, exiting.")
|
||||||
break
|
break
|
||||||
|
|
|
@ -8,7 +8,7 @@ import requests
|
||||||
import json
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import glob
|
import glob
|
||||||
from utils import IsSimpleXServerValid, send_server_checks
|
from utils import RecognizeURLType, IsOnionLinkValid, send_server_checks
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,8 +46,8 @@ def main():
|
||||||
with open(urlpath) as f:
|
with open(urlpath) as f:
|
||||||
instance = f.read().rstrip()
|
instance = f.read().rstrip()
|
||||||
# check if the instance URL domain is valid
|
# check if the instance URL domain is valid
|
||||||
if IsOnionValid(instance):
|
if IsOnionLinkValid(instance):
|
||||||
print("[+] Instance Name:",instance,IsOnionValid(instance))
|
print("[+] Instance Name:",instance,IsOnionLinkValid(instance))
|
||||||
isitvalid="y"
|
isitvalid="y"
|
||||||
else:
|
else:
|
||||||
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
|
print('[-] Invalid instance name in ~/.darknet_participant_url:', instance)
|
||||||
|
@ -88,42 +88,41 @@ def main():
|
||||||
index2 = url.find("https://")
|
index2 = url.find("https://")
|
||||||
|
|
||||||
if url.startswith("smp://") or url.startswith("xftp://"):
|
if url.startswith("smp://") or url.startswith("xftp://"):
|
||||||
if IsSimpleXServerValid(url):
|
if RecognizeURLType(url) == 'smp':
|
||||||
if url.startswith("smp"):
|
resp,resp_type,failed_response = send_server_checks(url)
|
||||||
resp,resp_type,failed_response = send_server_checks(url)
|
|
||||||
|
|
||||||
if resp_type in ["chatError", "contactSubSummary"]:
|
|
||||||
resp, resp_type,failed_response = send_server_checks(url)
|
|
||||||
|
|
||||||
if failed_response is None:
|
if resp_type in ["chatError", "contactSubSummary"]:
|
||||||
print(url, "✔️")
|
resp, resp_type,failed_response = send_server_checks(url)
|
||||||
df.at[i, "Status"]="YES"
|
|
||||||
if df.at[i, "Score"] < 100:
|
if failed_response is None:
|
||||||
df.at[i,"Score"] = df.at[i,"Score"] + 1
|
print(url, "✔️")
|
||||||
else:
|
df.at[i, "Status"]="YES"
|
||||||
print(url,"❌")
|
if df.at[i, "Score"] < 100:
|
||||||
df.at[i,"Status"]="NO"
|
df.at[i,"Score"] = df.at[i,"Score"] + 1
|
||||||
#if uptime >0 do -1 to the value
|
|
||||||
if df.at[i,"Score"] > 0:
|
|
||||||
df.at[i,"Score"] = df.at[i,"Score"] - 1
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
resp,resp_type,failed_response = send_server_checks(url)
|
print(url,"❌")
|
||||||
|
df.at[i,"Status"]="NO"
|
||||||
if resp_type in ["chatError", "contactSubSummary"]:
|
#if uptime >0 do -1 to the value
|
||||||
resp, resp_type,failed_response = send_server_checks(url)
|
if df.at[i,"Score"] > 0:
|
||||||
|
df.at[i,"Score"] = df.at[i,"Score"] - 1
|
||||||
if failed_response is None:
|
|
||||||
print(url, "✔️")
|
elif RecognizeURLType(url) == 'xftp':
|
||||||
df.at[i, "Status"]="YES"
|
resp,resp_type,failed_response = send_server_checks(url)
|
||||||
if df.at[i, "Score"] < 100:
|
|
||||||
df.at[i,"Score"] = df.at[i,"Score"] + 1
|
if resp_type in ["chatError", "contactSubSummary"]:
|
||||||
else:
|
resp, resp_type,failed_response = send_server_checks(url)
|
||||||
print(url,"❌")
|
|
||||||
df.at[i,"Status"]="NO"
|
if failed_response is None:
|
||||||
#if uptime >0 do -1 to the value
|
print(url, "✔️")
|
||||||
if df.at[i,"Score"] > 0:
|
df.at[i, "Status"]="YES"
|
||||||
df.at[i,"Score"] = df.at[i,"Score"] - 1
|
if df.at[i, "Score"] < 100:
|
||||||
|
df.at[i,"Score"] = df.at[i,"Score"] + 1
|
||||||
|
else:
|
||||||
|
print(url,"❌")
|
||||||
|
df.at[i,"Status"]="NO"
|
||||||
|
#if uptime >0 do -1 to the value
|
||||||
|
if df.at[i,"Score"] > 0:
|
||||||
|
df.at[i,"Score"] = df.at[i,"Score"] - 1
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -173,80 +172,6 @@ def main():
|
||||||
#print(df2)
|
#print(df2)
|
||||||
df2.to_csv(csvfile, index=False)
|
df2.to_csv(csvfile, index=False)
|
||||||
|
|
||||||
|
|
||||||
def IsUrlValid(url:str)->bool:
|
|
||||||
"""
|
|
||||||
Check if url is valid both dark net end clearnet.
|
|
||||||
"""
|
|
||||||
# check if the characters are only [a-zA-Z0-9.:/] with maximum 128 chars max?
|
|
||||||
# check that it is only http(s)://wordA.wordB or http(s)://WordC.WordB.WordC, (onion or not), clearnet is fine too (double check if those are fine!)
|
|
||||||
# if OK return True
|
|
||||||
#if not : return False
|
|
||||||
pattern = re.compile("^[A-Za-z0-9:/.]+$")
|
|
||||||
url = str(url)
|
|
||||||
if url.endswith('.onion'):
|
|
||||||
return IsOnionValid(url)
|
|
||||||
else:
|
|
||||||
if not url.__contains__('.'):
|
|
||||||
#print("No (DOT) in clearnet url")
|
|
||||||
return False
|
|
||||||
if pattern.fullmatch(url) is None:
|
|
||||||
#print('Url contains invalid chars')
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def IsOnionValid(url: str)-> bool:
|
|
||||||
"""
|
|
||||||
Checks if the domain(param) is a valid onion domain and return True else False.
|
|
||||||
"""
|
|
||||||
# check if the characters are only [a-zA-Z0-9.] with maximum 128 chars max?
|
|
||||||
# check that it is only url.onion or subdomain.url.onion,
|
|
||||||
# if OK return True
|
|
||||||
#if not : return False
|
|
||||||
try:
|
|
||||||
pattern = re.compile("^[A-Za-z0-9.]+(\.onion)?$")
|
|
||||||
url = url.strip().removesuffix('/')
|
|
||||||
if url.startswith('http://'):
|
|
||||||
#print('URL starts with http')
|
|
||||||
# Removes the http://
|
|
||||||
domain = url.split('/')[2]
|
|
||||||
if pattern.fullmatch(domain) is not None:
|
|
||||||
if len(domain.split('.')) > 3:
|
|
||||||
n_subdomians = len(domain.split('.'))
|
|
||||||
# Checks if there is more than 1 subdomain. "subdomain.url.onion" only
|
|
||||||
#print(f"This domain have more than one subdomain. There are {n_subdomians} subdomains")
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
if len(domain) < 62:
|
|
||||||
#print("Domain length is less than 62.")
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
elif pattern.fullmatch(domain) is None:
|
|
||||||
#print("Domain contains invalid character.")
|
|
||||||
#print(domain)
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
#print("Domain not valid")
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
#TODO : edit the url to make sure it has http:// at the beginning, in case if it's missing? (problem is that it only returns true or false)
|
|
||||||
#print("URL doesn't start http")
|
|
||||||
if pattern.fullmatch(url) is not None:
|
|
||||||
if len(url.split('.')) > 3:
|
|
||||||
n_subdomians = len(url.split('.'))
|
|
||||||
# Checks if there is more than 1 subdomain. "subdomain.url.onion" only
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
if len(url) < 62:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
elif pattern.fullmatch(url) is None:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
284
scripts/utils.py
284
scripts/utils.py
|
@ -18,28 +18,108 @@ RESET = '\033[m'
|
||||||
# name should contain only up to 64 alphanumeric characters
|
# name should contain only up to 64 alphanumeric characters
|
||||||
VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")
|
VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")
|
||||||
|
|
||||||
# pattern for regular urls
|
# pattern for regular urls (https://stackoverflow.com/a/3809435)
|
||||||
# TODO: this is very simplified pattern
|
CLEARNET_URL_PATTERN = re.compile(
|
||||||
URL_PATTERN = re.compile(r"^[A-Za-z0-9:\/\._%-=#?&@]+$")
|
r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]"
|
||||||
|
r"{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
|
||||||
|
)
|
||||||
|
|
||||||
# pattern for onion urls (56 bytes of base32 alphabet + .onion)
|
# pattern for onion urls (56 bytes of base32 alphabet + .onion)
|
||||||
ONION_URL_PATTERN = re.compile(r"^(\w+:)?(?:\/\/)?(\w+\.)?[a-z2-7]{56}\.onion")
|
ONION_URL_PATTERN = re.compile(
|
||||||
|
r"^https?:\/\/([a-zA-Z0-9-]+\.)*[a-z2-7-]{56}\.onion[^\s]*$"
|
||||||
|
)
|
||||||
|
|
||||||
|
# pattern for simplex chatroom links
|
||||||
|
SIMPLEX_CHATROOM_PATTERN = re.compile(
|
||||||
|
r"(?:https?:\/\/(?:simplex\.chat|[^\/]+)|simplex:)\/(?:contact|invitation)#\/\?v=[\d-]+"
|
||||||
|
r"&smp=[^&]+(?:&[^=]+=[^&]*)*(?:&data=\{[^}]*\})?"
|
||||||
|
)
|
||||||
|
|
||||||
def print_colors(s:str=' ', bold:bool=False, is_error:bool = False, default:bool=False):
|
# pattern for smp or xftp simplex server ((smp|xftp):// 44 byte key @ url [:port])
|
||||||
|
SIMPLEX_SERVER_PATTERN = re.compile(
|
||||||
|
r"^(smp|xftp):\/\/([a-zA-Z0-9\-_+=]{44})@([a-z2-7]{56}\.onion|"
|
||||||
|
r"([a-zA-Z0-9\-\.]+\.[a-zA-Z0-9\-\.]+))"
|
||||||
|
r"{1,}(?::[1-9][0-9]{0,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|"
|
||||||
|
r"65[0-4][0-9]{2}|655[0-3][0-9]|6553[0-5])?$"
|
||||||
|
)
|
||||||
|
|
||||||
|
def IsSimplexChatroomValid(url: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Helper function to print with colors
|
Recognizes Simplex Chatroom link.
|
||||||
|
Returns True if URL is a SimpleX chatroom,
|
||||||
|
False otherwise
|
||||||
"""
|
"""
|
||||||
if is_error:
|
return SIMPLEX_CHATROOM_PATTERN.match(url)
|
||||||
print(f"{RED}{s}{RESET}")
|
|
||||||
elif bold:
|
def RecognizeSimplexType(url: str) -> str:
|
||||||
print(f"{BOLD_PURPLE}{s}{RESET}")
|
"""
|
||||||
elif is_error and bold:
|
Recognizes Simplex Server URL, returns smp, xftp or invalid
|
||||||
print(f"{BOLD_RED}{s}{RESET}")
|
"""
|
||||||
elif default:
|
match = SIMPLEX_SERVER_PATTERN.match(url)
|
||||||
print(f'{s}')
|
if match:
|
||||||
|
return match.group(1)
|
||||||
else:
|
else:
|
||||||
print(f"{PURPLE}{s}{RESET}")
|
return 'invalid'
|
||||||
|
|
||||||
|
# stub function
|
||||||
|
def IsXFTPServerValid(url: str) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if URL is a valid SimpleX XFTP Server URL
|
||||||
|
False otherwise
|
||||||
|
"""
|
||||||
|
return RecognizeSimplexType(url) == 'xftp'
|
||||||
|
|
||||||
|
# stub function
|
||||||
|
def IsSMPServerValid(url: str) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if URL is a valid SimpleX SMP Server URL
|
||||||
|
False otherwise
|
||||||
|
"""
|
||||||
|
return RecognizeSimplexType(url) == 'smp'
|
||||||
|
|
||||||
|
def IsClearnetLinkValid(url: str) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if URL is a valid clearnet URL
|
||||||
|
False otherwise
|
||||||
|
"""
|
||||||
|
return CLEARNET_URL_PATTERN.match(url)
|
||||||
|
|
||||||
|
def IsOnionLinkValid(url: str) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if URL is a valid onion URL
|
||||||
|
False otherwise
|
||||||
|
"""
|
||||||
|
return ONION_URL_PATTERN.match(url)
|
||||||
|
|
||||||
|
def RecognizeURLType(url: str) -> str:
|
||||||
|
"""
|
||||||
|
Recognizes URL type, can return:
|
||||||
|
- chatroom - SimpleX chatroom
|
||||||
|
- xftp - XFTP SimpleX server
|
||||||
|
- smp - SMP SimpleX server
|
||||||
|
- onion - onion URL
|
||||||
|
- clearnet - valid clearnet url
|
||||||
|
- invalid - none of the above (probably invalid)
|
||||||
|
"""
|
||||||
|
# order is important here
|
||||||
|
# (ex. simplex chatroom is also valid clearnet link)
|
||||||
|
if IsSimplexChatroomValid(url):
|
||||||
|
return 'chatroom'
|
||||||
|
if IsXFTPServerValid(url):
|
||||||
|
return 'xftp'
|
||||||
|
if IsSMPServerValid(url):
|
||||||
|
return 'smp'
|
||||||
|
if IsOnionLinkValid(url):
|
||||||
|
return 'onion'
|
||||||
|
if IsClearnetLinkValid(url):
|
||||||
|
return 'clearnet'
|
||||||
|
return 'invalid'
|
||||||
|
|
||||||
|
def IsURLValid(url: str) -> bool:
|
||||||
|
"""
|
||||||
|
Checks if given URL is valid (RecognizeURLType recognizes it)
|
||||||
|
"""
|
||||||
|
return RecognizeURLType(url) != 'invalid'
|
||||||
|
|
||||||
|
|
||||||
#### Checking Functions to validate that links are legit ####
|
#### Checking Functions to validate that links are legit ####
|
||||||
|
@ -54,7 +134,7 @@ def CheckUrl(url):
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
status = requests.get(url, proxies=proxies, timeout=5).status_code
|
status = requests.get(url, proxies=proxies, timeout=5).status_code
|
||||||
return bool(status == 200)
|
return status == 200
|
||||||
except requests.ConnectionError:
|
except requests.ConnectionError:
|
||||||
return False
|
return False
|
||||||
except requests.exceptions.ReadTimeout:
|
except requests.exceptions.ReadTimeout:
|
||||||
|
@ -82,131 +162,13 @@ def IsBannerValid(path: str) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def IsOnionValid(url: str) -> bool:
|
|
||||||
"""
|
|
||||||
Checks if the domain(param) is a valid onion domain and return True else False.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# make sure the protocol is there
|
|
||||||
if not url.startswith(('http://', 'https://')):
|
|
||||||
url = 'http://' + url.strip().removesuffix('/')
|
|
||||||
|
|
||||||
domain = url.split('/')[2]
|
|
||||||
|
|
||||||
if ONION_URL_PATTERN.fullmatch(domain):
|
|
||||||
parts_count = len(domain.split('.'))
|
|
||||||
# TODO: we probably don't really need to check 62 char length
|
|
||||||
# regex does that beforehand
|
|
||||||
return (len(domain) == 62) and (parts_count <= 3)
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def IsSimpleXChatroomValid(url: str) -> bool:
|
|
||||||
"""Validate the SimpleX chatroom URL."""
|
|
||||||
REQUIRED_SUBSTRING = "#/?v=2-7&smp=smp%3A%2F"
|
|
||||||
|
|
||||||
# Step 1: Check if it starts with http://, https://, or simplex:/
|
|
||||||
if url.startswith(('http://', 'https://', 'simplex:/')):
|
|
||||||
# Step 1.5: If http:// or https://, check for valid clearnet or onion domain
|
|
||||||
if url.startswith(('http://', 'https://')) \
|
|
||||||
and RecognizeUrlOnionClear(url) != 'invalid':
|
|
||||||
return False
|
|
||||||
elif not url.startswith('simplex:/'):
|
|
||||||
return False # Must start with one of the valid protocols
|
|
||||||
|
|
||||||
# Step 2: Check for the presence of the required substring
|
|
||||||
if REQUIRED_SUBSTRING not in url:
|
|
||||||
return False # Required substring not found
|
|
||||||
|
|
||||||
# Step 3: Extract the part after "smp=smp%3A%2F"
|
|
||||||
smp_start = url.find("smp=smp%3A%2F")
|
|
||||||
if smp_start == -1:
|
|
||||||
return False # Required substring not found
|
|
||||||
|
|
||||||
smp_start += len("smp=smp%3A%2F")
|
|
||||||
smp_end = url.find("&", smp_start)
|
|
||||||
if smp_end == -1:
|
|
||||||
smp_end = len(url) # Take until the end if no "&" is found
|
|
||||||
|
|
||||||
smp_value = urllib.parse.unquote(url[smp_start:smp_end]) # Decode the URL-encoded string
|
|
||||||
|
|
||||||
# Step 3.5: Check if the smp_value contains a valid hostname
|
|
||||||
if '@' not in smp_value:
|
|
||||||
return False # Must contain '@' to separate fingerprint and hostname
|
|
||||||
|
|
||||||
fingerprint, hostname = smp_value.split('@', 1)
|
|
||||||
if RecognizeUrlOnionClear(hostname) != 'invalid':
|
|
||||||
return False # Invalid hostname
|
|
||||||
|
|
||||||
# Step 4: Check for the presence of "%2F" in the original URL
|
|
||||||
if "%2F" not in url:
|
|
||||||
return False # Required substring not found
|
|
||||||
|
|
||||||
# If all checks pass, return True
|
|
||||||
return True
|
|
||||||
|
|
def RecognizeUrlOnionClear(url: str) -> str:
    """
    Classify a URL string as 'onion', 'clearnet' or 'invalid'.
    """
    # Cheap early rejects: too short, contains ';', or has no dot at all.
    if len(url) < 4 or ';' in url or '.' not in url:
        return 'invalid'

    # Possibly an onion address — IsOnionValid performs the strict
    # regex-based check.
    # NOTE(review): a '.onion' URL that fails IsOnionValid falls through
    # to the clearnet pattern below and may still be reported as
    # 'clearnet' — confirm this fallthrough is intended.
    if '.onion' in url and IsOnionValid(url):
        return 'onion'

    return 'clearnet' if URL_PATTERN.fullmatch(url) else 'invalid'
def RecognizeUrlFull(url: str) -> str:
    """
    Recognize whether a URL is an smp/xftp server, a SimpleX group chat,
    an onion or clearnet address, or just invalid.

    Depends on RecognizeUrlOnionClear for the http(s) cases.
    """
    # SimpleX chatroom links take priority over plain http(s) handling.
    if IsSimpleXChatroomValid(url):
        return 'chatroom'

    # Plain web URLs are delegated to the onion/clearnet classifier.
    if url.startswith(('http://', 'https://')):
        return RecognizeUrlOnionClear(url)

    # SimpleX server addresses: scheme prefix decides the label,
    # IsSimpleXServerValid decides acceptance.
    for prefix, label in (('xftp://', 'xftp'), ('smp://', 'smp')):
        if url.startswith(prefix) and IsSimpleXServerValid(url):
            return label

    return 'invalid'
#def IsUrlValid(url:str)->bool:
|
|
||||||
# """
|
|
||||||
# Check if url is valid both dark net end clearnet.
|
|
||||||
# """
|
|
||||||
# pattern = re.compile("^[A-Za-z0-9:/.-]+$")
|
|
||||||
# url = str(url)
|
|
||||||
# if len(url) < 4:
|
|
||||||
# return False
|
|
||||||
# if url.endswith('.onion'):
|
|
||||||
# return IsOnionValid(url)
|
|
||||||
# else:
|
|
||||||
# if not url.__contains__('.'):
|
|
||||||
# return False
|
|
||||||
# if pattern.fullmatch(url) is None:
|
|
||||||
# return False
|
|
||||||
# return True
|
|
||||||
|
|
||||||
|
|
||||||
def IsStatusValid(status: str) -> bool:
|
def IsStatusValid(status: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Checks if status contains only ['YES','NO']. Verbose only if False is returned
|
Checks if status contains only ['YES','NO']. Verbose only if False is returned
|
||||||
"""
|
"""
|
||||||
pattern = ['YES','NO','✔️','❌','']
|
pattern = ['YES','NO','']
|
||||||
status = status.strip()
|
status = status.strip()
|
||||||
if status not in pattern:
|
if status not in pattern:
|
||||||
return False
|
return False
|
||||||
|
@ -230,7 +192,7 @@ def IsScoreValid(score: str) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def IsDescriptionValid(desc:str)->bool:
|
def IsDescriptionValid(desc: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check the categories are only [a-zA-Z0-9.' ] with 256 max chars.
|
Check the categories are only [a-zA-Z0-9.' ] with 256 max chars.
|
||||||
"""
|
"""
|
||||||
|
@ -263,40 +225,6 @@ def IsCategoryValid(categories: list[str]) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def IsSimpleXServerValid(url: str) -> bool:
    """
    Validate a SimpleX server address of the form
    'smp://<fingerprint>@<host>[,<onion-host>]' (or 'xftp://...').

    The fingerprint must be exactly 44 characters from [0-9A-Za-z-_];
    the primary host — and the optional comma-separated onion alias —
    must each be a recognized onion/clearnet domain. Any parsing error
    yields False.
    """
    pattern = re.compile('[0-9A-Za-z-_]*')
    url = url.strip()
    try:
        if not url.startswith(('smp://', 'xftp://')):
            return False

        # Remove the protocol part.
        proless = url.split('//', 1)[-1]

        # Split the fingerprint and hostname; exactly one '@' allowed.
        parts = proless.split('@')
        if len(parts) != 2:
            return False

        fingerprint = parts[0]
        hostname = parts[1].split(',')[0]  # hostname before any comma

        # Check fingerprint length and character set.
        # BUGFIX: pattern.match() with a '*' quantifier always succeeds
        # (it happily matches an empty prefix); fullmatch() actually
        # verifies every character.
        if len(fingerprint) != 44 or not pattern.fullmatch(fingerprint):
            return False

        # The primary hostname must be a valid onion/clearnet domain.
        if RecognizeUrlOnionClear(hostname) == 'invalid':
            return False

        # Optional ',<onion-host>' alias must itself be valid.
        # BUGFIX: the original condition was inverted and returned False
        # precisely when the alias *was* valid.
        if ',' in proless:
            onion_part = proless.split(',')[1].strip()
            if RecognizeUrlOnionClear(onion_part) == 'invalid':
                return False

        return True
    except Exception as e:
        print(e)
        # Any unexpected error counts as invalid.
        return False
||||||
def IsNameValid(name: str) -> bool:
|
def IsNameValid(name: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
|
Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
|
||||||
|
@ -325,3 +253,19 @@ def send_server_checks(url: str) -> tuple[str, str, str]:
|
||||||
failed_response = response['resp'].get('testFailure')
|
failed_response = response['resp'].get('testFailure')
|
||||||
|
|
||||||
return (response, resp_type, failed_response)
|
return (response, resp_type, failed_response)
|
||||||
|
|
||||||
|
|
||||||
|
def print_colors(s: str = ' ', bold: bool = False, is_error: bool = False, default: bool = False):
    """
    Helper function to print with ANSI colors.

    Priority: bold error -> bold red; error -> red; bold -> bold purple;
    default -> uncolored; otherwise plain purple.
    """
    # BUGFIX: the combined bold+error case must be tested first; in the
    # original it came after 'if is_error:' and was therefore unreachable.
    if is_error and bold:
        print(f"{BOLD_RED}{s}{RESET}")
    elif is_error:
        print(f"{RED}{s}{RESET}")
    elif bold:
        print(f"{BOLD_PURPLE}{s}{RESET}")
    elif default:
        print(f'{s}')
    else:
        print(f"{PURPLE}{s}{RESET}")
Loading…
Add table
Add a link
Reference in a new issue