make special case for simplex links

cynthia committed on 2025-04-15 00:34:57 +01:00
parent ca4d949175
commit bf40730292

@@ -6,8 +6,13 @@ import argparse
 import os
 import pwd
 import re
+# To have the ability to load the SimpleX module
+import sys
+sys.path.append("..")
 from utils import print_colors, IsUrlValid
+from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
 from dotenv import load_dotenv
 
 # Make default parameters for arguments
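
A side note on the path tweak above: sys.path.append("..") is resolved against the process's current working directory, not against the script's own location, so the SimpleX import only works when the script is launched from its own directory. A sketch of a working-directory-independent variant, shown for comparison only and not part of this commit:

# Sketch of an alternative to sys.path.append("..") that anchors the lookup
# to this file's location instead of the current working directory.
# Not part of this commit; shown only to illustrate the trade-off.
import os
import sys

sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))
from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid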
@@ -99,7 +104,17 @@ def add_urls(urls):
     global output_file
     for url in urls:
         parsed_url = urllib.parse.urlparse(url)
-        if (output_file['Hostname'] == parsed_url.hostname).any() or parsed_url.hostname is None or parsed_url.hostname in vcsv_hostnames or not parsed_url.hostname.endswith(".onion"):
+        if IsSimpleXChatroomValid(url) and not (output_file['URL'] == url).any():
+            output_file.loc[-1] = ["", url, "SimpleX Chatroom"]
+            output_file.index += 1
+            output_file = output_file.sort_index()
+            continue
+        elif IsSimpleXServerValid(url) and not (output_file['URL'] == url).any():
+            output_file.loc[-1] = ["", url, "SimpleX Server"]
+            output_file.index += 1
+            output_file = output_file.sort_index()
+            continue
+        elif (output_file['Hostname'] == parsed_url.hostname).any() or parsed_url.hostname is None or parsed_url.hostname in vcsv_hostnames or not parsed_url.hostname.endswith(".onion"):
             continue
 
         # Get information about the URL
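
The three-step DataFrame dance in this hunk (loc[-1], index += 1, sort_index) prepends the new row rather than appending it. A minimal, self-contained sketch of the same idiom; the third column name is an assumption, since only 'Hostname' and 'URL' appear in this diff:

# Minimal reproduction of the prepend idiom used in add_urls().
# Column names other than 'Hostname' and 'URL' are illustrative assumptions.
import pandas as pd

output_file = pd.DataFrame(
    [["example.onion", "http://example.onion/", "Example onion service"]],
    columns=["Hostname", "URL", "Name"],
)

url = "https://simplex.chat/contact#/?v=2&smp=..."  # placeholder SimpleX invite link
if not (output_file["URL"] == url).any():                # skip exact-URL duplicates
    output_file.loc[-1] = ["", url, "SimpleX Chatroom"]  # new row gets temporary index -1
    output_file.index += 1                               # shift every index up by one
    output_file = output_file.sort_index()               # index -1 becomes 0: row is prepended

print(output_file)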
@@ -144,6 +159,13 @@ def extract_urls_html(url, text):
             jurl_parsed = urllib.parse.urlparse(joined_url)
             print_colors(f'[D] Joined URL: {joined_url}')
 
+            # Capture SimpleX URLs
+            if IsSimpleXChatroomValid(joined_url) or IsSimpleXServerValid(joined_url):
+                if joined_url not in result.thirdp_urls:
+                    result.thirdp_urls.append(joined_url)
+                continue
+
             # Check if the URL is a .onion link or not even a web link
             if not IsUrlValid(joined_url):
                 continue
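
The regex_simplexlinks helpers themselves are not part of this diff. As a rough sketch of the kind of check they might perform, the patterns below are illustrative guesses based on publicly documented SimpleX link formats, not the project's actual regexes:

# Illustrative guesses at what IsSimpleXChatroomValid / IsSimpleXServerValid
# might look like; the real patterns live in SimpleX/regex_simplexlinks.py,
# which is not shown in this commit.
import re

# Chatroom/contact invitation links, e.g. "https://simplex.chat/contact#/?v=2&smp=..."
# or the "simplex:/contact#/..." scheme.
SIMPLEX_CHATROOM_RE = re.compile(
    r"^(https://simplex\.chat|simplex:)/(contact|invitation)#/\?", re.IGNORECASE
)

# SMP/XFTP server addresses, e.g. "smp://<fingerprint>@<host>".
SIMPLEX_SERVER_RE = re.compile(
    r"^(smp|xftp)://[A-Za-z0-9_=\-]+@[A-Za-z0-9.\-]+", re.IGNORECASE
)

def IsSimpleXChatroomValid(url: str) -> bool:
    return bool(SIMPLEX_CHATROOM_RE.match(url))

def IsSimpleXServerValid(url: str) -> bool:
    return bool(SIMPLEX_SERVER_RE.match(url))

Whatever the real patterns are, matching them before the .onion check is what lets SimpleX invite links and server addresses bypass the hostname-based filtering that would otherwise discard them.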