mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 22:16:41 +00:00
[untested] RecognizeURLType and IsURLValid functions are working
This commit is contained in:
parent
08697f5c40
commit
c9a2fbcfdd
5 changed files with 179 additions and 320 deletions
|
@ -11,8 +11,10 @@ import re
|
|||
import sys
|
||||
sys.path.append("..")
|
||||
|
||||
from utils import print_colors, IsUrlValid
|
||||
from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
|
||||
from utils import (
|
||||
print_colors, IsURLValid, IsSimplexChatroomValid, RecognizeURLType
|
||||
)
|
||||
#from SimpleX.regex_simplexlinks import IsSimpleXChatroomValid, IsSimpleXServerValid
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Make default parameters for arguments
|
||||
|
@ -107,12 +109,12 @@ def add_urls(urls):
|
|||
global output_file
|
||||
for url in urls:
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
if IsSimpleXChatroomValid(url) and not (output_file['URL'] == url).any():
|
||||
if IsSimplexChatroomValid(url) and not (output_file['URL'] == url).any():
|
||||
output_file.loc[-1] = ["", url, "", "SimpleX Chatroom"]
|
||||
output_file.index += 1
|
||||
output_file = output_file.sort_index()
|
||||
continue
|
||||
elif IsSimpleXServerValid(url) and not (output_file['URL'] == url).any():
|
||||
elif RecognizeURLType(url) in ('smp', 'xftp') and not (output_file['URL'] == url).any():
|
||||
output_file.loc[-1] = ["", url, "", "SimpleX Server"]
|
||||
output_file.index += 1
|
||||
output_file = output_file.sort_index()
|
||||
|
@ -164,13 +166,13 @@ def extract_urls_html(url, text):
|
|||
print_colors(f'[D] Joined URL: {joined_url}')
|
||||
|
||||
# Capture SimpleX URLs
|
||||
if IsSimpleXChatroomValid(joined_url) or IsSimpleXServerValid(joined_url):
|
||||
if RecognizeURLType(joined_url) in ('smp', 'xftp', 'chatroom'):
|
||||
if url not in result.thirdp_urls:
|
||||
result.thirdp_urls.append(joined_url)
|
||||
continue
|
||||
|
||||
# Skip the URL if it is not valid (e.g. not a .onion link, or not a web link at all)
|
||||
if not IsUrlValid(joined_url):
|
||||
if not IsURLValid(joined_url):
|
||||
continue
|
||||
|
||||
print_colors(f'[+] Found url: {joined_url}')
|
||||
|
@ -266,4 +268,3 @@ for i, url in enumerate(vcsv_urls):
|
|||
crawl_url(url)
|
||||
crawler_file.to_csv(args.crawler_file, index=False)
|
||||
output_file.to_csv(args.output, index=False)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue