Mirror of http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git (synced 2025-07-01 22:06:40 +00:00)

Merge pull request 'main' (#4) from nihilist/darknet-lantern:main into main
Reviewed-on: http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/doctor_dev/darknet-lantern/pulls/4
Commit 86dbbfe50a | 7 changed files with 600 additions and 358 deletions
.gitignore (vendored) | 5

@@ -1,10 +1,11 @@
 .git
 www/participants/**
 crawler/**
-scripts/__pycache__/**
-scripts/_*.py
+__pycache__/
 .env
 env/
 submissions/submission.csv
 venv/
+local_testing/*
+!your_folder/.gitkeep

requirements.txt

@@ -1,7 +1,6 @@
 beautifulsoup4==4.13.3
 certifi==2025.4.26
-charset-normalizer==3.4.1
-dotenv==0.9.9
+charset-normalizer==3.4.2
 idna==3.10
 numpy==2.2.6
 pandas==2.2.3
@@ -15,4 +14,4 @@ requests==2.32.3
 six==1.17.0
 tzdata==2025.2
 urllib3==2.4.0
 websockets==15.0.1

scripts/conf.py (new file) | 56

@@ -0,0 +1,56 @@
import re

ROOT_PATH = '/srv/darknet-lantern/'
STATIC_PATH = ROOT_PATH + 'www/'
TEMPLATE_PATH = ROOT_PATH + 'templates/'

PARTICIPANT_DIR = STATIC_PATH + 'participants/'
OFFICIAL_PARTICIPANTS_FILE = STATIC_PATH + '.official_participants'
WEBRING_CSV_FILE = 'webring-participants.csv'

LOCAL_DIR = ''  # Assigned on script startup

PROXIES = {
    'http': 'socks5h://127.0.0.1:9050',
    'https': 'socks5h://127.0.0.1:9050'
}

CSV_FILES = [
    'verified.csv',
    'unverified.csv',
    'blacklist.csv',
    'sensitive.csv',
    'webring-participants.csv'
]

############ REGEX ############

# name should contain only up to 64 alphanumeric characters
VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")

# pattern for regular urls (https://stackoverflow.com/a/3809435)
CLEARNET_URL_PATTERN = re.compile(
    r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]"
    r"{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
)

# pattern for onion urls (56 chars of the base32 alphabet + .onion)
# it also works without http(s)://, so a bare hostname will match too
ONION_URL_PATTERN = re.compile(
    r"^(https?:\/\/)?([a-zA-Z0-9-]+\.)*[a-z2-7-]{56}\.onion[^\s]*$"
)

# pattern for simplex chatroom links
SIMPLEX_CHATROOM_PATTERN = re.compile(
    r"(?:https?:\/\/(?:simplex\.chat|[^\/]+)|simplex:)\/(?:contact|invitation)#\/\?v=[\d-]+"
    r"&smp=[^&]+(?:&[^=]+=[^&]*)*(?:&data=\{[^}]*\})?"
)

# pattern for an smp or xftp simplex server ((smp|xftp):// 44-byte key @ host [:port])
SIMPLEX_SERVER_PATTERN = re.compile(
    r"^(smp|xftp):\/\/([a-zA-Z0-9\-_+=]{44})@([a-z2-7]{56}\.onion|"
    r"([a-zA-Z0-9\-\.]+\.[a-zA-Z0-9\-\.]+))"
    r"{1,}(?::[1-9][0-9]{0,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|"
    r"65[0-4][0-9]{2}|655[0-3][0-9]|6553[0-5])?$"
)

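As a quick illustration (not part of the commit), these centralized patterns can be exercised directly; a minimal sketch, assuming scripts/ is on sys.path so that `import conf` resolves:

# minimal sketch, not from the repo: exercising conf's URL patterns
import conf

v3_onion = 'http://' + 'a' * 56 + '.onion'  # shape of a v3 onion address
print(bool(conf.ONION_URL_PATTERN.match(v3_onion)))              # True
print(bool(conf.ONION_URL_PATTERN.match('a' * 56 + '.onion')))   # True: a bare hostname passes too
print(bool(conf.ONION_URL_PATTERN.match('http://example.com')))  # False: not an onion address
print(bool(conf.CLEARNET_URL_PATTERN.match('https://example.com/page')))  # True
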
@@ -1,6 +1,8 @@
 from utils import *
+import logic.lantern_logic as lantern
 from dotenv import load_dotenv
+

 import os, pwd
 import pandas as pd
 import requests

@@ -49,7 +51,6 @@ def main():
             print("Official participant ",line.strip() , "'s directory doesnt exist, creating it")
             os.makedirs(participantdir)
-
             print_colors("""
 ;
 ED.

@@ -529,260 +530,67 @@ Maintenance:
 #####################################################

-    #check if it works when you have a second webring participant
 case 4:
-    print_colors("4) Synchronize new links from existing webring participants, into your unverified.csv file")
-    participantsdir=rootpath+'www/participants/'
-    name=''
-    desc=''
-    trusted=''
-    status=''
-    score=''
-    webringcsvfile=instancepath+'/'+'webring-participants.csv'
-    wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
-    for participant in os.listdir(participantsdir):
-        participantdir=participantsdir+participant
-
-        # NOTE check if the webring participant is yourself, if it is, then skip it
-        if participant != myinstance: # prod: dont use your own intance
-        #if participant == myinstance: # preprod testing only on your own instance
-            #overwrite the existing files in the participant's directory, with their version (download all the csv files from them again)
-            basewurl='http://'+participant+'/participants/'+participant+'/'
-            print_colors(f"{basewurl}")
-            print_colors(f"[+] Downloading the files of: {participant} ")
-            w_vcsv=basewurl+'verified.csv'
-            w_uvcsv=basewurl+'unverified.csv'
-            w_blcsv=basewurl+'blacklist.csv'
-            w_scsv=basewurl+'sensitive.csv'
-            w_webcsv=basewurl+'webring-participants.csv'
-
-            # verify that their verified.csv csv file exists at basewurl+'verified.csv'
-            if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
-                print_colors("[-] Webring Participant isn't reachable, skipping", is_error=True)
-            else: #if the webring participant is reachable, proceed
-                print_colors("[+] Webring Participant is reachable, updating their csv files:")
-                for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
-                    # FOR EACH CSV FILE TO GET:
-                    # URL: basewurl / FILE.CSV
-                    # PATH: participantdir / FILE.CSV
-                    # download the external csv file and save it into the "text" variable:
-                    #response = urllib.request.urlopen(basewurl+i)
-                    response = requests.get(basewurl+i, proxies=proxies)
-                    #data = response.read() # a `bytes` object
-                    #text = data.decode('utf-8')
-                    text = response.text
-                    # save the text variable into the destination file:
-                    csvfilepath=participantdir+'/'+i
-                    with open(csvfilepath, "w") as file:
-                        file.write(text)
-                    f = open(csvfilepath,"r")
-
-                # download the banner.png image:
-                bannerurl=basewurl+'banner.png'
-                bannerpath=participantdir+'/banner.png'
-                r = requests.get(bannerurl, stream=True, proxies=proxies)
-                with open(bannerpath, 'wb') as f:
-                    r.raw.decode_content = True
-                    shutil.copyfileobj(r.raw, f)
-
-                # SANITY CHECK ON THE BANNER PNG IMAGE:
-                if IsBannerValid(bannerpath):
-                    pass
-                else:
-                    # if false, overwrite it with the template banner png file
-                    os.remove(bannerpath)
-                    # copy templates/banner.png to bannerpath
-                    bannertemplatepath=templatepath+'banner.png'
-                    shutil.copyfile(bannertemplatepath, bannerpath)
-
-                # check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
-                # and display only the matching entries in unverified.csv in an array format (display it in CLI).
-                filter_wdf = wdf[wdf.URL.str.contains(participant,na=False)]
-                # check if there are no results, dont proceed if there are none!
-                if filter_wdf.size == 0: #skip if webring participant is already listed, otherwise proceed
-                    newrow=[name,participant,desc,trusted,status,score]
-                    wdf.loc[-1] = newrow # adding a row
-                    wdf.index = wdf.index + 1 # shifting index
-                    wdf = wdf.sort_index() # sorting by index
-                    wdf.to_csv(webringcsvfile, index=False)
-                else:
-                    pass
-
-                # iterate through the participant's verified.csv and unverified.csv files
-                for w in ['verified.csv','unverified.csv']:
-                    csvfilepath=participantdir+'/'+w
-                    print_colors(f"{csvfilepath}")
-                    csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
-
-                    print("[+] Removing the participant's duplicate entries... ")
-                    # REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
-                    csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
-                    csvdf = csvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
-                    csvdf.to_csv(csvfilepath, index=False)
-                    csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
-
-                    bldf[['blacklisted-words']].iterrows()
-                    rows2delete= [] # it is an empty list at first
-                    for i,j in csvdf.iterrows():
-                        row=csvdf.loc[i,:].values.tolist()
-                        # check the number of columns in said row,
-                        # print('rowcolnum:',len(row),' colnum:',len(csvdf.columns))
-                        # print_colors(f"{row}")
-
-                        ################################ SANITY CHECKS ####################################
-                        ### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
-                        if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES" :
-                            csvdf.at[i, 'Status'] = "YES"
-                            csvdf.to_csv(csvfilepath, index=False)
-                        else:
-                            csvdf.at[i, 'Status'] = "NO"
-                            csvdf.to_csv(csvfilepath, index=False)
-
-                        if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES" :
-                            csvdf.at[i, 'Sensitive'] = "YES"
-                            csvdf.to_csv(csvfilepath, index=False)
-                        else:
-                            csvdf.at[i, 'Sensitive'] = "NO"
-                            csvdf.to_csv(csvfilepath, index=False)
-
-                        print('sync:::', csvdf.at[i, 'Instance'])
-                        ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
-                        if IsURLValid(str(csvdf.at[i, 'Instance'])) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
-                            #mark the row for deletion as it has invalid inputs
-                            if i not in rows2delete:
-                                print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
-                                print(row)
-                                rows2delete.append(i) #mark the row for deletion if not already done
-
-                        ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
-                        for k,l in bldf.iterrows():
-                            blword=bldf.at[k, 'blacklisted-words']
-                            if any(blword in str(x) for x in row) == True:
-                                if i not in rows2delete:
-                                    print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
-                                    rows2delete.append(i) #mark the row for deletion if not already done
-                            else:
-                                if i not in rows2delete:
-                                    # not a blacklisted link, therefore it is suitable to be added to your own csv files:
-                                    ################################ CHECKING FOR DUPLICATES! #########################
-                                    # for each link in the participant's verified/unverified csv files,
-                                    # check if the link is already listed in your own verified.csv or unverified.csv
-                                    filterterm=csvdf.at[i, 'URL']
-                                    #print('1)',filterterm)
-                                    filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
-                                    filter_vdf2= vdf[vdf.Name.str.contains(filterterm,na=False)] # do not accept the new link if the name already exists in verified.csv
-                                    #print('2)',filter_vdf)
-                                    #print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
-                                    uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
-                                    # TODO DELETE ALL DUPLICATES OF UVDF !
-                                    uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
-                                    uvdf = uvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
-                                    filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
-                                    filter_uvdf2= uvdf[uvdf.Name.str.contains(filterterm,na=False)] # do not accept the new link if the name already exists in unverified.csv
-                                    if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0 and len(filter_uvdf2.index) == 0 and len(filter_vdf2.index) == 0 :
-                                        newrow=row
-                                        uvdf.loc[-1] = newrow # adding a row
-                                        uvdf.index = uvdf.index + 1 # shifting index
-                                        uvdf = uvdf.sort_index() # sorting by index
-                                        uvdf.to_csv(unverifiedcsvfile, index=False)
-
-                                        print("[+] NEW ROW =",newrow)
-                                        print_colors("[+] New row added to your own unverified.csv file!")
-                                    else:
-                                        pass
-                                        #print_colors(f'[-] Skipping row as it is already added in {w} {row}',is_error=True)
-
-                        ###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE###################
-
-                        ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
-                        for k,l in sedf.iterrows():
-                            seword=sedf.at[k, 'sensitive-words']
-                            if any(seword in str(x) for x in row) == True:
-                                if csvdf.at[i, 'Sensitive'] != 'NO':
-                                    print_colors(f"Marking row {i} as sensitive, as it matches with a sensitive word")
-                                    csvdf.at[i, 'Sensitive']='YES'
-
-                    #print_colors(f'[-] Rows to delete: {rows2delete}', is_error=True)
-                    # only delete rows after you've gone through all the unverified.csv OR verified.csv rows'
-                    # check for NAME duplicates and mark them for deletion:
-                    # remove name duplicates that are in unverifie.csv yet exist in verified.csv (as verified.csv takes the priority)
-                    if w == 'unverified.csv':
-                        try:
-                            # check if the given row Name already exists in verified.csv
-                            filterterm=csvdf.at[i, 'Name']
-                            filter_vdf= vdf[vdf.Name.str.contains(filterterm,na=False)]
-                            print('[+] CHECKING FOR DUPLIATES: ',filterterm)
-                            if len(filter_vdf.index) != 0:
-                                # drop the unverified.csv row if its name already exists in verified.csv
-                                print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ',row)
-                                rows2delete.append(i) #mark the row for deletion if not already done
-                        except:
-                            pass
-
-                    for i in rows2delete:
-                        row=csvdf.loc[i,:].values.tolist()
-                        print_colors(f'[+] REMOVING ROW: {i}{row}')
-                        csvdf.drop(i, inplace= True)
-                        csvdf.to_csv(csvfilepath, index=False)
-                    rows2delete= [] # it is an empty list at first
-
-                    # fill missing description in our unverified.csv that other participants verified.csv have filled
-                    if w == 'verified.csv':
-                        uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
-                        # merge participant's verified.csv on our unverified.csv on URL
-                        merged_df = uvdf.merge(csvdf[['URL', 'Description']],
-                                               on='URL',
-                                               how='left',
-                                               suffixes=('', '_participant'))
-                        # filter empty description that has participant's description
-                        no_descr_filter = ((merged_df['Description'].isna()) | (merged_df['Description'].str.strip() == '')) & \
-                                          (~merged_df['Description_participant'].isna()) & (merged_df['Description_participant'].str.strip() != '')
-                        no_descr_filter_count = no_descr_filter.sum()
-                        # update our empty description if the participant has any filled description
-                        if no_descr_filter_count > 0:
-                            merged_df.loc[no_descr_filter, 'Description'] = merged_df.loc[no_descr_filter, 'Description_participant']
-                            # keep only original columns
-                            uvdf_updated = merged_df[uvdf.columns]
-                            uvdf_updated.to_csv(unverifiedcsvfile, index=False)
-                            print(f'[+] Updated {no_descr_filter_count} empty description(s) in your unverified.csv found on partipant\'s {w}')
-                        # remove all name duplicates from your own unverified.csv file:
-                        for i,j in uvdf.iterrows():
-                            row=uvdf.loc[i,:].values.tolist()
-                            # check if the given row Name already exists in verified.csv
-                            filterterm=uvdf.at[i, 'Name']
-                            filter_vdf= vdf[vdf.Name.str.contains(filterterm,na=False)]
-                            print('[+] CHECKING FOR DUPLIATES: ',filterterm)
-                            if len(filter_vdf.index) != 0:
-                                # drop the unverified.csv row if its name already exists in verified.csv
-                                print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ',row)
-                                rows2delete.append(i) #mark the row for deletion if not already done
-                        for i in rows2delete:
-                            row=uvdf.loc[i,:].values.tolist()
-                            print_colors(f'[+] REMOVING ROW: {i}{row}')
-                            uvdf.drop(i, inplace= True)
-                            uvdf.to_csv(unverifiedcsvfile, index=False)
-                        rows2delete= [] # it is an empty list at first
-
+    print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
+    try:
+        print_colors('[+] Syncing official webrings to local webrings')
+        webring_df = get_local_webring_participants()
+
+        current_instance = get_current_instance()
+
+        for participant in webring_df.itertuples(index=False, name='columns'):
+            # Check if the participant is my instance
+            if current_instance in participant:
+                continue
+
+            if not is_participant_reachable(participant.URL):
+                print_colors(f"[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
+                continue
+
+            print_colors('[+] Downloading participant\'s files to store locally')
+            lantern.download_participant_data(participant.URL)
+
+            print_colors('[+] Reading local blacklist and sensitive words')
+            local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
+
+            print_colors('[+] Reading local verified and unverified')
+            local_verified_df, local_unverified_df = get_local_verified_and_unverified()
+
+            participant_url = generate_local_participant_dir(participant.URL)
+
+            print_colors('[+] Reading webring participant\'s verified and unverified')
+            participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_url)
+
+            print_colors('[+] Removing unvalidated and blacklisted rows')
+            participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
+            participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
+
+            print_colors('[+] Marking sensitive rows')
+            participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
+            participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
+
+            if participant.Trusted == 'YES':
+                print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
+                local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
+            else:
+                print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
+                local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
+
+            print_colors('[+] Copying participant\'s unverified to local unverified')
+            local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
+
+            print_colors('[+] Saving local verified and unverified')
+            save_local_verified_and_unverified(local_verified_df, local_unverified_df)
+
+    except Exception as err:
+        print_colors("[-] Option 4 failed suddenly, please try again", is_error=True)

 break

 case 5:
     print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
     webring_participant_url = ''

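One subtlety in the rewritten option 4 loop above: `itertuples(index=False, name='columns')` yields namedtuples, so `current_instance in participant` tests membership against the row's values, and fields are read by attribute (`participant.URL`, `participant.Trusted`). A tiny standalone sketch with invented sample values, not from the repo:

# illustrative sketch: membership and attribute access on an itertuples row
import pandas as pd

df = pd.DataFrame({'Name': ['lantern'], 'URL': ['myinstance.onion'], 'Trusted': ['YES']})
for row in df.itertuples(index=False, name='columns'):
    # `in` checks the tuple's values, so this is True when any field equals the instance URL
    print('myinstance.onion' in row)      # True
    print(row.URL, row.Trusted)           # attribute access used by the new code
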
@@ -1197,93 +1005,50 @@ Maintenance:

 case 9:
     print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
-    for w in ['verified.csv', 'unverified.csv']:
-        csvfilepath = os.path.join(instancepath, w)
-        print_colors(f"Processing file: {csvfilepath}")
-        try:
-            csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
-            print_colors(f"Removing duplicates in {csvfilepath}")
-            #print_colors(f"{csvdf[['URL']]}")
-            csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
-            #print_colors(f"{csvdf[['URL']]}")
-            csvdf.to_csv(csvfilepath, index=False)
-            print_colors(f"Cleaned data:\n{csvdf[['URL']]}")
-        except FileNotFoundError:
-            print_colors(f"File not found: {csvfilepath}")
-        except Exception as e:
-            print_colors(f"An error occurred while processing {csvfilepath}: {e}")
-    break
+    try:
+        print_colors('[+] Reading local verified and unverified')
+        verified_df, unverified_df = get_local_verified_and_unverified()
+
+        print_colors('[+] Removing cross dataframe replications')
+        verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
+
+        print_colors('[+] Saving local verified and unverified')
+        save_local_verified_and_unverified(verified_df, unverified_df)
+
+    except Exception as err:
+        print_colors("[-] Option 9 failed suddenly, please try again", is_error=True)

 break

 case 10:
     print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
-    participantspath = rootpath+'www/participants/'
-    for participant in os.listdir(participantspath):
-        print_colors(f"Participant: {participant}")
-        #read=input("Continue?")
-        participantdir= participantspath+participant
-        ################ BEGIN SANITY CHECKS FOR EACH PARTICIPANTS ##############
-        # iterate through the participant's verified.csv and unverified.csv files
-        for w in ['verified.csv','unverified.csv']:
-            csvfilepath=participantdir+'/'+w
-            print_colors(f"{csvfilepath}")
-            csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
-            rows2delete= [] # it is an empty list at first
-            for i,j in csvdf.iterrows():
-                row=csvdf.loc[i,:].values.tolist()
-                #print_colors(f"{row}")
-
-                ################################ SANITY CHECKS ####################################
-                ### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
-                if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES" :
-                    csvdf.at[i, 'Status'] = "YES"
-                    csvdf.to_csv(csvfilepath, index=False)
-                else:
-                    csvdf.at[i, 'Status'] = "NO"
-                    csvdf.to_csv(csvfilepath, index=False)
-
-                if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES" :
-                    csvdf.at[i, 'Sensitive'] = "YES"
-                    csvdf.to_csv(csvfilepath, index=False)
-                else:
-                    csvdf.at[i, 'Sensitive'] = "NO"
-                    csvdf.to_csv(csvfilepath, index=False)
-
-                ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
-                if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
-                    if i not in rows2delete:
-                        print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
-                        #print_colors(f"{row}")
-                        print(IsURLValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsURLValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
-                        rows2delete.append(i)
-                        read=input("Continue?")
-
-                ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
-                for k,l in bldf.iterrows():
-                    blword=bldf.at[k, 'blacklisted-words']
-                    if any(blword in str(x) for x in row) == True:
-                        if i not in rows2delete:
-                            print_colors(f"Marking row {i} for deletion, as it matches with the blacklisted word {blword}")
-                            rows2delete.append(i)
-                            #read=input("Continue?")
-                ### SANITY CHECK 3: Mark all rows that match sensitive words to be sensitive = YES
-                for k,l in sedf.iterrows():
-                    seword=sedf.at[k, 'sensitive-words']
-                    if any(seword in str(x) for x in row) == True:
-                        print_colors(f"Marking row {i} as sensitive, as it matches with the sensitive word {seword}")
-                        csvdf.at[i, 'Sensitive']="YES"
-                        csvdf.to_csv(csvfilepath, index=False)
-                        #read=input("Continue?")
-
-            for i in rows2delete:
-                row=csvdf.loc[i,:].values.tolist()
-                print_colors(f'[+] REMOVING ROW : {i} {row}')
-                csvdf.drop(i, inplace= True)
-                csvdf.to_csv(csvfilepath, index=False)
-            #read=input("Continue?")
+    try:
+        print_colors('[+] Reading local blacklist and sensitive words')
+        local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
+
+        for participant in os.listdir(conf.PARTICIPANT_DIR):
+            participant_local_dir = conf.PARTICIPANT_DIR + participant + '/'
+
+            print_colors('[+] Reading webring participant\'s verified and unverified')
+            participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_local_dir)
+
+            print_colors('[+] Removing unverified and blacklisted rows')
+            participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
+            participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
+
+            print_colors('[+] Marking sensitive rows')
+            participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
+            participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
+
+            print_colors('[+] Saving local participant verified and unverified')
+            save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir)
+
+    except Exception as err:
+        print_colors("[-] Option 10 failed suddenly, please try again", is_error=True)

 break

 case 11:

scripts/local_testing/.gitkeep (new file) | 0

scripts/logic/lantern_logic.py (new file) | 96

@@ -0,0 +1,96 @@
import utils
import os
import shutil  # needed by the banner fallback below; missing from the original import list
import conf
import requests


def download_participant_data(participant):
    """
    Downloads the participant's csv files and banner

    Parameters:
        participant (str): The url of the webring participant.

    Returns:
        Boolean: True if all files downloaded, False if any of them failed
    """

    try:
        utils.print_colors(f"[+] Downloading webring {participant} csv files and banner")

        local_participant_dir = utils.generate_local_participant_dir(participant)

        os.makedirs(local_participant_dir, exist_ok=True)

        for file_name in conf.CSV_FILES:

            csv_res = requests.get(f'{utils.generate_participant_url(participant)}{file_name}', proxies=conf.PROXIES, timeout=10)

            with open(f'{local_participant_dir}{file_name}', "w") as file:
                file.write(csv_res.text)

        banner_res = requests.get(f'{utils.generate_participant_url(participant)}banner.png', stream=True, proxies=conf.PROXIES, timeout=10)

        banner_path = f'{local_participant_dir}banner.png'

        with open(banner_path, 'wb') as f:
            f.write(banner_res.content)

        # SANITY CHECK ON THE BANNER PNG IMAGE:
        if not utils.IsBannerValid(banner_path):
            # if false, overwrite it with the template banner png file
            os.remove(banner_path)
            shutil.copyfile(f'{conf.TEMPLATE_PATH}banner.png', banner_path)

        utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")

    except Exception as err:
        utils.print_colors("[-] Downloading webring participant's files failed.", is_error=True)


def clean_csv(df, blacklist):
    """
    Cleans duplications and blacklisted rows

    Parameters:
        df (dataframe): The dataframe we want to clean.
        blacklist (list): The blacklisted words.

    Returns:
        Dataframe: Cleaned dataframe.
    """
    try:
        if not df.empty:
            df = utils.remove_duplications(df)

            df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]

        if not df.empty:
            df = df[df.apply(utils.is_row_valid, axis=1)]

    except Exception as err:
        utils.print_colors("[-] cleaning dataframe failed", is_error=True)

    return df


def mark_sensitive(df, sensitive_list):
    """
    Marks rows as sensitive

    Parameters:
        df (dataframe): The dataframe we want to mark.
        sensitive_list (list): The sensitive words.

    Returns:
        Dataframe: Marked dataframe.
    """

    try:
        if not df.empty:
            sensitive_rows = df.apply(lambda row: any(word in str(value) for word in sensitive_list for value in row), axis=1)

            df.loc[sensitive_rows, 'Sensitive'] = 'YES'
            df.loc[~sensitive_rows, 'Sensitive'] = 'NO'

    except Exception as err:
        utils.print_colors("[-] Marking sensitive words failed.", is_error=True)

    return df

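To see the filtering and marking ideas end to end, here is a small standalone sketch (not part of the commit; sample data invented, plain pandas instead of the module's helpers):

# illustrative run of the clean_csv / mark_sensitive logic
import pandas as pd

df = pd.DataFrame({
    'Name': ['GoodSite', 'BadSite'],
    'Description': ['a forum', 'contains badword here'],
    'Sensitive': ['', ''],
})
blacklist = ['badword']
sensitive_list = ['forum']

# clean_csv: drop any row where any cell contains a blacklisted word
df = df[~df.apply(lambda row: any(w in str(v) for w in blacklist for v in row), axis=1)]

# mark_sensitive: flag rows where any cell contains a sensitive word
mask = df.apply(lambda row: any(w in str(v) for w in sensitive_list for v in row), axis=1)
df.loc[mask, 'Sensitive'] = 'YES'
df.loc[~mask, 'Sensitive'] = 'NO'
print(df)  # BadSite dropped, GoodSite marked Sensitive=YES
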
scripts/utils.py | 397

@@ -7,6 +7,8 @@ import json
 #from SimpleX.utils import IsUrlValid
 import urllib.parse
 from websockets.sync.client import connect
+import conf
+import pandas as pd


 PURPLE = '\033[35;40m'

@@ -15,34 +17,24 @@ RED = '\033[31;40m'
 BOLD_RED = '\033[31;40;1m'
 RESET = '\033[m'

-# name should contain only up to 64 alphanumeric characters
-VALID_NAME_PATTERN = re.compile(r"^[A-Za-z0-9]{1,64}$")
-
-# pattern for regular urls (https://stackoverflow.com/a/3809435)
-CLEARNET_URL_PATTERN = re.compile(
-    r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]"
-    r"{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
-)
-
-# pattern for onion urls (56 bytes of base32 alphabet + .onion)
-# it works also without http(s)://, so just the hostname will also go through
-ONION_URL_PATTERN = re.compile(
-    r"^(https?:\/\/)?([a-zA-Z0-9-]+\.)*[a-z2-7-]{56}\.onion[^\s]*$"
-)
-
-# pattern for simplex chatroom links
-SIMPLEX_CHATROOM_PATTERN = re.compile(
-    r"(?:https?:\/\/(?:simplex\.chat|[^\/]+)|simplex:)\/(?:contact|invitation)#\/\?v=[\d-]+"
-    r"&smp=[^&]+(?:&[^=]+=[^&]*)*(?:&data=\{[^}]*\})?"
-)
-
-# pattern for smp or xftp simplex server ((smp|xftp):// 44 byte key @ url [:port])
-SIMPLEX_SERVER_PATTERN = re.compile(
-    r"^(smp|xftp):\/\/([a-zA-Z0-9\-_+=]{44})@([a-z2-7]{56}\.onion|"
-    r"([a-zA-Z0-9\-\.]+\.[a-zA-Z0-9\-\.]+))"
-    r"{1,}(?::[1-9][0-9]{0,4}|[1-5][0-9]{4}|6[0-4][0-9]{3}|"
-    r"65[0-4][0-9]{2}|655[0-3][0-9]|6553[0-5])?$"
-)
+def get_current_instance():
+    """
+    Reads the local instance onion url from the user's home directory
+
+    Returns:
+        str: the local instance onion url
+    """
+
+    #expanduser gives the current user directory
+    instance_file = os.path.expanduser("~") + '/.darknet_participant_url'
+
+    with open(instance_file) as f:
+        return f.read().rstrip()
+
+#Set the local dir on script run
+conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/'
+
+###################### Validations ######################

 def IsSimplexChatroomValid(url: str) -> bool:
     """

@@ -50,13 +42,13 @@ def IsSimplexChatroomValid(url: str) -> bool:
     Returns True if URL is a SimpleX chatroom,
     False otherwise
     """
-    return bool(SIMPLEX_CHATROOM_PATTERN.match(url))
+    return bool(conf.SIMPLEX_CHATROOM_PATTERN.match(url))

 def RecognizeSimplexType(url: str) -> str:
     """
     Recognizes Simplex Server URL, returns smp, xftp or invalid
     """
-    match = SIMPLEX_SERVER_PATTERN.match(url)
+    match = conf.SIMPLEX_SERVER_PATTERN.match(url)
     if match:
         return match.group(1)
     else:

@@ -83,14 +75,14 @@ def IsClearnetLinkValid(url: str) -> bool:
     Returns True if URL is a valid clearnet URL
     False otherwise
     """
-    return bool(CLEARNET_URL_PATTERN.match(url))
+    return bool(conf.CLEARNET_URL_PATTERN.match(url))

 def IsOnionLinkValid(url: str) -> bool:
     """
     Returns True if URL is a valid onion URL
     False otherwise
     """
-    return bool(ONION_URL_PATTERN.match(url))
+    return bool(conf.ONION_URL_PATTERN.match(url))

 def RecognizeURLType(url: str) -> str:
     """

@@ -123,8 +115,6 @@ def IsURLValid(url: str) -> bool:
     return RecognizeURLType(url) != 'invalid'

-
-#### Checking Functions to validate that links are legit ####
 def CheckUrl(url):
     """
     Checks if URL is actually reachable via Tor

@@ -141,6 +131,31 @@ def CheckUrl(url):
     except requests.exceptions.ReadTimeout:
         return False

+###TODO: should replace CheckUrl
+# checks if all the webring participant's files are reachable
+def is_participant_reachable(instance):
+    """
+    Checks if all URL files are actually reachable via Tor
+
+    Parameters:
+        instance (str): The participant onion address
+
+    Returns:
+        Boolean: False if any file is unreachable, True if all are reachable
+    """
+
+    url = generate_participant_url(instance)
+
+    # Checks all files on a webring participant, if all are reached returns True
+    for file_name in conf.CSV_FILES:
+        try:
+            status = requests.get(f'{url}{file_name}', proxies=conf.PROXIES, timeout=10).status_code
+            if status != 200:
+                return False
+        except Exception as err:
+            return False
+
+    return True

|
#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
|
||||||
def IsBannerValid(path: str) -> bool:
|
def IsBannerValid(path: str) -> bool:
|
||||||
|
@ -162,9 +177,6 @@ def IsBannerValid(path: str) -> bool:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def IsStatusValid(status: str) -> bool:
|
def IsStatusValid(status: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Checks if status contains only ['YES','NO']. Verbose only if False is returned
|
Checks if status contains only ['YES','NO']. Verbose only if False is returned
|
||||||
|
@ -176,7 +188,6 @@ def IsStatusValid(status: str) -> bool:
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def IsScoreValid(score: str) -> bool:
|
def IsScoreValid(score: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check the Score is only "^[0-9.,]+$" with 8 max chars.
|
Check the Score is only "^[0-9.,]+$" with 8 max chars.
|
||||||
|
@ -231,7 +242,7 @@ def IsNameValid(name: str) -> bool:
|
||||||
Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
|
Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return bool(VALID_NAME_PATTERN.fullmatch(name.strip()))
|
return bool(conf.VALID_NAME_PATTERN.fullmatch(name.strip()))
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@@ -253,7 +264,321 @@ def send_server_checks(url: str) -> tuple[str, str, str]:
     resp_type = response["resp"]["type"]
     failed_response = response['resp'].get('testFailure')

     return (response, resp_type, failed_response)

+def is_row_valid(row):
+    """
+    validates a dataframe row to check if all fields are valid
+
+    Parameters:
+        row (dict): dataframe row
+
+    Returns:
+        Boolean: True if row is valid, False if row isn't valid
+    """
+    try:
+        return (
+            IsURLValid(row['Instance']) and
+            IsCategoryValid(row['Category']) and
+            IsNameValid(row['Name']) and
+            IsURLValid(row['URL']) and
+            IsStatusValid(row['Sensitive']) and
+            IsDescriptionValid(row['Description']) and
+            IsStatusValid(row['Status']) and
+            IsScoreValid(row['Score'])
+        )
+
+    except Exception as err:
+        return False
+
+###################### General ######################
+
+def merge_verification_df(receiving_df, merging_df):
+    """
+    merges 2 dataframes of type verified or unverified (does not merge rows whose name or url is already present)
+
+    Parameters:
+        receiving_df (Dataframe): dataframe we want to receive the data
+        merging_df (Dataframe): dataframe we want to merge into the receiving dataframe
+
+    Returns:
+        Dataframe: the combined dataframe will be returned
+    """
+    try:
+        filtered_df = merging_df[~((merging_df['URL'].isin(receiving_df['URL'])) | merging_df['Name'].isin(receiving_df['Name']))]
+
+        if filtered_df.empty:
+            return receiving_df
+
+        elif receiving_df.empty:
+            return filtered_df
+
+        else:
+            return pd.concat([receiving_df, filtered_df], ignore_index=True)
+
+    except Exception as err:
+        return receiving_df
+
+def remove_duplications(df):
+    """
+    removes url and name duplications from the dataframe
+
+    Parameters:
+        df (Dataframe): the dataframe to remove duplications from
+
+    Returns:
+        Dataframe: the dataframe after all duplications were removed
+    """
+    try:
+        df = df.drop_duplicates(subset='Name')
+        df = df.drop_duplicates(subset='URL')
+
+    except Exception as err:
+        print_colors('[-] Removing duplication failed', is_error=True)
+
+    return df
+
+def remove_cross_dataframe_replications(main_df, sub_df):
+    """
+    removes replications from sub_df that exist in main_df
+
+    Parameters:
+        main_df (Dataframe): the dataframe to keep replications in
+        sub_df (Dataframe): the dataframe to remove replications from
+
+    Returns:
+        Dataframe: the main_df with duplications removed
+        Dataframe: the sub_df with duplications and replications removed
+    """
+    try:
+        main_df = remove_duplications(main_df)
+        sub_df = remove_duplications(sub_df)
+
+        mask = sub_df['URL'].isin(main_df['URL']) | sub_df['Name'].isin(main_df['Name'])
+
+        sub_df = sub_df[~mask]
+
+    except Exception as err:
+        print_colors('[-] Removing cross dataframe duplications failed', is_error=True)
+
+    return main_df, sub_df

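A small illustrative run (not part of the commit; sample values invented) of the filter these merge and dedupe helpers rely on:

# how merge_verification_df avoids re-adding rows that are already known
import pandas as pd

local = pd.DataFrame({'Name': ['A'], 'URL': ['a.onion']})
incoming = pd.DataFrame({'Name': ['A', 'B'], 'URL': ['a.onion', 'b.onion']})

# same mask as the helpers: drop incoming rows whose URL or Name already exists locally
filtered = incoming[~(incoming['URL'].isin(local['URL']) | incoming['Name'].isin(local['Name']))]
print(pd.concat([local, filtered], ignore_index=True))  # keeps A once, adds only B
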
+###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local (lantern host) participant
+def save_local_verified_and_unverified(verified_df, unverified_df):
+    """
+    saves the local verified and unverified
+
+    Parameters:
+        verified_df (Dataframe): local verified rows dataframe
+        unverified_df (Dataframe): local unverified rows dataframe
+
+    Returns:
+        bool: True if successful, False if not
+    """
+    try:
+        current_instance = get_current_instance() + '/'
+
+        verified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv', index=False)
+
+        unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)
+
+        print_colors('[+] Verified and unverified saved successfully')
+
+        return True
+
+    except Exception as err:
+        print_colors('[-] Saving verified and unverified failed', is_error=True)
+        return False
+
+def save_local_participant_verified_and_unverified(verified_df, unverified_df, participant):
+    """
+    saves the local verified and unverified of a participant
+
+    Parameters:
+        verified_df (Dataframe): local verified rows dataframe
+        unverified_df (Dataframe): local unverified rows dataframe
+        participant (str): participant's onion local path
+
+    Returns:
+        bool: True if successful, False if not
+    """
+    try:
+        verified_df.to_csv(f'{participant}verified.csv', index=False)
+
+        unverified_df.to_csv(f'{participant}unverified.csv', index=False)
+
+        print_colors('[+] Verified and unverified saved successfully')
+
+        return True
+
+    except Exception as err:
+        print_colors('[-] Saving verified and unverified failed', is_error=True)
+        return False
+
+###################### Getters/Generators ######################
+def generate_participant_url(participant):
+    """
+    generates the url of the webring participant
+
+    Parameters:
+        participant (str): participant's onion address/instance
+
+    Returns:
+        str: the url of the webring participant
+    """
+
+    return f'http://{participant}/participants/{participant}/'
+
+def generate_local_participant_dir(participant):
+    """
+    generates the local files path of the webring participant
+
+    Parameters:
+        participant (str): participant's onion address/instance
+
+    Returns:
+        str: the local path of the webring participant's files
+    """
+
+    return f'{conf.PARTICIPANT_DIR}{participant}/'
+
+def get_participant_local_verified_and_unverified(participant):
+    """
+    reads the local verified csv and the local unverified csv of a participant
+
+    Parameters:
+        participant (str): participant's local files path
+
+    Returns:
+        verified_df (Dataframe): verified.csv as dataframe
+        unverified_df (Dataframe): unverified.csv as dataframe
+    """
+
+    try:
+        verified_df = pd.read_csv(f'{participant}verified.csv')
+
+    except FileNotFoundError:
+        print_colors("[-] File not found: verified.csv", is_error=True)
+        return pd.DataFrame(), pd.DataFrame()
+
+    try:
+        unverified_df = pd.read_csv(f'{participant}unverified.csv')
+
+    except FileNotFoundError:
+        print_colors("[-] Participant file not found: unverified.csv", is_error=True)
+        return pd.DataFrame(), pd.DataFrame()
+
+    return verified_df, unverified_df
+
+def get_official_participants():
+    """
+    reads all the official webring participants
+
+    Returns:
+        list: list of all the official webring participants
+    """
+
+    try:
+        current_instance = get_current_instance()
+
+        with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
+            return [line.strip() for line in file if current_instance not in line]
+
+    except Exception as err:
+        print_colors('[-] Couldn\'t read official webring participants file', is_error=True)
+
+def get_local_blacklist_and_sensitive():
+    """
+    reads the local blacklisted words and the local sensitive words
+
+    Returns:
+        blacklist (list): list of all the words that are blacklisted
+        sensitive_list (list): list of all the words that are sensitive
+    """
+    try:
+        current_instance = get_current_instance() + '/'
+        try:
+            blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
+            blacklist = blacklist_df.iloc[:, 0].tolist()
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: blacklist.csv", is_error=True)
+
+        try:
+            sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
+            sensitive_list = sensitive_df.iloc[:, 0].tolist()
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: sensitive.csv", is_error=True)
+
+        return blacklist, sensitive_list
+
+    except Exception as err:
+        print_colors('[-] Failed reading the blacklist and sensitive words file', is_error=True)
+
+        return [], []
+
+def get_local_verified_and_unverified():
+    """
+    reads the local verified csv and the local unverified csv of the instance
+
+    Returns:
+        verified_df (Dataframe): verified.csv as dataframe
+        unverified_df (Dataframe): unverified.csv as dataframe
+    """
+
+    try:
+        current_instance = get_current_instance() + '/'
+        try:
+            verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: verified.csv", is_error=True)
+
+        try:
+            unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: unverified.csv", is_error=True)
+
+        return verified_df, unverified_df
+
+    except Exception as err:
+        print_colors('[-] Failed reading the verified and unverified files', is_error=True)
+
+        return pd.DataFrame(), pd.DataFrame()
+
+def get_local_webring_participants():
+    """
+    makes sure the official participants are registered in the webring csv file
+
+    Returns:
+        Dataframe: the verified local webring participants dataframe
+    """
+
+    try:
+        webring_df = pd.read_csv(conf.LOCAL_DIR + conf.WEBRING_CSV_FILE)
+
+        # finds any missing official webrings in the local webring file
+        missing_participants = set(get_official_participants()) - set(webring_df['URL'])
+
+        for participant in missing_participants:
+            new_row = [{'Name': '', 'URL': participant, 'Description': '', 'Trusted': 'NO', 'Status': '', 'Score': ''}]
+            webring_df = pd.concat([webring_df, pd.DataFrame(new_row)], ignore_index=True)
+
+        webring_df.to_csv(conf.LOCAL_DIR + conf.WEBRING_CSV_FILE, index=False)
+
+        return webring_df
+
+    except Exception as err:
+        print_colors(f'[-] failed reading webring participants file', is_error=True)
+        return pd.DataFrame()

 def print_colors(s:str=' ', bold:bool=False, is_error:bool = False, default:bool=False):

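For reference, a standalone sketch (not from the repo; sample hostnames invented) of the set-difference registration step inside get_local_webring_participants:

# how missing official participants get appended to the local webring csv
import pandas as pd

webring_df = pd.DataFrame([{'Name': '', 'URL': 'known.onion', 'Description': '', 'Trusted': 'NO', 'Status': '', 'Score': ''}])
official = ['known.onion', 'new.onion']

# URLs in the official list but absent from the local file
missing = set(official) - set(webring_df['URL'])
for participant in missing:
    new_row = [{'Name': '', 'URL': participant, 'Description': '', 'Trusted': 'NO', 'Status': '', 'Score': ''}]
    webring_df = pd.concat([webring_df, pd.DataFrame(new_row)], ignore_index=True)

print(webring_df)  # known.onion kept, new.onion appended untrusted
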