Refactored option 4 + added conf.py + added some TODO comments for review

doctor_dev 2025-05-30 12:12:09 +00:00
parent 4b33e51d11
commit b07ac08547
No known key found for this signature in database
GPG key ID: F12F7F71CB84AEAA
6 changed files with 429 additions and 281 deletions

2
.gitignore vendored
View file

@@ -1,7 +1,7 @@
.git
www/participants/**
crawler/**
scripts/__pycache__/**
__pycache__/
.env
env/
submissions/submission.csv

requirements.txt
View file

@@ -1,15 +1,17 @@
beautifulsoup4==4.13.3
certifi==2024.12.14
charset-normalizer==3.4.1
certifi==2025.4.26
charset-normalizer==3.4.2
dotenv==0.9.9
idna==3.10
numpy==2.2.2
numpy==2.2.6
pandas==2.2.3
pillow==11.2.1
PySocks==1.7.1
python-dateutil==2.9.0.post0
python-socks==2.6.1
pytz==2024.2
python-dotenv==1.1.0
pytz==2025.2
requests==2.32.3
six==1.17.0
tzdata==2025.1
urllib3==2.3.0
python-dotenv==1.0.1
tzdata==2025.2
urllib3==2.4.0
websockets==15.0.1

22
scripts/conf.py Normal file
View file

@@ -0,0 +1,22 @@
ROOT_PATH = '/srv/darknet-lantern/'
STATIC_PATH = ROOT_PATH + 'www/'
TEMPLATE_PATH = ROOT_PATH + 'templates/'
PARTICIPANT_DIR = STATIC_PATH + 'participants/'
OFFICIAL_PARTICIPANTS_FILE = STATIC_PATH + '.official_participants'
WEBRING_CSV_FILE = 'webring-participants.csv'
LOCAL_DIR = '' # Assign on script startup
PROXIES = {
'http': 'socks5h://127.0.0.1:9050',
'https': 'socks5h://127.0.0.1:9050'
}
CSV_FILES = [
'verified.csv',
'unverified.csv',
'blacklist.csv',
'sensitive.csv',
'webring-participants.csv'
]
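# Illustrative consumption of this config (not part of this commit; the onion
# address below is a placeholder):
#   import requests
#   import conf
#   requests.get('http://example.onion/participants/example.onion/verified.csv',
#                proxies=conf.PROXIES, timeout=10)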

View file

@@ -1,6 +1,9 @@
### TODO: importing * is bad practice; should import just utils and use it like in lantern_logic.py
from utils import *
import logic.lantern_logic as lantern
from dotenv import load_dotenv
import os, pwd
import pandas as pd
import requests
@@ -532,259 +535,59 @@ Maintenance:
#####################################################
#check if it works when you have a second webring participant
case 4:
print_colors("4) Synchronize new links from existing webring participants, into your unverified.csv file")
participantsdir=rootpath+'www/participants/'
name=''
desc=''
trusted=''
status=''
score=''
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
for participant in os.listdir(participantsdir):
participantdir=participantsdir+participant
print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
# NOTE check if the webring participant is yourself, if it is, then skip it
if participant != myinstance: # prod: dont use your own intance
#if participant == myinstance: # preprod testing only on your own instance
#overwrite the existing files in the participant's directory, with their version (download all the csv files from them again)
basewurl='http://'+participant+'/participants/'+participant+'/'
print_colors(f"{basewurl}")
print_colors(f"[+] Downloading the files of: {participant} ")
w_vcsv=basewurl+'verified.csv'
w_uvcsv=basewurl+'unverified.csv'
w_blcsv=basewurl+'blacklist.csv'
w_scsv=basewurl+'sensitive.csv'
w_webcsv=basewurl+'webring-participants.csv'
print_colors('[+] Syncing official webrings to local webrings')
# verify that their verified.csv csv file exists at basewurl+'verified.csv'
if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
print_colors("[-] Webring Participant isn't reachable, skipping", is_error=True)
else: #if the webring participant is reachable, proceed
print_colors("[+] Webring Participant is reachable, updating their csv files:")
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
# FOR EACH CSV FILE TO GET:
# URL: basewurl / FILE.CSV
# PATH: participantdir / FILE.CSV
# download the external csv file and save it into the "text" variable:
#response = urllib.request.urlopen(basewurl+i)
response = requests.get(basewurl+i, proxies=proxies)
#data = response.read() # a `bytes` object
#text = data.decode('utf-8')
text = response.text
# save the text variable into the destination file:
csvfilepath=participantdir+'/'+i
with open(csvfilepath, "w") as file:
file.write(text)
f = open(csvfilepath,"r")
webring_df = verify_official_participants_registered()
# download the banner.png image:
current_instance = get_current_instance()
for participant in webring_df.itertuples(index=False, name='columns'):
# Check if the participant is my instance
if current_instance in participant:
continue
bannerurl=basewurl+'banner.png'
bannerpath=participantdir+'/banner.png'
r = requests.get(bannerurl, stream=True, proxies=proxies)
with open(bannerpath, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
if not is_participant_reachable(participant.URL):
print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue
print_colors('[+] Downloading participant\'s files to store locally')
lantern.download_participant_data(participant.URL)
# SANITY CHECK ON THE BANNER PNG IMAGE:
if IsBannerValid(bannerpath):
pass
else:
# if false, overwrite it with the template banner png file
os.remove(bannerpath)
# copy templates/banner.png to bannerpath
bannertemplatepath=templatepath+'banner.png'
shutil.copyfile(bannertemplatepath, bannerpath)
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()
participant_url = generate_local_participant_dir(participant.URL)
# check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
# and display only the matching entries in unverified.csv in an array format (display it in CLI).
filter_wdf = wdf[wdf.URL.str.contains(participant,na=False)]
# check if there are no results, dont proceed if there are none!
if filter_wdf.size == 0: #skip if webring participant is already listed, otherwise proceed
newrow=[name,participant,desc,trusted,status,score]
wdf.loc[-1] = newrow # adding a row
wdf.index = wdf.index + 1 # shifting index
wdf = wdf.sort_index() # sorting by index
wdf.to_csv(webringcsvfile, index=False)
else:
pass
print_colors('[+] Reading webring participant\'s verified and unverified, and removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist)
participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist)
# iterate through the participant's verified.csv and unverified.csv files
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print_colors(f"{csvfilepath}")
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
if participant.Trusted == 'YES':
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
print("[+] Removing the participant's duplicate entries... ")
# REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
csvdf = csvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
csvdf.to_csv(csvfilepath, index=False)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
bldf[['blacklisted-words']].iterrows()
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
row=csvdf.loc[i,:].values.tolist()
# check the number of columns in said row,
# print('rowcolnum:',len(row),' colnum:',len(csvdf.columns))
# print_colors(f"{row}")
################################ SANITY CHECKS ####################################
### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES" :
csvdf.at[i, 'Status'] = "YES"
csvdf.to_csv(csvfilepath, index=False)
else:
csvdf.at[i, 'Status'] = "NO"
csvdf.to_csv(csvfilepath, index=False)
if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES" :
csvdf.at[i, 'Sensitive'] = "YES"
csvdf.to_csv(csvfilepath, index=False)
else:
csvdf.at[i, 'Sensitive'] = "NO"
csvdf.to_csv(csvfilepath, index=False)
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
print(row)
rows2delete.append(i) #mark the row for deletion if not already done
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
for k,l in bldf.iterrows():
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
else:
if i not in rows2delete:
# not a blacklisted link, therefore it is suitable to be added to your own csv files:
################################ CHECKING FOR DUPLICATES! #########################
# for each link in the participant's verified/unverified csv files,
# check if the link is already listed in your own verified.csv or unverified.csv
filterterm=csvdf.at[i, 'URL']
#print('1)',filterterm)
filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
filter_vdf2= vdf[vdf.Name.str.contains(filterterm,na=False)] # do not accept the new link if the name already exists in verified.csv
#print('2)',filter_vdf)
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# TODO DELETE ALL DUPLICATES OF UVDF !
uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
uvdf = uvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
filter_uvdf2= uvdf[uvdf.Name.str.contains(filterterm,na=False)] # do not accept the new link if the name already exists in unverified.csv
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0 and len(filter_uvdf2.index) == 0 and len(filter_vdf2.index) == 0 :
newrow=row
uvdf.loc[-1] = newrow # adding a row
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
uvdf.to_csv(unverifiedcsvfile, index=False)
print("[+] NEW ROW =",newrow)
print_colors("[+] New row added to your own unverified.csv file!")
else:
pass
#print_colors(f'[-] Skipping row as it is already added in {w} {row}',is_error=True)
###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE###################
### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
for k,l in sedf.iterrows():
seword=sedf.at[k, 'sensitive-words']
if any(seword in str(x) for x in row) == True:
if csvdf.at[i, 'Sensitive'] != 'NO':
print_colors(f"Marking row {i} as sensitive, as it matches with a sensitive word")
csvdf.at[i, 'Sensitive']='YES'
#print_colors(f'[-] Rows to delete: {rows2delete}', is_error=True)
# only delete rows after you've gone through all the unverified.csv OR verified.csv rows'
# check for NAME duplicates and mark them for deletion:
# remove name duplicates that are in unverified.csv yet exist in verified.csv (as verified.csv takes the priority)
if w == 'unverified.csv':
try:
# check if the given row Name already exists in verified.csv
filterterm=csvdf.at[i, 'Name']
filter_vdf= vdf[vdf.Name.str.contains(filterterm,na=False)]
print('[+] CHECKING FOR DUPLICATES: ',filterterm)
if len(filter_vdf.index) != 0:
# drop the unverified.csv row if its name already exists in verified.csv
print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ',row)
rows2delete.append(i) #mark the row for deletion if not already done
except:
pass
for i in rows2delete:
row=csvdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i}{row}')
csvdf.drop(i, inplace= True)
csvdf.to_csv(csvfilepath, index=False)
rows2delete= [] # it is an empty list at first
# fill missing description in our unverified.csv that other participants verified.csv have filled
if w == 'verified.csv':
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# merge participant's verified.csv on our unverified.csv on URL
merged_df = uvdf.merge(csvdf[['URL', 'Description']],
on='URL',
how='left',
suffixes=('', '_participant'))
# filter empty description that has participant's description
no_descr_filter = ((merged_df['Description'].isna()) | (merged_df['Description'].str.strip() == '')) & \
(~merged_df['Description_participant'].isna()) & (merged_df['Description_participant'].str.strip() != '')
no_descr_filter_count = no_descr_filter.sum()
# update our empty description if the participant has any filled description
if no_descr_filter_count > 0:
merged_df.loc[no_descr_filter, 'Description'] = merged_df.loc[no_descr_filter, 'Description_participant']
# keep only original columns
uvdf_updated = merged_df[uvdf.columns]
uvdf_updated.to_csv(unverifiedcsvfile, index=False)
print(f'[+] Updated {no_descr_filter_count} empty description(s) in your unverified.csv found on participant\'s {w}')
# remove all name duplicates from your own unverified.csv file:
for i,j in uvdf.iterrows():
row=uvdf.loc[i,:].values.tolist()
# check if the given row Name already exists in verified.csv
filterterm=uvdf.at[i, 'Name']
filter_vdf= vdf[vdf.Name.str.contains(filterterm,na=False)]
print('[+] CHECKING FOR DUPLICATES: ',filterterm)
if len(filter_vdf.index) != 0:
# drop the unverified.csv row if its name already exists in verified.csv
print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ',row)
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=uvdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i}{row}')
uvdf.drop(i, inplace= True)
uvdf.to_csv(unverifiedcsvfile, index=False)
rows2delete= [] # it is an empty list at first
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df)
break
case 5:
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
webring_participant_url = ''

96
scripts/logic/lantern_logic.py Normal file
View file

@@ -0,0 +1,96 @@
import utils
import os
import shutil
import conf
import requests
def download_participant_data(participant):
"""
Downloads the participants csv files and banner
Parameters:
participant (str): The url of the webring participant.
Returns:
Boolean: True if all files downloaded, False if any of them failed
"""
try:
utils.print_colors(f"[+] Downloading webring {participant} csv files and banner")
local_participant_dir = utils.generate_local_participant_dir(participant)
os.makedirs(local_participant_dir, exist_ok=True)
for file_name in conf.CSV_FILES:
csv_res = requests.get(f'{utils.generate_participant_url(participant)}{file_name}', proxies=conf.PROXIES, timeout=10)
with open(f'{local_participant_dir}{file_name}', "w") as file:
file.write(csv_res.text)
banner_res = requests.get(f'{utils.generate_participant_url(participant)}banner.png', stream=True, proxies=conf.PROXIES, timeout=10)
banner_path = f'{local_participant_dir}banner.png'
with open(banner_path, 'wb') as f:
f.write(banner_res.content)
# SANITY CHECK ON THE BANNER PNG IMAGE:
if not utils.IsBannerValid(banner_path):
# if false, overwrite it with the template banner png file
os.remove(banner_path)
shutil.copyfile(f'{conf.TEMPLATE_PATH}banner.png', banner_path)
utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")
return True
except Exception:
utils.print_colors("[-] Downloading webring participant's files failed.", is_error=True)
return False
def clean_csv(df, blacklist):
"""
Cleans duplications and blacklisted rows
Parameters:
df (dataframe): The dataframe we want to clean.
blacklist (list): The blacklisted words.
Returns:
Dataframe: Cleaned dataframe.
"""
try:
if not df.empty:
df = utils.remove_duplications(df)
df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]
if not df.empty:
df = df[df.apply(utils.is_row_valid, axis=1)]
except Exception:
print_colors("[-] cleaning dataframe failed", is_error=True)
return df
def mark_sensitive(df, sensitive_list):
"""
Marks rows as sensitive
Parameters:
df (dataframe): The dataframe we want to mark.
sensitive (list): The sensitive words.
Returns:
Dataframe: Marked dataframe.
"""
try:
if not df.empty:
sensitive_rows = df.apply(lambda row: any(word in str(value) for word in sensitive_list for value in row), axis=1)
df.loc[sensitive_rows, 'Sensitive'] = 'YES'
df.loc[~sensitive_rows, 'Sensitive'] = 'NO'
except Exception:
print_colors("[-] MArking sensitive words failed.", is_error=True)
return df
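# A minimal usage sketch of these helpers (toy values, not part of this commit;
# assumes it runs from scripts/ and that the row values pass utils' validators):
#   import pandas as pd
#   import logic.lantern_logic as lantern
#   row = {'Instance': 'example.onion', 'Category': 'Tools', 'Name': 'Example',
#          'URL': 'http://example.onion', 'Sensitive': 'NO',
#          'Description': 'A made up entry', 'Status': 'YES', 'Score': '100.0'}
#   df = pd.DataFrame([row, row])               # duplicate Name/URL on purpose
#   df = lantern.clean_csv(df, ['casino'])      # dedup + blacklist + per-row validation
#   df = lantern.mark_sensitive(df, ['market']) # sets Sensitive to YES on word match, NO otherwise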

scripts/utils.py
View file

@@ -7,7 +7,8 @@ import json
#from SimpleX.utils import IsUrlValid
import urllib.parse
from websockets.sync.client import connect
import conf
import pandas as pd
PURPLE = '\033[35;40m'
BOLD_PURPLE = '\033[35;40;1m'
@@ -16,8 +17,24 @@ BOLD_RED = '\033[31;40;1m'
RESET = '\033[m'
def get_current_instance():
"""
Checks if all URL files are actually reachable via Tor
#### Checking Functions to validate that links are legit ####
Returns:
str: the local instance onion url
"""
#expanduser gives the current user directory
instance_file = os.path.expanduser("~") + '/.darknet_participant_url'
with open(instance_file) as f:
return f.read().rstrip()
#Set the local dir on script run
conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/'
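# e.g. if ~/.darknet_participant_url contains 'example.onion' (placeholder),
# conf.LOCAL_DIR becomes '/srv/darknet-lantern/www/participants/example.onion/'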
###################### Validations ######################
def CheckUrl(url):
"""
@@ -29,7 +46,7 @@ def CheckUrl(url):
}
try:
status = requests.get(url,proxies=proxies, timeout=5).status_code
if status != 502:
if status == 200:
return True
else:
return False
@@ -38,6 +55,31 @@ def CheckUrl(url):
except requests.exceptions.ReadTimeout as e:
return False
### TODO: should replace CheckUrl
# checks that all of a webring participant's csv files are reachable
def is_participant_reachable(instance):
"""
Checks if all URL files are actually reachable via Tor
Parameters:
instance (str): The participant onion address
Returns:
Boolean: False if any file is unreachable, True if all are reachable
"""
url = generate_participant_url(instance)
# Checks all csv files of the webring participant; returns True only if all are reachable
for file_name in conf.CSV_FILES:
try:
status = requests.get(f'{url}{file_name}',proxies=conf.PROXIES, timeout=10).status_code
if status != 200:
return False
except Exception:
return False
return True
#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
def IsBannerValid(path: str) -> bool:
@@ -59,7 +101,6 @@ def IsBannerValid(path: str) -> bool:
return False
return True
def IsOnionValid(url: str)-> bool:
"""
Checks if the domain(param) is a valid onion domain and return True else False.
@@ -159,25 +200,6 @@ def IsUrlValid(url:str)->bool:
return False
return True
#def IsUrlValid(url:str)->bool:
# """
# Check if url is valid both dark net end clearnet.
# """
# pattern = re.compile("^[A-Za-z0-9:/.-]+$")
# url = str(url)
# if len(url) < 4:
# return False
# if url.endswith('.onion'):
# return IsOnionValid(url)
# else:
# if not url.__contains__('.'):
# return False
# if pattern.fullmatch(url) is None:
# return False
# return True
def IsStatusValid(status: str)-> bool:
"""
Checks if status contains only ['YES','NO']. Verbose only if False is returned
@@ -191,7 +213,6 @@ def IsStatusValid(status: str)-> bool:
return True
def IsScoreValid(score:str)->bool:
"""
Check the Score is only "^[0-9.,]+$" with 8 max chars.
@@ -207,7 +228,6 @@ def IsScoreValid(score:str)->bool:
return False
return True
def IsDescriptionValid(desc:str)->bool:
"""
Check the categories are only [a-zA-Z0-9.' ] with 256 max chars.
@@ -239,8 +259,6 @@ def IsCategoryValid(categories: list)-> bool:
else:
return True
def IsSimpleXServerValid(url: str) -> bool:
pattern = re.compile('[0-9A-Za-z-_]*')
url = url.strip()
@@ -274,8 +292,6 @@ def IsSimpleXServerValid(url: str) -> bool:
# Any error will be a false
return False
def IsNameValid(name: str)->bool:
"""
Check the parameter name only contains [a-zA-Z0-9 ] and is 64 chars long.
@@ -292,7 +308,6 @@ def IsNameValid(name: str)->bool:
return False
return True
def print_colors(s:str=' ', bold:bool=False, is_error:bool = False, default:bool=False):
"""
Helper function to print with colors
@@ -308,8 +323,6 @@ def print_colors(s:str=' ', bold:bool=False, is_error:bool = False, default:bool
else:
print(f"{PURPLE}{s}{RESET}")
def IsSimpleXOnionValid(url: str)-> bool:
"""
Checks if the domain(param) is a valid onion domain and return True else False.
@@ -383,3 +396,215 @@ def send_server_checks(url:str) -> ():
failed_response = response['resp'].get('testFailure')
return (response, resp_type, failed_response)
def is_row_valid(row):
"""
validates a dataframe row, checking that all fields are valid
Parameters:
row (dict): dataframe row
Returns:
Boolean: True if row is valid, False if row isn't valid
"""
try:
return (
IsUrlValid(row['Instance']) and
IsCategoryValid(row['Category']) and
IsNameValid(row['Name']) and
IsUrlValid(row['URL']) and
IsStatusValid(row['Sensitive']) and
IsDescriptionValid(row['Description']) and
IsStatusValid(row['Status']) and
IsScoreValid(row['Score'])
)
except Exception:
return False
###################### General ######################
def merge_verification_df(receiving_df, merging_df):
"""
merges two dataframes of type verified or unverified (does not merge rows whose Name or URL already exists in the receiving dataframe)
Parameters:
receiving_df (Dataframe): dataframe we want to receive the data
merging_df (Dataframe): dataframe we want to merge into the receiving dataframe
Returns:
Dataframe: the combined dataframe will be returned
"""
try:
filtered_df = merging_df[~((merging_df['URL'].isin(receiving_df['URL'])) | merging_df['Name'].isin(receiving_df['Name']))]
if filtered_df.empty:
return receiving_df
elif receiving_df.empty:
return filtered_df
else:
return pd.concat([receiving_df, filtered_df], ignore_index=True)
except Exception:
return receiving_df
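# Toy illustration (values made up): rows whose Name or URL already exist in
# receiving_df are dropped before concatenation:
#   receiving = pd.DataFrame({'Name': ['A'], 'URL': ['http://a.onion']})
#   merging = pd.DataFrame({'Name': ['A', 'B'], 'URL': ['http://x.onion', 'http://b.onion']})
#   merge_verification_df(receiving, merging)  # appends only the 'B' row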
def remove_duplications(df):
"""
remove url and name duplications from the dataframe
Parameters:
df (Dataframe): the dataframe to remove duplications from
Returns:
Dataframe: the dataframe after all duplications were removed
"""
try:
df = df.drop_duplicates(subset='Name')
df = df.drop_duplicates(subset='URL')
except Exception:
pass
return df
###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant
def save_local_verified_and_unverified(verified_df, unverified_df):
"""
saves the local verified and unverified
Parameters:
verified_df (Dataframe): local verified rows dataframe
unverified_df (Dataframe): local unverified rows dataframe
Returns:
Boolean: True if saved successfully, False otherwise
"""
try:
current_instance = get_current_instance() + '/'
verified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv', index=False)
unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)
return True
except Exception:
print_colors('[-] Saving verified and unverified failed',is_error=True )
return False
###################### Getters/Generators ######################
def generate_participant_url(participant):
"""
generates url of the webring participant
Parameters:
participant(str): participant's onion address/instance
Returns:
str: the url of the webring participant
"""
return f'http://{participant}/participants/{participant}/'
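# e.g. generate_participant_url('example.onion') (placeholder address) returns
# 'http://example.onion/participants/example.onion/'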
def generate_local_participant_dir(participant):
"""
generates local files path of the webring participant
Parameters:
participant(str): participant's onion address/instance
Returns:
str: the local path of the webring participant's files
"""
return f'{conf.PARTICIPANT_DIR}{participant}/'
def get_official_participants():
"""
reads all the official webring participants
Returns:
list: list of all the official webring participants
"""
try:
current_instance = get_current_instance()
with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
return [line.strip() for line in file if current_instance not in line]
except Exception:
print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
def get_local_blacklist_and_sensitive():
"""
reads the local blacklisted words and the local sensitive words
Returns:
blacklist(list): list of all the words that are blacklisted
sensitive_list(list): list of all the words that are sensitive
"""
try:
current_instance = get_current_instance() + '/'
blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
blacklist = blacklist_df.iloc[:, 0].tolist()
sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
sensitive_list = sensitive_df.iloc[:, 0].tolist()
return blacklist, sensitive_list
except Exception:
print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True )
return [], []
def get_local_verified_and_unverified():
"""
reads the local verified csv and the local unverified csv
Returns:
verified_df(Dataframe): verified.csv as dataframe
unverified_df(Dataframe): unverified.csv as dataframe
"""
try:
current_instance = get_current_instance() + '/'
verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
return verified_df, unverified_df
except Exception:
print_colors('[-] Failed reading the verified and unverified files',is_error=True )
return pd.DataFrame(), pd.DataFrame()
def get_local_webring_participants():
"""
make sure the official participants are registered in the webring csv file
Returns:
Dataframe: the verified local webring participants dataframe
"""
try:
webring_df = pd.read_csv(conf.LOCAL_DIR + conf.WEBRING_CSV_FILE)
# finds any missing official webrings in the local webring file
missing_participants = set(get_official_participants()) - set(webring_df['URL'])
for participant in missing_participants:
new_row = [{'Name': '','URL': participant,'Description': '','Trusted': 'NO','Status': '','Score': ''}]
webring_df = pd.concat([webring_df, pd.DataFrame(new_row)], ignore_index=True)
webring_df.to_csv(conf.LOCAL_DIR + conf.WEBRING_CSV_FILE, index=False)
return webring_df
except Exception:
print_colors('[-] Failed reading webring participants file', is_error=True)
return pd.DataFrame()
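# Sketch of how the refactored option 4 composes these helpers
# (illustrative only; the real flow lives in the case 4 handler above):
#   webring_df = get_local_webring_participants()
#   for participant in webring_df.itertuples(index=False):
#       if get_current_instance() in participant:
#           continue
#       if not is_participant_reachable(participant.URL):
#           continue
#       lantern.download_participant_data(participant.URL)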