Merge pull request 'issue-80/issue-20_option-6-refactor-and-trusted-webring' (#82) from doctor_dev/darknet-lantern:issue-80/issue-20_option-6-refactor-and-trusted-webring into main

Reviewed-on: http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern/pulls/82
This commit is contained in:
oxeo0 2025-06-07 22:30:54 +02:00
commit 0240c82df0
5 changed files with 663 additions and 324 deletions

View file

@ -1,6 +1,7 @@
from utils import *
import logic.lantern_logic as lantern
from dotenv import load_dotenv
import logic.options as options
import os, pwd
@ -149,6 +150,11 @@ def main():
webpdf = pd.read_csv(webpcsvfile, on_bad_lines='skip')
print_colors(f"[+] file exists, your Webring URL is {instance}")
if 'Blacklisted' not in webpdf.columns:
webpdf['Blacklisted'] = 'NO'
webpdf.to_csv(webpcsvfile)
##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
if len(sys.argv) == 2 and sys.argv[1] == "4":
print("4) Synchronize new links from existing webring participants into your unverified.csv file")
@ -224,6 +230,8 @@ Maintenance:
else:
sensi = 'YES'
#TODO: add blacklisting default to no when refactoring
newrow=[instance,category,name,url,sensi,desc,'YES','100']
print_colors(f"[+] NEWROW= {newrow}")
# (rest is automatic: status, score, instance is = '' because it is your own instance)
@ -529,65 +537,9 @@ Maintenance:
# 6) Trust/UnTrust/Blacklist a webring participant
#####################################################
case 4:
print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
try:
print_colors('[+] Syncing official webrings to local webrings')
webring_df = get_local_webring_participants()
current_instance = get_current_instance()
for participant in webring_df.itertuples(index=False, name='columns'):
# Check if the participant is my instance
if current_instance in participant:
continue
if not is_participant_reachable(participant.URL):
print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue
print_colors('[+] Downloading participant\'s files to store locally')
lantern.download_participant_data(participant.URL)
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()
participant_url = generate_local_participant_dir(participant.URL)
print_colors('[+] Reading webrring participant\'s verified and unverified')
participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_url)
print_colors('[+] Removing unvalidated and blacklisted rows')
participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
if participant.Trusted == 'YES':
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df)
except Exception as err:
print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
options.run_option_4()
break
@ -731,124 +683,9 @@ Maintenance:
##############################################
case 6:
while True:
print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
print_colors(f'{wdf[["URL","Trusted"]]}')
try:
index = int(input("What is the index of the webring participant that you want to edit? -1 to exit ").strip())
if index == -1:
break
elif index in wdf.index:
choice = int(input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant?").strip())
while True:
match choice:
case 1:
# trust the webring participant
choice2=input("You're about to trust another peer, this means that you're going to automatically trust all of the links they have in their verified.csv file! If this is a malicious peer, this action might be potentially risky! Do you want to continue ? (y/n)")
if choice2 == "y":
print_colors(f'[+] Trusting webring participant {wdf.at[index,"URL"]}')
## Warning: In future versions of panda '✔️' will not work. It will show an error.
wdf.at[index,"Trusted"]= 'YES'
wdf.to_csv(webringcsvfile, index=False)
break
else:
print_colors("[-] not trusting webring participant", is_error=True)
break
options.run_option_6()
case 2:
print_colors(f'[+] UnTrusting webring participant {wdf.at[index,"URL"]}')
## Warning: In future versions of panda '' will not work. It will show an error. Maybe change to a 0,1
wdf.at[index,"Trusted"]='NO'
wdf.to_csv(webringcsvfile, index=False)
break
case 3:
print_colors(f'[+] Blacklisting webring participant {wdf.at[index,"URL"]}')
instance2blacklist=wdf.at[index,"URL"]
newrow=[instance2blacklist]
print_colors(f"[+] NEWROW= {newrow}")
# (rest is automatic: status, score, instance is = '' because it is your own instance)
# check if the entry doesn't already exist in verified.csv and in unverified.csv
# if it doesnt exist, add it into unverified.csv
bldf.loc[-1] = newrow # adding a row
bldf.index = bldf.index + 1 # shifting index
bldf = bldf.sort_index() # sorting by index
print_colors("[+] New row added! now writing the csv file:")
bldf.to_csv(blcsvfile, index=False)
# remove all of the entries that came from that participant (drop the lines in your own verified+unverified.csv that have that instance in the instance column)
rows2delete= [] # it is an empty list at first
for i,j in vdf.iterrows():
row=vdf.loc[i,:].values.tolist()
for k,l in bldf.iterrows():
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=vdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i} {row}')
vdf.drop(i, inplace= True)
vdf.to_csv(verifiedcsvfile, index=False)
print_colors(f"{vdf}")
rows2delete= [] # it is an empty list at first
rows2delete= [] # it is an empty list at first
for i,j in uvdf.iterrows():
row=uvdf.loc[i,:].values.tolist()
for k,l in bldf.iterrows():
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=uvdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i} {row}')
uvdf.drop(i, inplace= True)
uvdf.to_csv(unverifiedcsvfile, index=False)
print_colors(f"{uvdf}")
rows2delete= [] # it is an empty list at first
# find all rows that match with the instance name in wdf aswell to remove them
for i,j in wdf.iterrows():
row=wdf.loc[i,:].values.tolist()
for k,l in bldf.iterrows():
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=wdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i} {row}')
wdf.drop(i, inplace= True)
wdf.to_csv(webringcsvfile, index=False)
print_colors(f"{wdf}")
rows2delete= [] # it is an empty list at first
# remove the entire directory in www/participants/INSTANCENAME aswell to get rid of it
instance2blacklistpath=rootpath+'www/participants/'+instance2blacklist
print_colors(f"[+] removing the participant's directory at {instance2blacklistpath}")
shutil.rmtree(instance2blacklistpath)
case _:
break
except Exception:
break
break
@ -1004,50 +841,14 @@ Maintenance:
case 9:
print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
try:
print_colors('[+] Reading local verified and unverified')
verified_df, unverified_df = get_local_verified_and_unverified()
print_colors('[+] Removing cross dataframe replications')
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(verified_df, unverified_df)
except Exception as err:
print_colors("[-] Option 9 failed suddently, please try again", is_error=True)
options.run_option_9()
break
case 10:
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
try:
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
for participant in os.listdir(conf.PARTICIPANT_DIR):
participant_local_dir = conf.PARTICIPANT_DIR + participant + '/'
print_colors('[+] Reading webrring participant\'s verified and unverified')
participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_local_dir)
print_colors('[+] Removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
print_colors('[+] Saving local participant verified and unverified')
save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir)
except Exception as err:
print_colors("[-] Option 10 failed suddently, please try again", is_error=True)
options.run_option_10()
break

View file

@ -7,11 +7,15 @@ def download_participant_data(participant):
"""
Downloads the participants csv files and banner
Parameters:
participant (str): The url of the webring participant.
Parameters
----------
participant : str
The url of the webring participant.
Returns:
Boolean: True if all files downloaded, False if any of them failed
Returns
-------
Boolean
True if all files downloaded, False if any of them failed
"""
try:
@ -44,19 +48,25 @@ def download_participant_data(participant):
utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")
except Exception as err:
print_colors("[-] Downloading webring participant's files failed.", is_error=True)
utils.print_colors("[-] Downloading webring participant's files failed.", is_error=True)
def clean_csv(df, blacklist):
"""
Cleans duplications and blacklisted rows
Parameters:
df (dataframe): The dataframe we want to clean.
blacklist (list): The blacklisted words.
Parameters
----------
df pd.DataFrame
The dataframe we want to clean.
blacklist : list
The blacklisted words.
Returns:
Dataframe: Cleaned dataframe.
Returns
-------
pd.DataFrame
Cleaned dataframe.
"""
try:
if not df.empty:
df = utils.remove_duplications(df)
@ -75,12 +85,17 @@ def mark_sensitive(df, sensitive_list):
"""
Marks rows as sensitive
Parameters:
df (dataframe): The dataframe we want to mark.
sensitive (list): The sensitive words.
Parameters
----------
df : pd.DataFrame
The dataframe we want to mark.
sensitive : list
The sensitive words.
Returns:
Dataframe: Marked dataframe.
Returns
-------
pd.DataFrame
Marked dataframe.
"""
try:
@ -91,6 +106,72 @@ def mark_sensitive(df, sensitive_list):
df.loc[~sensitive_rows, 'Sensitive'] = 'NO'
except Exception as err:
print_colors("[-] MArking sensitive words failed.", is_error=True)
print_colors("[-] Marking sensitive words failed.", is_error=True)
return df
def mark_webring_participant_trusted(webring_df, participant_id, trustworthy):
    """
    Set the 'Trusted' flag of a single webring participant

    Parameters
    ----------
    webring_df : pd.DataFrame
        Dataframe of all the webring participants
    participant_id : int
        Row position of the participant inside webring_df
    trustworthy : bool
        True writes "YES" into the 'Trusted' column, False writes "NO"

    Returns
    -------
    pd.DataFrame
        The dataframe that was passed in; a failed write is only reported,
        not raised, and leaves the dataframe as it was
    """
    try:
        trusted_column = webring_df.columns.get_loc('Trusted')
        if trustworthy:
            webring_df.iloc[participant_id, trusted_column] = "YES"
        else:
            webring_df.iloc[participant_id, trusted_column] = "NO"
    except Exception:
        utils.print_colors("[-] Trusting or untrusting a webring participant failed", is_error = True)

    return webring_df
def mark_webring_participant_blacklist(webring_df, participant_instance, participant_id, blacklisted):
    """
    Marks a webring participant as blacklisted or not and updates the local
    blacklist words accordingly

    Parameters
    ----------
    webring_df : pd.DataFrame
        Dataframe of all the webring participants
    participant_instance : str
        The participant's instance address, used as the word that is added
        to or removed from the local blacklist
    participant_id : int
        Row position of the participant inside webring_df
    blacklisted : bool
        True marks the participant as blacklisted, False clears the mark

    Returns
    -------
    pd.DataFrame
        Marked webring dataframe with blacklist/unblacklist.

    Raises
    ------
    Exception
        Any error raised while marking the row or updating the blacklist is
        reported and then re-raised to the caller
    """
    try:
        blacklisted_column = webring_df.columns.get_loc('Blacklisted')

        if blacklisted:
            webring_df.iloc[participant_id, blacklisted_column] = "YES"
            utils.print_colors('[+] Adding new word to blacklist')
            utils.add_word_to_blacklist(participant_instance)
        else:
            webring_df.iloc[participant_id, blacklisted_column] = "NO"
            utils.print_colors('[+] Removing word from blacklist')
            utils.remove_word_from_blacklist(participant_instance)

    except Exception:
        utils.print_colors("[-] Blacklisting or unblacklisting a webring participlant failed", is_error = True)
        # Bare raise keeps the original traceback for the caller
        raise

    return webring_df

234
scripts/logic/options.py Normal file
View file

@ -0,0 +1,234 @@
import shutil
import os
import utils
import conf
import logic.lantern_logic as lantern
def run_option_4():
    """
    Running option 4: syncing all links from official and registered webrings

    The local verified and unverified dataframes are emptied and then rebuilt
    from the participants' files. A participant is skipped when it is the
    current instance, when it is marked as blacklisted, or when it is not
    reachable. Verified rows of a trusted participant go to the local
    verified list; everything else goes to the local unverified list.

    Any error is reported with a generic message and is not raised.
    """
    try:
        utils.print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")

        utils.print_colors('[+] Syncing official webrings to local webrings')
        webring_df = utils.get_local_webring_participants()
        current_instance = utils.get_current_instance()

        utils.print_colors('[+] Reading local blacklist and sensitive words')
        local_blacklist_df = utils.get_local_blacklist()
        local_sensitive_df = utils.get_local_sensitive()

        utils.print_colors('[+] Reading local verified and unverified')
        local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified()

        # Remove all rows but keep the columns, the lists are rebuilt below
        local_unverified_df = local_unverified_df[0:0]
        local_verified_df = local_verified_df[0:0]

        # The word lists do not change between participants, build them once
        blacklisted_words = local_blacklist_df['blacklisted-words'].tolist()
        sensitive_words = local_sensitive_df['sensitive-words'].tolist()

        for participant in webring_df.itertuples(index=False, name='columns'):
            # Check if the participant is my instance
            if current_instance in participant:
                continue

            if participant.Blacklisted == 'YES':
                continue

            if not utils.is_participant_reachable(participant.URL):
                utils.print_colors(f"[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
                continue

            utils.print_colors('[+] Downloading participant\'s files to store locally')
            lantern.download_participant_data(participant.URL)

            participant_dir = utils.generate_local_participant_dir(participant.URL)

            utils.print_colors('[+] Reading webring participant\'s verified and unverified')
            participant_verified_df, participant_unverified_df = utils.get_participant_local_verified_and_unverified(participant_dir)

            utils.print_colors('[+] Removing unvalidated and blacklisted rows')
            participant_verified_df = lantern.clean_csv(participant_verified_df, blacklisted_words)
            participant_unverified_df = lantern.clean_csv(participant_unverified_df, blacklisted_words)

            utils.print_colors('[+] Marking sensitive rows')
            participant_verified_df = lantern.mark_sensitive(participant_verified_df, sensitive_words)
            participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, sensitive_words)

            if participant.Trusted == 'YES':
                utils.print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
                local_verified_df = utils.merge_verification_df(local_verified_df, participant_verified_df)
            else:
                utils.print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
                local_unverified_df = utils.merge_verification_df(local_unverified_df, participant_verified_df)

            utils.print_colors('[+] Copying participant\'s unverified to local unverified')
            local_unverified_df = utils.merge_verification_df(local_unverified_df, participant_unverified_df)

        utils.print_colors('[+] Saving local verified and unverified')
        utils.save_local_verified_and_unverified(local_verified_df, local_unverified_df)

    except Exception:
        utils.print_colors("[-] Option 4 failed suddenly, please try again", is_error=True)
def run_option_6():
"""
Running option 6: Trusting/Untrusting/Blacklisting a webring participant
"""
while True:
utils.print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
webring_df = utils.get_local_webring_participants()
webring_path = conf.LOCAL_DIR + conf.WEBRING_CSV_FILE
utils.print_colors(f'{webring_df[["URL","Trusted", "Blacklisted"]]}')
try:
index = int(input('What is the index of the webring participant that you want to edit? -1 to exit ').strip())
if index == -1:
break
if index in webring_df.index:
choice = input('Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant?').strip()
utils.print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified()
participant_instance = webring_df.iloc[index, webring_df.columns.get_loc("URL")]
match choice:
case '1':
# trust the webring participant
approve=input('You\'re about to trust another peer, this means that you\'re going to automatically trust all of the links they have in their verified.csv file! If this is a malicious peer, this action might be potentially risky! Do you want to continue ? (y/n)')
# to lower case incase someone enters Y instead of y
if approve.lower() == 'y':
try:
utils.print_colors(f'[+] Trusting webring participant {participant_instance}')
webring_df = lantern.mark_webring_participant_trusted(webring_df, index, True)
webring_df = lantern.mark_webring_participant_blacklist(webring_df, participant_instance, index, False)
except Exception as err:
utils.print_colors('[-] Trusting webring participant failed', is_error=True)
else:
utils.print_colors('[-] not trusting webring participant', is_error=True)
case '2':
try:
utils.print_colors(f'[+] Untrusting webring participant {participant_instance}')
webring_df = lantern.mark_webring_participant_trusted(webring_df, index, False)
webring_df = lantern.mark_webring_participant_blacklist(webring_df, participant_instance, index, False)
except Exception as err:
utils.print_colors('[-] Untrusting webring participant failed', is_error=True)
case '3':
try:
utils.print_colors(f'[+] Blacklisting webring participant {participant_instance}')
webring_df = lantern.mark_webring_participant_trusted(webring_df, index, False)
webring_df = lantern.mark_webring_participant_blacklist(webring_df, participant_instance, index, True)
local_blacklist_df = utils.get_local_blacklist()
utils.print_colors('[+] Removing unvalidated and blacklisted rows')
local_verified_df = lantern.clean_csv(local_verified_df, local_blacklist_df['blacklisted-words'].tolist())
local_unverified_df = lantern.clean_csv(local_verified_df, local_blacklist_df['blacklisted-words'].tolist())
participant_dir = f'{conf.PARTICIPANT_DIR}{participant_instance}'
utils.print_colors(f"[+] removing the participant's directory at {participant_dir}")
shutil.rmtree(participant_dir)
except FileNotFoundError as err:
utils.print_colors('[-] File already blacklisted', is_error=True)
except Exception as err:
utils.print_colors('[-] Blacklisting webring participant failed', is_error=True)
utils.save_dataframe(webring_df, webring_path)
utils.print_colors('[+] Saving local verified and unverified')
utils.save_local_verified_and_unverified(local_verified_df, local_unverified_df)
except Exception as err:
utils.print_colors("[-] Option 6 failed suddently, please try again", is_error=True)
def run_option_9():
    """
    Running option 9: removes replications between the local instance's
    verified and unverified csv files and saves both of them again

    Errors are reported with a message and are not raised.
    """
    utils.print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")

    try:
        utils.print_colors('[+] Reading local verified and unverified')
        local_verified, local_unverified = utils.get_local_verified_and_unverified()

        utils.print_colors('[+] Removing cross dataframe replications')
        local_verified, local_unverified = utils.remove_cross_dataframe_replications(
            local_verified,
            local_unverified,
        )

        utils.print_colors('[+] Saving local verified and unverified')
        utils.save_local_verified_and_unverified(local_verified, local_unverified)

    except Exception:
        utils.print_colors("[-] Option 9 failed suddenly, please try again", is_error=True)
def run_option_10():
    """
    Running option 10: go over all verified and unverified participants csv files

    Every directory under conf.PARTICIPANT_DIR that contains a verified.csv
    is processed: its verified and unverified lists are cleaned against the
    local blacklist, marked with the local sensitive words, and saved back
    into the same directory.

    Errors are reported with a message and are not raised.
    """
    utils.print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")

    try:
        utils.print_colors('[+] Reading local blacklist and sensitive words')
        local_blacklist_df = utils.get_local_blacklist()
        local_sensitive_df = utils.get_local_sensitive()

        # The word lists do not change between participants, build them once
        blacklisted_words = local_blacklist_df['blacklisted-words'].tolist()
        sensitive_words = local_sensitive_df['sensitive-words'].tolist()

        for participant in os.listdir(conf.PARTICIPANT_DIR):
            participant_local_dir = conf.PARTICIPANT_DIR + participant + '/'

            # Nothing to check for a participant without a verified.csv
            if not os.path.exists(f'{participant_local_dir}verified.csv'):
                continue

            utils.print_colors('[+] Reading webring participant\'s verified and unverified')
            participant_verified_df, participant_unverified_df = utils.get_participant_local_verified_and_unverified(participant_local_dir)

            utils.print_colors('[+] Removing unverified and blacklisted rows')
            participant_verified_df = lantern.clean_csv(participant_verified_df, blacklisted_words)
            participant_unverified_df = lantern.clean_csv(participant_unverified_df, blacklisted_words)

            utils.print_colors('[+] Marking sensitive rows')
            participant_verified_df = lantern.mark_sensitive(participant_verified_df, sensitive_words)
            participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, sensitive_words)

            utils.print_colors('[+] Saving local participant verified and unverified')
            utils.save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir)

    except Exception:
        utils.print_colors("[-] Option 10 failed suddenly, please try again", is_error=True)

View file

@ -19,7 +19,7 @@ RESET = '\033[m'
def get_current_instance():
"""
Checks if all URL files are actually reachable via Tor
Get the current host instance
Returns:
str: the local instance onion url
@ -28,9 +28,13 @@ def get_current_instance():
#expanduser gives the current user directory
instance_file = os.path.expanduser("~") + '/.darknet_participant_url'
if os.path.exists(instance_file):
with open(instance_file) as f:
return f.read().rstrip()
else:
return ""
#Set the local dir on script run
conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/'
@ -137,11 +141,15 @@ def is_participant_reachable(instance):
"""
Checks if all URL files are actually reachable via Tor
Parameters:
instance (str): The participant onion address
Parameters
----------
instance : str
The participant onion address
Returns:
Boolean: False if any file is unreachable, True if all are reachable
Returns
-------
Bool
False if any file is unreachable, True if all are reachable
"""
url = generate_participant_url(instance)
@ -268,13 +276,17 @@ def send_server_checks(url: str) -> tuple[str, str, str]:
def is_row_valid(row):
"""
validates dataframe row to check if all field are valid
Validates dataframe row to check if all field are valid
Parameters:
row (dict): dataframe row
Parameters
----------
row : dict
Dataframe row
Returns:
Boolean: True if row is valid, False if row isn't valid
Returns
-------
Bool
True if row is valid, False if row isn't valid
"""
try:
return (
@ -295,14 +307,19 @@ def is_row_valid(row):
def merge_verification_df(receiving_df, merging_df):
"""
merges 2 dataframes of type verified or unverified (do not merge duplications by name or url)
Merges 2 dataframes of type verified or unverified (do not merge duplications by name or url)
Parameters:
receiving_df (Dataframe): dataframe we want to receive the data
merging_df (Dataframe): dataframe we want to merge into the receiving dataframe
Parameters
----------
receiving_df : pd.DataFrame
Dataframe we want to receive the data
merging_df : pd.DataFrame
Dataframe we want to merge into the receiving dataframe
Returns:
Dataframe: the combined dataframe will be returned
--------
pd.DataFrame
The combined dataframe will be returned
"""
try:
filtered_df = merging_df[~((merging_df['URL'].isin(receiving_df['URL'])) | merging_df['Name'].isin(receiving_df['Name']))]
@ -321,13 +338,17 @@ def merge_verification_df(receiving_df, merging_df):
def remove_duplications(df):
"""
remove url and name duplications from the dataframe
Remove url and name duplications from the dataframe
Parameters:
df (Dataframe): the dataframe to remove duplications from
Parameters
----------
df : pd.DataFrame
The dataframe to remove duplications from
Returns:
Dataframe: the dataframe after all duplications were removed
Returns
-------
pd.DataFrame
The dataframe after all duplications were removed
"""
try:
df = df.drop_duplicates(subset='Name')
@ -340,15 +361,21 @@ def remove_duplications(df):
def remove_cross_dataframe_replications(main_df, sub_df):
"""
remove replications from sub_df that exist in main_df
Remove replications from sub_df that exist in main_df
Parameters:
main_df (Dataframe): the dataframe to keep replications
sub_df (Dataframe): the dataframe to remove replications
Parameters
----------
main_df : pd.DataFrame
The dataframe to keep replications
sub_df : DataFrame
The dataframe to remove replications
Returns:
Dataframe: the main_df with removed duplications
Dataframe: the sub_df with removed duplications and removed replications
Returns
-------
pd.DataFrame
The main_df with removed duplications
pd.DataFrame
The sub_df with removed duplications and removed replications
"""
try:
@ -365,24 +392,150 @@ def remove_cross_dataframe_replications(main_df, sub_df):
return main_df, sub_df
def add_word_to_blacklist(word):
    """
    Add a new word to the blacklist

    The blacklist is only saved when the word was not in it yet.
    Errors are reported with a message and are not raised.

    Parameters
    ----------
    word : str
        The new word we want to add to the blacklist

    Returns
    -------
    pd.DataFrame or None
        The local blacklist dataframe as it stands after the call, or None
        when the blacklist could not be read at all
    """
    # Bound before the try so the final return works even if reading fails
    local_blacklist_df = None

    try:
        local_blacklist_df = get_local_blacklist()

        if word not in local_blacklist_df['blacklisted-words'].values:
            local_blacklist_df.loc[len(local_blacklist_df)] = [word]
            save_local_blacklist(local_blacklist_df)
        else:
            print_colors('[+] Word already exists in the blacklist')

    except Exception:
        print_colors('[-] Adding word to the blacklist failed',is_error=True)

    return local_blacklist_df
def remove_word_from_blacklist(word):
    """
    Remove a word from the blacklist

    The blacklist is only saved when the word was actually in it.
    Errors are reported with a message and are not raised.

    Parameters
    ----------
    word : str
        The word we want to remove from the blacklist

    Returns
    -------
    pd.DataFrame or None
        The local blacklist dataframe as it stands after the call, or None
        when the blacklist could not be read at all
    """
    # Bound before the try so the final return works even if reading fails
    local_blacklist_df = None

    try:
        local_blacklist_df = get_local_blacklist()

        if word in local_blacklist_df['blacklisted-words'].values:
            local_blacklist_df = local_blacklist_df[local_blacklist_df['blacklisted-words'] != word]
            save_local_blacklist(local_blacklist_df)
        else:
            print_colors('[+] Word wasn\'t found on the blacklist')

    except Exception:
        print_colors('[-] Removing word from the blacklist failed',is_error=True)

    return local_blacklist_df
def transfer_rows_by_instance(target_df, source_df, participant_instance):
    """
    Move the rows of one participant instance from one dataframe to another

    Parameters
    ----------
    target_df : pd.DataFrame
        The dataframe that receives the rows
    source_df : pd.DataFrame
        The dataframe the rows are taken out of
    participant_instance : str
        The value of the 'Instance' column that selects the rows to move

    Returns
    -------
    pd.DataFrame
        The target_df with the moved rows appended
    pd.DataFrame
        The source_df without the moved rows
    """
    try:
        belongs_to_instance = source_df['Instance'].eq(participant_instance)
        moved_rows = source_df.loc[belongs_to_instance]
        kept_rows = source_df.loc[~belongs_to_instance]

        target_df = pd.concat([target_df, moved_rows])
        source_df = kept_rows

    except Exception:
        print_colors('[-] Transferring rows by instance failed',is_error=True)

    return target_df, source_df
def save_local_blacklist(blacklist_df):
    """
    Saves the local blacklist

    Parameters
    ----------
    blacklist_df : pd.DataFrame
        Dataframe of the blacklist

    Returns
    -------
    bool
        True if successful, False if not
    """
    try:
        # save_dataframe reports a failed write through its return value
        # instead of raising, so that value decides the outcome here
        saved = save_dataframe(blacklist_df, f'{conf.LOCAL_DIR}blacklist.csv')
    except Exception:
        saved = False

    if not saved:
        print_colors('[-] Saving blacklist failed',is_error=True)
        return False

    return True
###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant
def save_local_verified_and_unverified(verified_df, unverified_df):
"""
saves the local verified and unverified
Saves the local verified and unverified
Parameters:
verified_df (Dataframe): local verified rows dataframe
unverified_df (Dataframe): local unverified rows dataframe
Parameters
----------
verified_df : pd.DataFrame
Local verified rows dataframe
unverified_df : DataFrame
Local unverified rows dataframe
Returns:
bool: True if successful, False if not
Returns
-------
bool
True if successful, False if not
"""
try:
current_instance = get_current_instance() + '/'
verified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv', index=False)
save_dataframe(verified_df, f'{conf.LOCAL_DIR}verified.csv')
unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)
save_dataframe(unverified_df, f'{conf.LOCAL_DIR}unverified.csv')
print_colors('[+] Verified and unverified saved successfully')
@ -394,21 +547,28 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
def save_local_participant_verified_and_unverified(verified_df, unverified_df, participant):
"""
saves the local verified and unverified of a participant
Saves the local verified and unverified of a participant
Parameters:
verified_df (Dataframe): local verified rows dataframe
unverified_df (Dataframe): local unverified rows dataframe
participant (str): participant's onion local path
Parameters
----------
verified_df pd.DataFrame
Local verified rows dataframe
unverified_df pd.DataFrame
Local unverified rows dataframe
participant : str
Participant's onion local path
Returns:
bool: True if successful, False if not
Returns
-------
bool
True if successful, False if not
"""
try:
verified_df.to_csv(f'{participant}verified.csv', index=False)
save_dataframe(verified_df, f'{participant}verified.csv')
unverified_df.to_csv(f'{participant}unverified.csv', index=False)
save_dataframe(unverified_df, f'{participant}unverified.csv')
print_colors('[+] Verified and unverified saved successfully')
@ -418,43 +578,82 @@ def save_local_participant_verified_and_unverified(verified_df, unverified_df, p
print_colors('[-] Saving verified and unverified failed',is_error=True)
return False
def save_dataframe(df, path):
    """
    Writes a dataframe to a csv file and reports whether it worked

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe to be saved
    path : str
        Local path of the csv file to write

    Returns
    -------
    bool
        True if saved, False if not
    """
    saved = True
    try:
        # The dataframe index is not written to the file
        df.to_csv(path, index=False)
    except Exception:
        # Any failure is reported through the return value only
        saved = False
    return saved
###################### Getters/Generators ######################
def generate_participant_url(participant):
    """
    Builds the url under which a webring participant publishes its files

    Parameters
    ----------
    participant : str
        Participant's onion address/instance

    Returns
    -------
    str
        The url of the webring participant
    """
    # The address is used twice: once as the host, once as the directory name
    onion = participant
    return f'http://{onion}/participants/{onion}/'
def generate_local_participant_dir(participant):
    """
    Builds the local directory path holding a webring participant's files

    Parameters
    ----------
    participant : str
        Participant's onion address/instance

    Returns
    -------
    str
        The local path of the webring participant's files
    """
    # The configured participants directory is used as the prefix as-is
    participants_root = conf.PARTICIPANT_DIR
    return f'{participants_root}{participant}/'
def get_participant_local_verified_and_unverified(participant):
"""
reads the local verified csv and the local unverified csv of a participant
Reads the local verified csv and the local unverified csv of a participant
Parameters:
participant (str): participant's local files path
Parameters
----------
participant : str
Participant's local files path
Returns:
verified_df(Dataframe): verified.csv as dataframe
unverified_df(Dataframe): unverified.csv as dataframe
Returns
-------
pd.DataFrame
verified.csv as dataframe
pd.DataFrame
unverified.csv as dataframe
"""
try:
@ -462,24 +661,26 @@ def get_participant_local_verified_and_unverified(participant):
except FileNotFoundError:
print_colors("[-] File not found: verified.csv", is_error=True)
return pd.Dataframe(), pd.Dataframe()
return pd.DataFrame(), pd.DataFrame()
try:
unverified_df = pd.read_csv(f'{participant}unverified.csv')
except FileNotFoundError:
print_colors("[-] Participant File not found: unverified.csv", is_error=True)
return pd.Dataframe(), pd.Dataframe()
return pd.DataFrame(), pd.DataFrame()
return verified_df, unverified_df
def get_official_participants():
"""
reads all the official webring participants
Reads all the official webring participants
Returns:
list: list of all the official webring participants
Returns
-------
list
List of all the official webring participants
"""
try:
@ -491,58 +692,78 @@ def get_official_participants():
except Exception as err:
print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
def get_local_blacklist():
    """
    Reads the local blacklist

    Returns
    -------
    blacklist_df : pd.DataFrame
        Dataframe of the blacklist, or an empty dataframe when the file
        is missing or cannot be read
    """
    try:
        return pd.read_csv(f'{conf.LOCAL_DIR}blacklist.csv')
    except FileNotFoundError:
        # A missing file gets its own message and goes straight to the
        # empty-dataframe return, so no unbound name is ever returned
        print_colors("[-] File not found: blacklist.csv", is_error=True)
    except Exception:
        print_colors('[-] Failed reading the blacklist words file',is_error=True)

    return pd.DataFrame()
def get_local_sensitive():
    """
    Reads the local sensitive words

    Returns
    -------
    sensitive_df : pd.DataFrame
        Dataframe of the sensitive words, or an empty dataframe when the
        file is missing or cannot be read
    """
    try:
        return pd.read_csv(f'{conf.LOCAL_DIR}sensitive.csv')
    except FileNotFoundError:
        # A missing file gets its own message and goes straight to the
        # empty-dataframe return, so no unbound name is ever returned
        print_colors("[-] File not found: sensitive.csv", is_error=True)
    except Exception:
        print_colors('[-] Failed reading the sensitive words file',is_error=True)

    return pd.DataFrame()
def get_local_verified_and_unverified():
"""
reads the local verified csv and the local unverified csv of the instance
Reads the local verified csv and the local unverified csv of the instance
Returns:
verified_df(Dataframe): verified.csv as dataframe
unverified_df(Dataframe): unverified.csv as dataframe
Returns
-------
verified_df : pd.DataFrame
verified.csv as dataframe
unverified_df : pd.DataFrame
unverified.csv as dataframe
"""
try:
current_instance = get_current_instance() + '/'
try:
verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
verified_df = pd.read_csv(f'{conf.LOCAL_DIR}verified.csv')
except FileNotFoundError:
print_colors("[-] File not found: verified.csv", is_error=True)
try:
unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
unverified_df = pd.read_csv(f'{conf.LOCAL_DIR}unverified.csv')
except FileNotFoundError:
print_colors("[-] File not found: unverified.csv", is_error=True)
@ -556,10 +777,12 @@ def get_local_verified_and_unverified():
def get_local_webring_participants():
"""
make sure the official participants are registered in the webring csv file
Make sure the official participants are registered in the webring csv file
Returns:
Dataframe: the verified local webring participants dataframe
Returns
-------
pd.DataFrame
The verified local webring participants dataframe
"""
try:
@ -572,7 +795,7 @@ def get_local_webring_participants():
new_row = [{'Name': '','URL': participant,'Description': '','Trusted': 'NO','Status': '','Score': ''}]
webring_df = pd.concat([webring_df, pd.DataFrame(new_row)], ignore_index=True)
webring_df.to_csv(conf.LOCAL_DIR + conf.WEBRING_CSV_FILE, index=False)
save_dataframe(webring_df, conf.LOCAL_DIR + conf.WEBRING_CSV_FILE)
return webring_df

View file

@ -1 +1 @@
Name,URL,Description,Trusted,Status,Score
Name,URL,Description,Trusted,Status,Score,Blacklisted

1 Name URL Description Trusted Status Score Blacklisted