diff --git a/scripts/lantern.py b/scripts/lantern.py index 701d834..b8c80a3 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -1,6 +1,7 @@ from utils import * import logic.lantern_logic as lantern from dotenv import load_dotenv +import logic.options as options import os, pwd @@ -149,6 +150,11 @@ def main(): webpdf = pd.read_csv(webpcsvfile, on_bad_lines='skip') print_colors(f"[+] file exists, your Webring URL is {instance}") + if 'Blacklisted' not in webpdf.columns: + webpdf['Blacklisted'] = 'NO' + + webpdf.to_csv(webpcsvfile, index=False) + ##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE ##### if len(sys.argv) == 2 and sys.argv[1] == "4": print("4) Synchronize new links from existing webring participants into your unverified.csv file") @@ -224,6 +230,8 @@ Maintenance: else: sensi = 'YES' + + #TODO: add blacklisting default to no when refactoring newrow=[instance,category,name,url,sensi,desc,'YES','100'] print_colors(f"[+] NEWROW= {newrow}") # (rest is automatic: status, score, instance is = '' because it is your own instance) @@ -529,65 +537,9 @@ Maintenance: # 6) Trust/UnTrust/Blacklist a webring participant ##################################################### - case 4: - print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files") - - try: - - print_colors('[+] Syncing official webrings to local webrings') - webring_df = get_local_webring_participants() - - current_instance = get_current_instance() - - for participant in webring_df.itertuples(index=False, name='columns'): - # Check if the participant is my instance - if current_instance in participant: - continue - - if not is_participant_reachable(participant.URL): - print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True) - continue - - print_colors('[+] Downloading participant\'s files to store locally') - lantern.download_participant_data(participant.URL) - - print_colors('[+] Reading local blacklist and sensitive 
words') - local_blacklist, local_sensitive = get_local_blacklist_and_sensitive() - - print_colors('[+] Reading local verified and unverified') - local_verified_df, local_unverified_df = get_local_verified_and_unverified() - - participant_url = generate_local_participant_dir(participant.URL) - - print_colors('[+] Reading webrring participant\'s verified and unverified') - participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_url) - - print_colors('[+] Removing unvalidated and blacklisted rows') - participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist) - participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist) - - print_colors('[+] Marking sensitive rows') - participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive) - participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive) - - if participant.Trusted == 'YES': - print_colors('[+] This participant is trusted, copying participant\'s verified to local verified') - local_verified_df = merge_verification_df(local_verified_df, participant_verified_df) - - else: - print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified') - local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df) - - print_colors('[+] Copying participant\'s unverified to local unverified') - local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df) - - print_colors('[+] Saving local verified and unverified') - save_local_verified_and_unverified(local_verified_df, local_unverified_df) - - except Exception as err: - print_colors("[-] Option 4 failed suddently, please try again", is_error=True) + options.run_option_4() break @@ -731,124 +683,9 @@ Maintenance: ############################################## - - - - - - case 6: - while True: - print_colors("[+] 
Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)") - webringcsvfile=instancepath+'/'+'webring-participants.csv' - wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip') - print_colors(f'{wdf[["URL","Trusted"]]}') - try: - index = int(input("What is the index of the webring participant that you want to edit? -1 to exit ").strip()) - if index == -1: - break - elif index in wdf.index: - choice = int(input("Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant?").strip()) - while True: - match choice: - case 1: - # trust the webring participant - choice2=input("You're about to trust another peer, this means that you're going to automatically trust all of the links they have in their verified.csv file! If this is a malicious peer, this action might be potentially risky! Do you want to continue ? (y/n)") - if choice2 == "y": - print_colors(f'[+] Trusting webring participant {wdf.at[index,"URL"]}') - ## Warning: In future versions of panda '✔️' will not work. It will show an error. - wdf.at[index,"Trusted"]= 'YES' - wdf.to_csv(webringcsvfile, index=False) - break - else: - print_colors("[-] not trusting webring participant", is_error=True) - break + options.run_option_6() - case 2: - print_colors(f'[+] UnTrusting webring participant {wdf.at[index,"URL"]}') - ## Warning: In future versions of panda '' will not work. It will show an error. 
Maybe change to a 0,1 - wdf.at[index,"Trusted"]='NO' - wdf.to_csv(webringcsvfile, index=False) - break - - case 3: - print_colors(f'[+] Blacklisting webring participant {wdf.at[index,"URL"]}') - instance2blacklist=wdf.at[index,"URL"] - newrow=[instance2blacklist] - print_colors(f"[+] NEWROW= {newrow}") - # (rest is automatic: status, score, instance is = '' because it is your own instance) - # check if the entry doesn't already exist in verified.csv and in unverified.csv - # if it doesnt exist, add it into unverified.csv - bldf.loc[-1] = newrow # adding a row - bldf.index = bldf.index + 1 # shifting index - bldf = bldf.sort_index() # sorting by index - print_colors("[+] New row added! now writing the csv file:") - bldf.to_csv(blcsvfile, index=False) - - - # remove all of the entries that came from that participant (drop the lines in your own verified+unverified.csv that have that instance in the instance column) - - rows2delete= [] # it is an empty list at first - for i,j in vdf.iterrows(): - row=vdf.loc[i,:].values.tolist() - for k,l in bldf.iterrows(): - blword=bldf.at[k, 'blacklisted-words'] - if any(blword in str(x) for x in row) == True: - if i not in rows2delete: - print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word") - rows2delete.append(i) #mark the row for deletion if not already done - for i in rows2delete: - row=vdf.loc[i,:].values.tolist() - print_colors(f'[+] REMOVING ROW: {i} {row}') - vdf.drop(i, inplace= True) - vdf.to_csv(verifiedcsvfile, index=False) - print_colors(f"{vdf}") - rows2delete= [] # it is an empty list at first - rows2delete= [] # it is an empty list at first - for i,j in uvdf.iterrows(): - row=uvdf.loc[i,:].values.tolist() - for k,l in bldf.iterrows(): - blword=bldf.at[k, 'blacklisted-words'] - if any(blword in str(x) for x in row) == True: - if i not in rows2delete: - print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word") - rows2delete.append(i) #mark the row for deletion 
if not already done - for i in rows2delete: - row=uvdf.loc[i,:].values.tolist() - print_colors(f'[+] REMOVING ROW: {i} {row}') - uvdf.drop(i, inplace= True) - uvdf.to_csv(unverifiedcsvfile, index=False) - print_colors(f"{uvdf}") - rows2delete= [] # it is an empty list at first - - # find all rows that match with the instance name in wdf aswell to remove them - for i,j in wdf.iterrows(): - row=wdf.loc[i,:].values.tolist() - for k,l in bldf.iterrows(): - blword=bldf.at[k, 'blacklisted-words'] - if any(blword in str(x) for x in row) == True: - if i not in rows2delete: - print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word") - rows2delete.append(i) #mark the row for deletion if not already done - for i in rows2delete: - row=wdf.loc[i,:].values.tolist() - print_colors(f'[+] REMOVING ROW: {i} {row}') - wdf.drop(i, inplace= True) - wdf.to_csv(webringcsvfile, index=False) - print_colors(f"{wdf}") - rows2delete= [] # it is an empty list at first - - - - # remove the entire directory in www/participants/INSTANCENAME aswell to get rid of it - instance2blacklistpath=rootpath+'www/participants/'+instance2blacklist - print_colors(f"[+] removing the participant's directory at {instance2blacklistpath}") - shutil.rmtree(instance2blacklistpath) - - case _: - break - except Exception: - break break @@ -1004,50 +841,14 @@ Maintenance: case 9: - print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") - - try: - - print_colors('[+] Reading local verified and unverified') - verified_df, unverified_df = get_local_verified_and_unverified() - print_colors('[+] Removing cross dataframe replications') - verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df) - - print_colors('[+] Saving local verified and unverified') - save_local_verified_and_unverified(verified_df, unverified_df) - - except Exception as err: - print_colors("[-] Option 9 failed suddently, please try again", 
is_error=True) + options.run_option_9() break case 10: - print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") - - try: - print_colors('[+] Reading local blacklist and sensitive words') - local_blacklist, local_sensitive = get_local_blacklist_and_sensitive() - - for participant in os.listdir(conf.PARTICIPANT_DIR): - participant_local_dir = conf.PARTICIPANT_DIR + participant + '/' - - print_colors('[+] Reading webrring participant\'s verified and unverified') - participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_local_dir) - - print_colors('[+] Removing unverified and blacklisted rows') - participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist) - participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist) - - print_colors('[+] Marking sensitive rows') - participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive) - participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive) - - print_colors('[+] Saving local participant verified and unverified') - save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir) - except Exception as err: - print_colors("[-] Option 10 failed suddently, please try again", is_error=True) + options.run_option_10() break diff --git a/scripts/logic/lantern_logic.py b/scripts/logic/lantern_logic.py index 61590fb..f3174a7 100644 --- a/scripts/logic/lantern_logic.py +++ b/scripts/logic/lantern_logic.py @@ -7,11 +7,15 @@ def download_participant_data(participant): """ Downloads the participants csv files and banner - Parameters: - participant (str): The url of the webring participant. + Parameters + ---------- + participant : str + The url of the webring participant. 
- Returns: - Boolean: True if all files downloaded, False if any of them failed + Returns + ------- + Boolean + True if all files downloaded, False if any of them failed """ try: @@ -44,28 +48,34 @@ def download_participant_data(participant): utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner") except Exception as err: - print_colors("[-] Downloading webring participant's files failed.", is_error=True) + utils.print_colors("[-] Downloading webring participant's files failed.", is_error=True) def clean_csv(df, blacklist): """ Cleans duplications and blacklisted rows - Parameters: - df (dataframe): The dataframe we want to clean. - blacklist (list): The blacklisted words. + Parameters + ---------- + df : pd.DataFrame + The dataframe we want to clean. + blacklist : list + The blacklisted words. - Returns: - Dataframe: Cleaned dataframe. + Returns + ------- + pd.DataFrame + Cleaned dataframe. """ + try: if not df.empty: df = utils.remove_duplications(df) df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)] - + if not df.empty: df = df[df.apply(utils.is_row_valid, axis=1)] - + except Exception as err: - print_colors("[-] cleaning dataframe failed", is_error=True) + utils.print_colors("[-] cleaning dataframe failed", is_error=True) @@ -75,12 +85,17 @@ def mark_sensitive(df, sensitive_list): """ Marks rows as sensitive - Parameters: - df (dataframe): The dataframe we want to mark. - sensitive (list): The sensitive words. + Parameters + ---------- + df : pd.DataFrame + The dataframe we want to mark. + sensitive_list : list + The sensitive words. - Returns: - Dataframe: Marked dataframe. + Returns + ------- + pd.DataFrame + Marked dataframe. 
""" try: @@ -91,6 +106,72 @@ def mark_sensitive(df, sensitive_list): df.loc[~sensitive_rows, 'Sensitive'] = 'NO' except Exception as err: - print_colors("[-] MArking sensitive words failed.", is_error=True) + utils.print_colors("[-] Marking sensitive words failed.", is_error=True) - return df \ No newline at end of file + return df + +def mark_webring_participant_trusted(webring_df, participant_id, trustworthy): + """ + Marks a webring to be trusted or not + + Parameters + ---------- + webring_df : pd.DataFrame + dataframe of all the webring participants + participant_id : int + the index of the participant + trustworthy : bool + is the participant trustworthy or not + + Returns + ------- + pd.DataFrame + Marked webring dataframe with trust/untrust. + """ + + try: + + webring_df.iloc[participant_id, webring_df.columns.get_loc('Trusted')] = "YES" if trustworthy else "NO" + + except Exception as err: + utils.print_colors("[-] Trusting or untrusting a webring participant failed", is_error = True) + + return webring_df + +def mark_webring_participant_blacklist(webring_df, participant_instance, participant_id, blacklisted): + """ + Marks a webring to be blacklisted or not + + Parameters + ---------- + webring_df : pd.DataFrame + dataframe of all the webring participants + participant_id : int + the index of the participant + blacklisted : bool + is the participant set to be blacklisted or not + + Returns + ------- + pd.DataFrame + Marked webring dataframe with blacklist/unblacklist. 
+ """ + + try: + if blacklisted: + webring_df.iloc[participant_id, webring_df.columns.get_loc('Blacklisted')] = "YES" + + utils.print_colors(f'[+] Adding new word to blacklist') + local_blacklist_df = utils.add_word_to_blacklist(participant_instance) + + else: + webring_df.iloc[participant_id, webring_df.columns.get_loc('Blacklisted')] = "NO" + + utils.print_colors(f'[+] Removing word from blacklist') + local_blacklist_df = utils.remove_word_from_blacklist(participant_instance) + + except Exception as err: + utils.print_colors("[-] Blacklisting or unblacklisting a webring participlant failed", is_error = True) + raise err + + return webring_df \ No newline at end of file diff --git a/scripts/logic/options.py b/scripts/logic/options.py new file mode 100644 index 0000000..9cd3be5 --- /dev/null +++ b/scripts/logic/options.py @@ -0,0 +1,234 @@ +import shutil +import os + +import utils +import conf +import logic.lantern_logic as lantern + +def run_option_4(): + """ + Running option 4: syncing all links from official and registered webrings + + """ + try: + + utils.print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files") + + utils.print_colors('[+] Syncing official webrings to local webrings') + + webring_df = utils.get_local_webring_participants() + + current_instance = utils.get_current_instance() + + utils.print_colors('[+] Reading local blacklist and sensitive words') + local_blacklist_df = utils.get_local_blacklist() + local_sensitive_df = utils.get_local_sensitive() + + utils.print_colors('[+] Reading local verified and unverified') + local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified() + + #Remove all rows + local_unverified_df = local_unverified_df[0:0] + local_verified_df = local_verified_df[0:0] + + for participant in webring_df.itertuples(index=False, name='columns'): + # Check if the participant is my instance + if current_instance in participant: + continue + + if 
participant.Blacklisted == 'YES': + continue + + if not utils.is_participant_reachable(participant.URL): + utils.print_colors(f"[-] Webring {participant.URL} isn't reachable, skipping", is_error=True) + continue + + utils.print_colors('[+] Downloading participant\'s files to store locally') + lantern.download_participant_data(participant.URL) + + participant_url = utils.generate_local_participant_dir(participant.URL) + + utils.print_colors('[+] Reading webring participant\'s verified and unverified') + participant_verified_df, participant_unverified_df = utils.get_participant_local_verified_and_unverified(participant_url) + + utils.print_colors('[+] Removing unvalidated and blacklisted rows') + participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist_df['blacklisted-words'].tolist()) + participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist_df['blacklisted-words'].tolist()) + + utils.print_colors('[+] Marking sensitive rows') + participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive_df['sensitive-words'].tolist()) + participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive_df['sensitive-words'].tolist()) + + if participant.Trusted == 'YES': + utils.print_colors('[+] This participant is trusted, copying participant\'s verified to local verified') + local_verified_df = utils.merge_verification_df(local_verified_df, participant_verified_df) + + else: + utils.print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified') + local_unverified_df = utils.merge_verification_df(local_unverified_df, participant_verified_df) + + utils.print_colors('[+] Copying participant\'s unverified to local unverified') + local_unverified_df = utils.merge_verification_df(local_unverified_df, participant_unverified_df) + + utils.print_colors('[+] Saving local verified and unverified') + 
utils.save_local_verified_and_unverified(local_verified_df, local_unverified_df) + + except Exception as err: + utils.print_colors("[-] Option 4 failed suddenly, please try again", is_error=True) + +def run_option_6(): + """ + Running option 6: Trusting/Untrusting/Blacklisting a webring participant + + """ + while True: + utils.print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)") + + webring_df = utils.get_local_webring_participants() + + webring_path = conf.LOCAL_DIR + conf.WEBRING_CSV_FILE + + utils.print_colors(f'{webring_df[["URL","Trusted", "Blacklisted"]]}') + + try: + index = int(input('What is the index of the webring participant that you want to edit? -1 to exit ').strip()) + + if index == -1: + break + + if index in webring_df.index: + choice = input('Do you want to 1) Trust, 2) UnTrust, or 3) Blacklist the webring participant?').strip() + + utils.print_colors('[+] Reading local verified and unverified') + local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified() + + participant_instance = webring_df.iloc[index, webring_df.columns.get_loc("URL")] + + + match choice: + case '1': + + # trust the webring participant + approve=input('You\'re about to trust another peer, this means that you\'re going to automatically trust all of the links they have in their verified.csv file! If this is a malicious peer, this action might be potentially risky! Do you want to continue ? 
(y/n)') + + # to lower case incase someone enters Y instead of y + if approve.lower() == 'y': + try: + utils.print_colors(f'[+] Trusting webring participant {participant_instance}') + + webring_df = lantern.mark_webring_participant_trusted(webring_df, index, True) + + webring_df = lantern.mark_webring_participant_blacklist(webring_df, participant_instance, index, False) + + except Exception as err: + utils.print_colors('[-] Trusting webring participant failed', is_error=True) + + else: + utils.print_colors('[-] not trusting webring participant', is_error=True) + + case '2': + + try: + utils.print_colors(f'[+] Untrusting webring participant {participant_instance}') + + webring_df = lantern.mark_webring_participant_trusted(webring_df, index, False) + + webring_df = lantern.mark_webring_participant_blacklist(webring_df, participant_instance, index, False) + + except Exception as err: + utils.print_colors('[-] Untrusting webring participant failed', is_error=True) + + case '3': + + try: + + utils.print_colors(f'[+] Blacklisting webring participant {participant_instance}') + + webring_df = lantern.mark_webring_participant_trusted(webring_df, index, False) + + webring_df = lantern.mark_webring_participant_blacklist(webring_df, participant_instance, index, True) + + local_blacklist_df = utils.get_local_blacklist() + + utils.print_colors('[+] Removing unvalidated and blacklisted rows') + local_verified_df = lantern.clean_csv(local_verified_df, local_blacklist_df['blacklisted-words'].tolist()) + local_unverified_df = lantern.clean_csv(local_unverified_df, local_blacklist_df['blacklisted-words'].tolist()) + + participant_dir = f'{conf.PARTICIPANT_DIR}{participant_instance}' + + utils.print_colors(f"[+] removing the participant's directory at {participant_dir}") + shutil.rmtree(participant_dir) + + except FileNotFoundError as err: + utils.print_colors('[-] File already blacklisted', is_error=True) + + except Exception as err: + utils.print_colors('[-] Blacklisting webring 
participant failed', is_error=True) + + + utils.save_dataframe(webring_df, webring_path) + + utils.print_colors('[+] Saving local verified and unverified') + utils.save_local_verified_and_unverified(local_verified_df, local_unverified_df) + + except Exception as err: + utils.print_colors("[-] Option 6 failed suddently, please try again", is_error=True) + +def run_option_9(): + """ + Running option 9: cleans duplications in local instance verified and unverified csv files + + """ + utils.print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") + + try: + + utils.print_colors('[+] Reading local verified and unverified') + verified_df, unverified_df = utils.get_local_verified_and_unverified() + + utils.print_colors('[+] Removing cross dataframe replications') + verified_df, unverified_df = utils.remove_cross_dataframe_replications(verified_df, unverified_df) + + utils.print_colors('[+] Saving local verified and unverified') + utils.save_local_verified_and_unverified(verified_df, unverified_df) + + except Exception as err: + utils.print_colors("[-] Option 9 failed suddenly, please try again", is_error=True) + +def run_option_10(): + """ + Running option 10: go over all verified and unverified participants csv files + + """ + utils.print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") + + try: + utils.print_colors('[+] Reading local blacklist and sensitive words') + local_blacklist_df = utils.get_local_blacklist() + local_sensitive_df = utils.get_local_sensitive() + + for participant in os.listdir(conf.PARTICIPANT_DIR): + participant_local_dir = conf.PARTICIPANT_DIR + participant + '/' + + if not os.path.exists(f'{participant_local_dir}verified.csv'): + continue + + utils.print_colors('[+] Reading webrring participant\'s verified and unverified') + participant_verified_df, participant_unverified_df = 
utils.get_participant_local_verified_and_unverified(participant_local_dir) + + utils.print_colors('[+] Removing unverified and blacklisted rows') + participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist_df['blacklisted-words'].tolist()) + participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist_df['blacklisted-words'].tolist()) + + utils.print_colors('[+] Marking sensitive rows') + participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive_df['sensitive-words'].tolist()) + participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive_df['sensitive-words'].tolist()) + + utils.print_colors('[+] Saving local participant verified and unverified') + utils.save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir) + + except Exception as err: + utils.print_colors("[-] Option 10 failed suddenly, please try again", is_error=True) + + + diff --git a/scripts/utils.py b/scripts/utils.py index c15c57e..790be16 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -19,7 +19,7 @@ RESET = '\033[m' def get_current_instance(): """ - Checks if all URL files are actually reachable via Tor + Get the current host instance Returns: str: the local instance onion url @@ -28,8 +28,12 @@ def get_current_instance(): #expanduser gives the current user directory instance_file = os.path.expanduser("~") + '/.darknet_participant_url' - with open(instance_file) as f: - return f.read().rstrip() + if os.path.exists(instance_file): + with open(instance_file) as f: + return f.read().rstrip() + + else: + return "" #Set the local dir on script run conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/' @@ -137,11 +141,15 @@ def is_participant_reachable(instance): """ Checks if all URL files are actually reachable via Tor - Parameters: - instance (str): The participant onion address + Parameters + 
---------- + instance : str + The participant onion address - Returns: - Boolean: False if any file is unreachable, True if all are reachable + Returns + ------- + Bool + False if any file is unreachable, True if all are reachable """ url = generate_participant_url(instance) @@ -268,13 +276,17 @@ def send_server_checks(url: str) -> tuple[str, str, str]: def is_row_valid(row): """ - validates dataframe row to check if all field are valid + Validates dataframe row to check if all field are valid - Parameters: - row (dict): dataframe row + Parameters + ---------- + row : dict + Dataframe row - Returns: - Boolean: True if row is valid, False if row isn't valid + Returns + ------- + Bool + True if row is valid, False if row isn't valid """ try: return ( @@ -295,18 +307,23 @@ def is_row_valid(row): def merge_verification_df(receiving_df, merging_df): """ - merges 2 dataframes of type verified or unverified (do not merge duplications by name or url) + Merges 2 dataframes of type verified or unverified (do not merge duplications by name or url) - Parameters: - receiving_df (Dataframe): dataframe we want to receive the data - merging_df (Dataframe): dataframe we want to merge into the receiving dataframe + Parameters + ---------- + receiving_df : pd.DataFrame + Dataframe we want to receive the data + merging_df : pd.DataFrame + Dataframe we want to merge into the receiving dataframe Returns: - Dataframe: the combined dataframe will be returned + -------- + pd.DataFrame + The combined dataframe will be returned """ try: filtered_df = merging_df[~((merging_df['URL'].isin(receiving_df['URL'])) | merging_df['Name'].isin(receiving_df['Name']))] - + if filtered_df.empty: return receiving_df @@ -321,13 +338,17 @@ def merge_verification_df(receiving_df, merging_df): def remove_duplications(df): """ - remove url and name duplications from the dataframe + Remove url and name duplications from the dataframe - Parameters: - df (Dataframe): the dataframe to remove duplications from + 
Parameters + ---------- + df : pd.DataFrame + The dataframe to remove duplications from - Returns: - Dataframe: the dataframe after all duplications were removed + Returns + ------- + pd.DataFrame + The dataframe after all duplications were removed """ try: df = df.drop_duplicates(subset='Name') @@ -340,15 +361,21 @@ def remove_duplications(df): def remove_cross_dataframe_replications(main_df, sub_df): """ - remove replications from sub_df that exist in main_df + Remove replications from sub_df that exist in main_df - Parameters: - main_df (Dataframe): the dataframe to keep replications - sub_df (Dataframe): the dataframe to remove replications + Parameters + ---------- + main_df : pd.DataFrame + The dataframe to keep replications + sub_df : DataFrame + The dataframe to remove replications - Returns: - Dataframe: the main_df with removed duplications - Dataframe: the sub_df with removed duplications and removed replications + Returns + ------- + pd.DataFrame + The main_df with removed duplications + pd.DataFrame + The sub_df with removed duplications and removed replications """ try: @@ -365,24 +392,150 @@ def remove_cross_dataframe_replications(main_df, sub_df): return main_df, sub_df +def add_word_to_blacklist(word): + """ + Add a new word to the blacklist + + Parameters + ---------- + word : str + The new word we want to add to the blacklist + + Returns + ------- + bool + True if word is in the blacklist or added, False if fails + """ + + try: + local_blacklist_df = get_local_blacklist() + + if word not in local_blacklist_df['blacklisted-words'].values: + + local_blacklist_df.loc[len(local_blacklist_df)] = [word] + + save_local_blacklist(local_blacklist_df) + + else: + print_colors('[+] Word already exists in the blacklist') + + except Exception as err: + print_colors('[-] Adding word to the blacklist failed',is_error=True) + + return local_blacklist_df + +def remove_word_from_blacklist(word): + """ + Remove a word from the blacklist + + Parameters + ---------- 
+ word : str + The word we want to remove from the blacklist + + Returns + ------- + bool + True if word is not in the blacklist or removed, False if fails + """ + + try: + local_blacklist_df = get_local_blacklist() + + if word in local_blacklist_df['blacklisted-words'].values: + + local_blacklist_df = local_blacklist_df[local_blacklist_df['blacklisted-words'] != word] + + save_local_blacklist(local_blacklist_df) + + else: + print_colors('[+] Word wasn\'t found on the blacklist') + + except Exception as err: + print_colors('[-] Removing word from the blacklist failed',is_error=True) + + return local_blacklist_df + + +def transfer_rows_by_instance(target_df, source_df, participant_instance): + """ + Transfer rows from one dataframe to another by instance condition + + Parameters + ---------- + target_df pd.DataFrame + The dataframe i want to copy into + source_df pd.DataFrame + The dataframe i want to cut out of + participant_instance : str + The participant's instance onion address + + Returns + ------- + pd.DataFrame + The target_df with the new rows + pd.DataFrame + The source_df with the removed rows + """ + + try: + mask = source_df['Instance'] == participant_instance + + target_df = pd.concat([target_df, source_df[mask]]) + + source_df = source_df[~mask] + + except Exception as err: + print_colors('[-] Transferring rows by instance failed',is_error=True) + + return target_df, source_df + +def save_local_blacklist(blacklist_df): + """ + Saves the local blacklist + + Parameters + ---------- + blacklist_df : pd.DataFrame + Dataframe of the blacklist + + Returns + ------- + bool + True if successful, False if not + """ + + try: + save_dataframe(blacklist_df, f'{conf.LOCAL_DIR}blacklist.csv') + + return True + + except Exception as err: + print_colors('[-] Saving blacklist failed',is_error=True) + return False + ###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant def 
save_local_verified_and_unverified(verified_df, unverified_df): """ - saves the local verified and unverified + Saves the local verified and unverified - Parameters: - verified_df (Dataframe): local verified rows dataframe - unverified_df (Dataframe): local unverified rows dataframe + Parameters + ---------- + verified_df : pd.DataFrame + Local verified rows dataframe + unverified_df : DataFrame + Local unverified rows dataframe - Returns: - bool: True if successful, False if not + Returns + ------- + bool + True if successful, False if not """ try: - current_instance = get_current_instance() + '/' - verified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv', index=False) + save_dataframe(verified_df, f'{conf.LOCAL_DIR}verified.csv') - unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False) + save_dataframe(unverified_df, f'{conf.LOCAL_DIR}unverified.csv') print_colors('[+] Verified and unverified saved successfully') @@ -394,21 +547,28 @@ def save_local_verified_and_unverified(verified_df, unverified_df): def save_local_participant_verified_and_unverified(verified_df, unverified_df, participant): """ - saves the local verified and unverified of a participant + Saves the local verified and unverified of a participant - Parameters: - verified_df (Dataframe): local verified rows dataframe - unverified_df (Dataframe): local unverified rows dataframe - participant (str): participant's onion local path + Parameters + ---------- + verified_df pd.DataFrame + Local verified rows dataframe + unverified_df pd.DataFrame + Local unverified rows dataframe + participant : str + Participant's onion local path - Returns: - bool: True if successful, False if not + Returns + ------- + bool + True if successful, False if not """ + try: - verified_df.to_csv(f'{participant}verified.csv', index=False) + save_dataframe(verified_df, f'{participant}verified.csv') - unverified_df.to_csv(f'{participant}unverified.csv', index=False) + 
save_dataframe(unverified_df, f'{participant}unverified.csv') print_colors('[+] Verified and unverified saved successfully') @@ -418,43 +578,82 @@ def save_local_participant_verified_and_unverified(verified_df, unverified_df, p print_colors('[-] Saving verified and unverified failed',is_error=True) return False +def save_dataframe(df, path): + """ + Saves a dataframe + + Parameters + ---------- + df : pd.DataFrame + Dataframe wants to be saved + + path : str + Local path for the dataframe + + Returns + ------- + bool + True if saved, False if not + """ + + try: + df.to_csv(path, index=False) + + return True + + except Exception as err: + return False + ###################### Getters/Generators ###################### def generate_participant_url(participant): """ - generates url of the webring participant + Generates url of the webring participant - Parameters: - participant(str): participant's onion address/instance + Parameters + ---------- + participant : str + Participant's onion address/instance - Returns: - str: the url of the webring participant + Returns + ------- + str + The url of the webring participant """ return f'http://{participant}/participants/{participant}/' def generate_local_participant_dir(participant): """ - generates local files path of the webring participant + Generates local files path of the webring participant - Parameters: - participant(str): participant's onion address/instance + Parameters + ---------- + participant : str + Participant's onion address/instance - Returns: - str: the local path of the webring participant's files + Returns + ------- + str + The local path of the webring participant's files """ return f'{conf.PARTICIPANT_DIR}{participant}/' def get_participant_local_verified_and_unverified(participant): """ - reads the local verified csv and the local unverified csv of a participant + Reads the local verified csv and the local unverified csv of a participant - Parameters: - participant (str): participant's local files path 
+ Parameters + ---------- + participant : str + Participant's local files path - Returns: - verified_df(Dataframe): verified.csv as dataframe - unverified_df(Dataframe): unverified.csv as dataframe + Returns + ------- + pd.DataFrame + verified.csv as dataframe + pd.DataFrame + unverified.csv as dataframe """ try: @@ -462,24 +661,26 @@ def get_participant_local_verified_and_unverified(participant): except FileNotFoundError: print_colors("[-] File not found: verified.csv", is_error=True) - return pd.Dataframe(), pd.Dataframe() + return pd.DataFrame(), pd.DataFrame() try: unverified_df = pd.read_csv(f'{participant}unverified.csv') except FileNotFoundError: print_colors("[-] Participant File not found: unverified.csv", is_error=True) - return pd.Dataframe(), pd.Dataframe() + return pd.DataFrame(), pd.DataFrame() return verified_df, unverified_df def get_official_participants(): """ - reads all the official webring participants + Reads all the official webring participants - Returns: - list: list of all the official webring participants + Returns + ------- + list + List of all the official webring participants """ try: @@ -491,58 +692,78 @@ def get_official_participants(): except Exception as err: print_colors('[-] Couldn\'t read official webring participants file',is_error=True ) -def get_local_blacklist_and_sensitive(): +def get_local_blacklist(): """ - reads the local blacklisted words and the local sensitive words + Reads the local blacklist - Returns: - blacklist(list): list of all the words that are blacklisted - sensitive_list(list): list of all the words that are sensitive + Returns + ------- + blacklist_df : pd.DataFrame + Dataframe of the blacklist """ try: - current_instance = get_current_instance() + '/' try: - blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv') - blacklist = blacklist_df.iloc[:, 0].tolist() + blacklist_df = pd.read_csv(f'{conf.LOCAL_DIR}blacklist.csv') except FileNotFoundError: print_colors("[-] File not 
found: blacklist.csv", is_error=True) + + return blacklist_df + + except Exception as err: + print_colors('[-] Failed reading the blacklist words file',is_error=True) + + return pd.DataFrame() + +def get_local_sensitive(): + """ + Reads the local sensitive words + + Returns + ------- + sensitive_list list + List of all the words that are sensitive + """ + try: + try: - sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv') - sensitive_list = sensitive_df.iloc[:, 0].tolist() + sensitive_df = pd.read_csv(f'{conf.LOCAL_DIR}sensitive.csv') except FileNotFoundError: print_colors("[-] File not found: sensitive.csv", is_error=True) - return blacklist, sensitive_list + return sensitive_df except Exception as err: - print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True) + print_colors('[-] Failed reading the sensitive words file',is_error=True) - return [], [] + return pd.DataFrame() def get_local_verified_and_unverified(): """ - reads the local verified csv and the local unverified csv of the instance + Reads the local verified csv and the local unverified csv of the instance - Returns: - verified_df(Dataframe): verified.csv as dataframe - unverified_df(Dataframe): unverified.csv as dataframe + Returns + ------- + verified_df : pd.DataFrame + verified.csv as dataframe + unverified_df : pd.DataFrame + unverified.csv as dataframe """ try: - current_instance = get_current_instance() + '/' + try: - verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv') + verified_df = pd.read_csv(f'{conf.LOCAL_DIR}verified.csv') except FileNotFoundError: print_colors("[-] File not found: verified.csv", is_error=True) try: - unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv') + unverified_df = pd.read_csv(f'{conf.LOCAL_DIR}unverified.csv') except FileNotFoundError: print_colors("[-] File not found: unverified.csv", is_error=True) @@ -556,10 +777,12 @@ def 
get_local_verified_and_unverified(): def get_local_webring_participants(): """ - make sure the official participants are registered in the webring csv file + Make sure the official participants are registered in the webring csv file - Returns: - Dataframe: the verified local webring participants dataframe + Returns + ------- + pd.DataFrame + The verified local webring participants dataframe """ try: @@ -572,7 +795,7 @@ def get_local_webring_participants(): new_row = [{'Name': '','URL': participant,'Description': '','Trusted': 'NO','Status': '','Score': ''}] webring_df = pd.concat([webring_df, pd.DataFrame(new_row)], ignore_index=True) - webring_df.to_csv(conf.LOCAL_DIR + conf.WEBRING_CSV_FILE, index=False) + save_dataframe(webring_df, conf.LOCAL_DIR + conf.WEBRING_CSV_FILE) return webring_df diff --git a/templates/webring-participants.csv b/templates/webring-participants.csv index 2026b4a..c0d32ff 100644 --- a/templates/webring-participants.csv +++ b/templates/webring-participants.csv @@ -1 +1 @@ -Name,URL,Description,Trusted,Status,Score +Name,URL,Description,Trusted,Status,Score,Blacklisted