From 7e6b75ec9d218fd51d0977a733e84f7003274c76 Mon Sep 17 00:00:00 2001
From: doctor_dev
Date: Fri, 30 May 2025 15:30:01 +0000
Subject: [PATCH] Started refactoring 9

---
NOTE(review): this patch was re-flowed from a copy whose line breaks and
indentation had been collapsed. Indentation and blank context lines are
reconstructed and must be checked against the tree before applying. The
post-image blob id on the scripts/utils.py index line predates the fixes
listed below.

Review fixes folded into the added lines:
- remove_cross_dataframe_replications: use main_df/sub_df instead of the
  undefined main_fd/df_a/df_b, always return (main_df, sub_df), and report
  errors instead of swallowing them with a bare except.
- get_participant_local_verified_and_unverified,
  get_local_verified_and_unverified, get_local_blacklist_and_sensitive:
  a missing csv now yields an empty value for that file only, instead of
  an UnboundLocalError that discarded the file that did load.
- get_participant_local_verified_and_unverified: drop the unused
  get_current_instance() call.

 scripts/lantern.py |   2 ++
 scripts/utils.py   | 104 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 95 insertions(+), 11 deletions(-)

diff --git a/scripts/lantern.py b/scripts/lantern.py
index 8985bcd..befcd5f 100644
--- a/scripts/lantern.py
+++ b/scripts/lantern.py
@@ -1002,6 +1002,7 @@ Maintenance:
 
             case 9:
                 print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
+
                 for w in ['verified.csv', 'unverified.csv']:
                     csvfilepath = os.path.join(instancepath, w)
                     print_colors(f"Processing file: {csvfilepath}")
@@ -1022,6 +1023,7 @@ Maintenance:
 
             case 10:
                 print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
+
                 participantspath = rootpath+'www/participants/'
                 for participant in os.listdir(participantspath):
                     print_colors(f"Participant: {participant}")
diff --git a/scripts/utils.py b/scripts/utils.py
index 513bd15..f31340e 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -469,6 +469,31 @@ def remove_duplications(df):
 
     return df
 
+def remove_cross_dataframe_replications(main_df, sub_df):
+    """
+    removes the rows of sub_df whose URL or Name already exists in main_df
+
+    Parameters:
+        main_df (Dataframe): the dataframe whose rows are kept
+        sub_df (Dataframe): the dataframe that gets filtered against main_df
+
+    Returns:
+        main_df(Dataframe): main_df after remove_duplications
+        sub_df(Dataframe): sub_df after remove_duplications and without the rows found in main_df
+    """
+    try:
+        main_df = remove_duplications(main_df)
+        sub_df = remove_duplications(sub_df)
+
+        mask = sub_df['URL'].isin(main_df['URL']) | sub_df['Name'].isin(main_df['Name'])
+        sub_df = sub_df[~mask]
+
+    except Exception:
+        # best effort: report and hand back the dataframes in whatever state they reached
+        print_colors('[-] Failed removing the cross dataframe replications',is_error=True)
+
+    return main_df, sub_df
+
 ###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant
 def save_local_verified_and_unverified(verified_df, unverified_df):
     """
@@ -521,6 +546,41 @@ def generate_local_participant_dir(participant):
 
     return f'{conf.PARTICIPANT_DIR}{participant}/'
 
+def get_participant_local_verified_and_unverified(participant):
+    """
+    reads the local verified csv and the local unverified csv of a participant
+
+    Parameters:
+        participant (str): participant's onion address/instance, prepended as-is to the csv file names
+
+    Returns:
+        verified_df(Dataframe): verified.csv as dataframe (empty if the file is missing)
+        unverified_df(Dataframe): unverified.csv as dataframe (empty if the file is missing)
+    """
+
+    try:
+        try:
+            verified_df = pd.read_csv(f'{participant}verified.csv')
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: verified.csv", is_error=True)
+            # fall back to an empty dataframe so the other file is still returned
+            verified_df = pd.DataFrame()
+
+        try:
+            unverified_df = pd.read_csv(f'{participant}unverified.csv')
+
+        except FileNotFoundError:
+            print_colors("[-] Participant File not found: unverified.csv", is_error=True)
+            unverified_df = pd.DataFrame()
+
+        return verified_df, unverified_df
+
+    except Exception:
+        print_colors('[-] Failed reading the verified and unverified files',is_error=True)
+
+    return pd.DataFrame(), pd.DataFrame()
+
 def get_official_participants():
     """
     reads all the official webring participants
@@ -548,22 +608,34 @@ def get_local_blacklist_and_sensitive():
     """
     try:
         current_instance = get_current_instance() + '/'
+        try:
+            blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
+            blacklist = blacklist_df.iloc[:, 0].tolist()
 
-        blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
-        blacklist = blacklist_df.iloc[:, 0].tolist()
+        except FileNotFoundError:
+            print_colors("[-] File not found: blacklist.csv", is_error=True)
+            blacklist = []
+
+        try:
+            sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
+            sensitive_list = sensitive_df.iloc[:, 0].tolist()
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: sensitive.csv", is_error=True)
+            sensitive_list = []
 
-        sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
-        sensitive_list = sensitive_df.iloc[:, 0].tolist()
         return blacklist, sensitive_list
+
     except Exception:
-        print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True )
-        return [], []
+        print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True)
+
+    return [], []
 
 
 
 def get_local_verified_and_unverified():
     """
-    reads the local verified csv and the local unverified csv
+    reads the local verified csv and the local unverified csv of the instance
 
     Returns:
         verified_df(Dataframe): verified.csv as dataframe
@@ -572,16 +644,26 @@ def get_local_verified_and_unverified():
 
     try:
         current_instance = get_current_instance() + '/'
+        try:
+            verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
+
+        except FileNotFoundError:
+            print_colors("[-] File not found: verified.csv", is_error=True)
+            verified_df = pd.DataFrame()
 
-        verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
+        try:
+            unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
 
-        unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
+        except FileNotFoundError:
+            print_colors("[-] File not found: unverified.csv", is_error=True)
+            unverified_df = pd.DataFrame()
 
         return verified_df, unverified_df
 
     except Exception:
-        print_colors('[-] Failed reading the verified and unverified files',is_error=True )
-        return pd.DataFrame(), pd.DataFrame()
+        print_colors('[-] Failed reading the verified and unverified files',is_error=True)
+
+    return pd.DataFrame(), pd.DataFrame()
 
 def get_local_webring_participants():
     """