From c4ebef10a48681607a9445d39fa4028108c59bf1 Mon Sep 17 00:00:00 2001 From: doctor_dev Date: Fri, 30 May 2025 17:59:12 +0000 Subject: [PATCH] finished option 9 ready for test --- .gitignore | 1 + scripts/conf.py | 2 + scripts/lantern.py | 110 ++++++++++++++++++++++----------------------- scripts/utils.py | 10 ++--- 4 files changed, 61 insertions(+), 62 deletions(-) diff --git a/.gitignore b/.gitignore index 8c43f1e..868c262 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ __pycache__/ env/ submissions/submission.csv venv/ +local_testing/* diff --git a/scripts/conf.py b/scripts/conf.py index 3c2728e..9b79107 100644 --- a/scripts/conf.py +++ b/scripts/conf.py @@ -1,3 +1,5 @@ +import re + ROOT_PATH = '/srv/darknet-lantern/' STATIC_PATH = ROOT_PATH + 'www/' TEMPLATE_PATH = ROOT_PATH + 'templates/' diff --git a/scripts/lantern.py b/scripts/lantern.py index 2bd1454..7a2109d 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -533,55 +533,60 @@ Maintenance: case 4: print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files") - print_colors('[+] Syncing official webrings to local webrings') + try: - webring_df = verify_official_participants_registered() - - current_instance = get_current_instance() + print_colors('[+] Syncing official webrings to local webrings') - for participant in webring_df.itertuples(index=False, name='columns'): - # Check if the participant is my instance - if current_instance in participant: - continue + webring_df = verify_official_participants_registered() - if not is_participant_reachable(participant.URL): - print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True) - continue + current_instance = get_current_instance() - print_colors('[+] Downloading participant\'s files to store locally') - lantern.download_participant_data(participant.URL) + for participant in webring_df.itertuples(index=False, name='columns'): + # Check if the participant is my instance + if current_instance in participant: + continue - print_colors('[+] Reading local blacklist and sensitive words') - local_blacklist, local_sensitive = get_local_blacklist_and_sensitive() - - print_colors('[+] Reading local verified and unverified') - local_verified_df, local_unverified_df = get_local_verified_and_unverified() - - participant_url = generate_local_participant_dir(participant.URL) - - print_colors('[+] Reading webrring participant\'s verified and unverified, and removing unverified and blacklisted rows') - participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist) - participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist) - - print_colors('[+] Marking sensitive rows') - participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive) - participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive) + if not is_participant_reachable(participant.URL): + print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True) + continue - if participant.Trusted == 'YES': - print_colors('[+] This participant is trusted, copying participant\'s verified to local verified') - local_verified_df = merge_verification_df(local_verified_df, participant_verified_df) - - else: - print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified') - local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df) - - print_colors('[+] Copying participant\'s unverified to local unverified') - local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df) + print_colors('[+] Downloading participant\'s files to store locally') + lantern.download_participant_data(participant.URL) - print_colors('[+] Saving local verified and unverified') - save_local_verified_and_unverified(local_verified_df, local_unverified_df) + print_colors('[+] Reading local blacklist and sensitive words') + local_blacklist, local_sensitive = get_local_blacklist_and_sensitive() - break + print_colors('[+] Reading local verified and unverified') + local_verified_df, local_unverified_df = get_local_verified_and_unverified() + + participant_url = generate_local_participant_dir(participant.URL) + + print_colors('[+] Reading webrring participant\'s verified and unverified, and removing unverified and blacklisted rows') + participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist) + participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist) + + print_colors('[+] Marking sensitive rows') + participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive) + participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive) + + if participant.Trusted == 'YES': + print_colors('[+] This participant is trusted, copying participant\'s verified to local verified') + local_verified_df = merge_verification_df(local_verified_df, participant_verified_df) + + else: + print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified') + local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df) + + print_colors('[+] Copying participant\'s unverified to local unverified') + local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df) + + print_colors('[+] Saving local verified and unverified') + save_local_verified_and_unverified(local_verified_df, local_unverified_df) + + break + + except Exception: + print_colors("[-] Option 4 failed suddently, please try again", is_error=True) case 5: print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))") @@ -997,26 +1002,17 @@ Maintenance: case 9: print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") + + try: - verified_df, unverified_df = utils.get_local_verified_and_unverified() + verified_df, unverified_df = utils.get_local_verified_and_unverified() + verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df) + save_local_verified_and_unverified(verified_df, unverified_df) - for w in ['verified.csv', 'unverified.csv']: - csvfilepath = os.path.join(instancepath, w) - print_colors(f"Processing file: {csvfilepath}") - try: - csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip') - print_colors(f"Removing duplicates in {csvfilepath}") - #print_colors(f"{csvdf[['URL']]}") - csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False) - #print_colors(f"{csvdf[['URL']]}") - csvdf.to_csv(csvfilepath, index=False) - print_colors(f"Cleaned data:\n{csvdf[['URL']]}") - except Exception as e: - print_colors(f"An error occurred while processing {csvfilepath}: {e}") - break - break + except Exception: + print_colors("[-] Option 9 failed suddently, please try again", is_error=True) case 10: print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") diff --git a/scripts/utils.py b/scripts/utils.py index e33482d..9c0580c 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -334,7 +334,7 @@ def remove_duplications(df): df = df.drop_duplicates(subset='URL') except Exception: - pass + print_colors('[-] Removing duplication failed',is_error=True) return df @@ -348,10 +348,8 @@ def remove_cross_dataframe_replications(main_df, sub_df): sub_df = sub_df[~mask] - return sub_df - except: - pass + print_colors('[-] Removing cross dataframe duplications failed',is_error=True) return main_df, sub_df @@ -374,10 +372,12 @@ def save_local_verified_and_unverified(verified_df, unverified_df): unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False) + print_colors('[+] Verified and unverified saved successfully') + return True except Exception: - print_colors('[-] Saving verified and unverified failed',is_error=True ) + print_colors('[-] Saving verified and unverified failed',is_error=True) return False ###################### Getters/Generators ######################