Finished option 9; ready for testing

This commit is contained in:
doctor_dev 2025-05-30 17:59:12 +00:00
parent b234d9d1d0
commit c4ebef10a4
No known key found for this signature in database
GPG key ID: F12F7F71CB84AEAA
4 changed files with 61 additions and 62 deletions

1
.gitignore vendored
View file

@ -6,4 +6,5 @@ __pycache__/
env/ env/
submissions/submission.csv submissions/submission.csv
venv/ venv/
local_testing/*

View file

@ -1,3 +1,5 @@
import re
ROOT_PATH = '/srv/darknet-lantern/' ROOT_PATH = '/srv/darknet-lantern/'
STATIC_PATH = ROOT_PATH + 'www/' STATIC_PATH = ROOT_PATH + 'www/'
TEMPLATE_PATH = ROOT_PATH + 'templates/' TEMPLATE_PATH = ROOT_PATH + 'templates/'

View file

@ -533,55 +533,60 @@ Maintenance:
case 4: case 4:
print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files") print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
print_colors('[+] Syncing official webrings to local webrings') try:
webring_df = verify_official_participants_registered() print_colors('[+] Syncing official webrings to local webrings')
current_instance = get_current_instance()
for participant in webring_df.itertuples(index=False, name='columns'): webring_df = verify_official_participants_registered()
# Check if the participant is my instance
if current_instance in participant:
continue
if not is_participant_reachable(participant.URL): current_instance = get_current_instance()
print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue
print_colors('[+] Downloading participant\'s files to store locally') for participant in webring_df.itertuples(index=False, name='columns'):
lantern.download_participant_data(participant.URL) # Check if the participant is my instance
if current_instance in participant:
continue
print_colors('[+] Reading local blacklist and sensitive words') if not is_participant_reachable(participant.URL):
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive() print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue
print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()
participant_url = generate_local_participant_dir(participant.URL)
print_colors('[+] Reading webrring participant\'s verified and unverified, and removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist)
participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist)
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
if participant.Trusted == 'YES': print_colors('[+] Downloading participant\'s files to store locally')
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified') lantern.download_participant_data(participant.URL)
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
print_colors('[+] Saving local verified and unverified') print_colors('[+] Reading local blacklist and sensitive words')
save_local_verified_and_unverified(local_verified_df, local_unverified_df) local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
break print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()
participant_url = generate_local_participant_dir(participant.URL)
print_colors('[+] Reading webrring participant\'s verified and unverified, and removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist)
participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist)
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
if participant.Trusted == 'YES':
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df)
break
except Exception:
print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
case 5: case 5:
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))") print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
@ -997,26 +1002,17 @@ Maintenance:
case 9: case 9:
print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
try:
verified_df, unverified_df = utils.get_local_verified_and_unverified() verified_df, unverified_df = utils.get_local_verified_and_unverified()
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
save_local_verified_and_unverified(verified_df, unverified_df)
for w in ['verified.csv', 'unverified.csv']: except Exception:
csvfilepath = os.path.join(instancepath, w) print_colors("[-] Option 9 failed suddently, please try again", is_error=True)
print_colors(f"Processing file: {csvfilepath}")
try:
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print_colors(f"Removing duplicates in {csvfilepath}")
#print_colors(f"{csvdf[['URL']]}")
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
#print_colors(f"{csvdf[['URL']]}")
csvdf.to_csv(csvfilepath, index=False)
print_colors(f"Cleaned data:\n{csvdf[['URL']]}")
except Exception as e:
print_colors(f"An error occurred while processing {csvfilepath}: {e}")
break
break
case 10: case 10:
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")

View file

@ -334,7 +334,7 @@ def remove_duplications(df):
df = df.drop_duplicates(subset='URL') df = df.drop_duplicates(subset='URL')
except Exception: except Exception:
pass print_colors('[-] Removing duplication failed',is_error=True)
return df return df
@ -348,10 +348,8 @@ def remove_cross_dataframe_replications(main_df, sub_df):
sub_df = sub_df[~mask] sub_df = sub_df[~mask]
return sub_df
except: except:
pass print_colors('[-] Removing cross dataframe duplications failed',is_error=True)
return main_df, sub_df return main_df, sub_df
@ -374,10 +372,12 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False) unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)
print_colors('[+] Verified and unverified saved successfully')
return True return True
except Exception: except Exception:
print_colors('[-] Saving verified and unverified failed',is_error=True ) print_colors('[-] Saving verified and unverified failed',is_error=True)
return False return False
###################### Getters/Generators ###################### ###################### Getters/Generators ######################