Started refactoring 9

This commit is contained in:
doctor_dev 2025-05-30 15:30:01 +00:00
parent a2409ac83e
commit 7e6b75ec9d
No known key found for this signature in database
GPG key ID: F12F7F71CB84AEAA
2 changed files with 81 additions and 11 deletions

View file

@ -1002,6 +1002,7 @@ Maintenance:
case 9: case 9:
print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
for w in ['verified.csv', 'unverified.csv']: for w in ['verified.csv', 'unverified.csv']:
csvfilepath = os.path.join(instancepath, w) csvfilepath = os.path.join(instancepath, w)
print_colors(f"Processing file: {csvfilepath}") print_colors(f"Processing file: {csvfilepath}")
@ -1022,6 +1023,7 @@ Maintenance:
case 10: case 10:
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
participantspath = rootpath+'www/participants/' participantspath = rootpath+'www/participants/'
for participant in os.listdir(participantspath): for participant in os.listdir(participantspath):
print_colors(f"Participant: {participant}") print_colors(f"Participant: {participant}")

View file

@ -469,6 +469,23 @@ def remove_duplications(df):
return df return df
def remove_cross_dataframe_replications(main_df, sub_df):
    """
    removes from sub_df every row that is already present in main_df

    Both dataframes are first de-duplicated on their own with remove_duplications.
    A row of sub_df is then dropped when its 'URL' or its 'Name' also appears in main_df.

    Parameters:
    main_df (Dataframe): the dataframe that takes precedence, only its own duplicates are removed
    sub_df (Dataframe): the dataframe that gets stripped of the rows shared with main_df

    Returns:
    main_df(Dataframe): main dataframe without its own duplicates
    sub_df(Dataframe): sub dataframe without its own duplicates and without the rows shared with main_df
    """
    try:
        main_df = remove_duplications(main_df)
        sub_df = remove_duplications(sub_df)

        # a row is a replication when either its URL or its Name already exists in main_df
        mask = sub_df['URL'].isin(main_df['URL']) | sub_df['Name'].isin(main_df['Name'])
        sub_df = sub_df[~mask]

    except Exception:
        # best effort: report the failure and hand back whatever was processed so far
        print_colors('[-] Removing cross dataframe replications failed', is_error=True)

    # always the same (main_df, sub_df) shape, whether or not the filtering succeeded
    return main_df, sub_df
###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant ###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant
def save_local_verified_and_unverified(verified_df, unverified_df): def save_local_verified_and_unverified(verified_df, unverified_df):
""" """
@ -521,6 +538,39 @@ def generate_local_participant_dir(participant):
return f'{conf.PARTICIPANT_DIR}{participant}/' return f'{conf.PARTICIPANT_DIR}{participant}/'
def get_participant_local_verified_and_unverified(participant):
    """
    reads the local verified csv and the local unverified csv of a participant

    Parameters:
    participant (str): prefix put directly in front of 'verified.csv' and 'unverified.csv',
                       so it has to be the participant's directory including the trailing '/'
                       (presumably the value returned by generate_local_participant_dir - TODO confirm with the callers)

    Returns:
    verified_df(Dataframe): verified.csv as dataframe, empty dataframe when the file does not exist
    unverified_df(Dataframe): unverified.csv as dataframe, empty dataframe when the file does not exist
    """
    # start from empty dataframes so that one missing file does not discard the other one
    verified_df = pd.DataFrame()
    unverified_df = pd.DataFrame()

    try:
        try:
            verified_df = pd.read_csv(f'{participant}verified.csv')

        except FileNotFoundError:
            print_colors("[-] File not found: verified.csv", is_error=True)

        try:
            unverified_df = pd.read_csv(f'{participant}unverified.csv')

        except FileNotFoundError:
            print_colors("[-] Participant File not found: unverified.csv", is_error=True)

        return verified_df, unverified_df

    except Exception:
        # any other failure (for example an unparsable csv) invalidates both results
        print_colors('[-] Failed reading the verified and unverified files',is_error=True)
        return pd.DataFrame(), pd.DataFrame()
def get_official_participants(): def get_official_participants():
""" """
reads all the official webring participants reads all the official webring participants
@ -548,22 +598,32 @@ def get_local_blacklist_and_sensitive():
""" """
try: try:
current_instance = get_current_instance() + '/' current_instance = get_current_instance() + '/'
try:
blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv') blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
blacklist = blacklist_df.iloc[:, 0].tolist() blacklist = blacklist_df.iloc[:, 0].tolist()
except FileNotFoundError:
print_colors("[-] File not found: blacklist.csv", is_error=True)
try:
sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv') sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
sensitive_list = sensitive_df.iloc[:, 0].tolist() sensitive_list = sensitive_df.iloc[:, 0].tolist()
except FileNotFoundError:
print_colors("[-] File not found: sensitive.csv", is_error=True)
return blacklist, sensitive_list return blacklist, sensitive_list
except Exception: except Exception:
print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True) print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True)
return [], [] return [], []
def get_local_verified_and_unverified(): def get_local_verified_and_unverified():
""" """
reads the local verified csv and the local unverified csv reads the local verified csv and the local unverified csv of the instance
Returns: Returns:
verified_df(Dataframe): verified.csv as dataframe verified_df(Dataframe): verified.csv as dataframe
@ -572,15 +632,23 @@ def get_local_verified_and_unverified():
try: try:
current_instance = get_current_instance() + '/' current_instance = get_current_instance() + '/'
try:
verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv') verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
except FileNotFoundError:
print_colors("[-] File not found: verified.csv", is_error=True)
try:
unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv') unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
except FileNotFoundError:
print_colors("[-] File not found: unverified.csv", is_error=True)
return verified_df, unverified_df return verified_df, unverified_df
except Exception: except Exception:
print_colors('[-] Failed reading the verified and unverified files',is_error=True) print_colors('[-] Failed reading the verified and unverified files',is_error=True)
return pd.DataFrame(), pd.DataFrame() return pd.DataFrame(), pd.DataFrame()
def get_local_webring_participants(): def get_local_webring_participants():