starting work on option 10

This commit is contained in:
doctor_dev 2025-05-30 18:20:52 +00:00
parent c4ebef10a4
commit 1b67f7a218
No known key found for this signature in database
GPG key ID: F12F7F71CB84AEAA
3 changed files with 33 additions and 21 deletions

View file

@ -583,10 +583,11 @@ Maintenance:
print_colors('[+] Saving local verified and unverified') print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df) save_local_verified_and_unverified(local_verified_df, local_unverified_df)
break except Exception as err:
except Exception:
print_colors("[-] Option 4 failed suddently, please try again", is_error=True) print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
raise err
break
case 5: case 5:
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))") print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
@ -1005,18 +1006,27 @@ Maintenance:
try: try:
verified_df, unverified_df = utils.get_local_verified_and_unverified() verified_df, unverified_df = get_local_verified_and_unverified()
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df) verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
save_local_verified_and_unverified(verified_df, unverified_df) save_local_verified_and_unverified(verified_df, unverified_df)
except Exception: except Exception as err:
print_colors("[-] Option 9 failed suddently, please try again", is_error=True) print_colors("[-] Option 9 failed suddently, please try again", is_error=True)
break
case 10: case 10:
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
participantspath = rootpath+'www/participants/' participantspath = rootpath+'www/participants/'
for participant in os.listdir(participantspath): for participant in os.listdir(participantspath):
print_colors(f"Participant: {participant}") print_colors(f"Participant: {participant}")

View file

@ -43,7 +43,7 @@ def download_participant_data(participant):
utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner") utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")
except Exception: except Exception as err:
print_colors("[-] Downloading webring participant's files failed.", is_error=True) print_colors("[-] Downloading webring participant's files failed.", is_error=True)
def clean_csv(df, blacklist): def clean_csv(df, blacklist):
@ -66,7 +66,7 @@ def clean_csv(df, blacklist):
if not df.empty: if not df.empty:
df = df[df.apply(utils.is_row_valid, axis=1)] df = df[df.apply(utils.is_row_valid, axis=1)]
except Exception: except Exception as err:
print_colors("[-] cleaning dataframe failed", is_error=True) print_colors("[-] cleaning dataframe failed", is_error=True)
return df return df
@ -90,7 +90,7 @@ def mark_sensitive(df, sensitive_list):
df.loc[sensitive_rows, 'Sensitive'] = 'YES' df.loc[sensitive_rows, 'Sensitive'] = 'YES'
df.loc[~sensitive_rows, 'Sensitive'] = 'NO' df.loc[~sensitive_rows, 'Sensitive'] = 'NO'
except Exception: except Exception as err:
print_colors("[-] MArking sensitive words failed.", is_error=True) print_colors("[-] MArking sensitive words failed.", is_error=True)
return df return df

View file

@ -152,7 +152,7 @@ def is_participant_reachable(instance):
status = requests.get(f'{url}{file_name}',proxies=conf.PROXIES, timeout=10).status_code status = requests.get(f'{url}{file_name}',proxies=conf.PROXIES, timeout=10).status_code
if status != 200: if status != 200:
return False return False
except Exception: except Exception as err:
return False return False
return True return True
@ -288,7 +288,7 @@ def is_row_valid(row):
IsScoreValid(row['Score']) IsScoreValid(row['Score'])
) )
except Exception: except Exception as err:
return False return False
###################### General ###################### ###################### General ######################
@ -316,7 +316,7 @@ def merge_verification_df(receiving_df, merging_df):
else: else:
return pd.concat([receiving_df, filtered_df], ignore_index=True) return pd.concat([receiving_df, filtered_df], ignore_index=True)
except Exception: except Exception as err:
return receiving_df return receiving_df
def remove_duplications(df): def remove_duplications(df):
@ -333,7 +333,7 @@ def remove_duplications(df):
df = df.drop_duplicates(subset='Name') df = df.drop_duplicates(subset='Name')
df = df.drop_duplicates(subset='URL') df = df.drop_duplicates(subset='URL')
except Exception: except Exception as err:
print_colors('[-] Removing duplication failed',is_error=True) print_colors('[-] Removing duplication failed',is_error=True)
return df return df
@ -344,12 +344,14 @@ def remove_cross_dataframe_replications(main_df, sub_df):
main_df = remove_duplications(main_df) main_df = remove_duplications(main_df)
sub_df = remove_duplications(sub_df) sub_df = remove_duplications(sub_df)
mask = sub_df['URL'].isin(main_fd['URL']) | df_a['Name'].isin(df_b['Name']) mask = sub_df['URL'].isin(main_df['URL']) | sub_df['Name'].isin(main_df['Name'])
sub_df = sub_df[~mask] sub_df = sub_df[~mask]
except: except Exception as err:
print_colors('[-] Removing cross dataframe duplications failed',is_error=True) print_colors('[-] Removing cross dataframe duplications failed',is_error=True)
raise err #REMOVE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
return main_df, sub_df return main_df, sub_df
@ -376,7 +378,7 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
return True return True
except Exception: except Exception as err:
print_colors('[-] Saving verified and unverified failed',is_error=True) print_colors('[-] Saving verified and unverified failed',is_error=True)
return False return False
@ -435,7 +437,7 @@ def get_participant_local_verified_and_unverified(participant):
return verified_df, unverified_df return verified_df, unverified_df
except Exception: except Exception as err:
print_colors('[-] Failed reading the verified and unverified files',is_error=True) print_colors('[-] Failed reading the verified and unverified files',is_error=True)
return pd.DataFrame(), pd.DataFrame() return pd.DataFrame(), pd.DataFrame()
@ -454,7 +456,7 @@ def get_official_participants():
with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file: with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
return [line.strip() for line in file if current_instance not in line] return [line.strip() for line in file if current_instance not in line]
except Exception: except Exception as err:
print_colors('[-] Couldn\'t read official webring participants file',is_error=True ) print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
def get_local_blacklist_and_sensitive(): def get_local_blacklist_and_sensitive():
@ -485,7 +487,7 @@ def get_local_blacklist_and_sensitive():
return blacklist, sensitive_list return blacklist, sensitive_list
except Exception: except Exception as err:
print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True) print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True)
return [], [] return [], []
@ -515,7 +517,7 @@ def get_local_verified_and_unverified():
return verified_df, unverified_df return verified_df, unverified_df
except Exception: except Exception as err:
print_colors('[-] Failed reading the verified and unverified files',is_error=True) print_colors('[-] Failed reading the verified and unverified files',is_error=True)
return pd.DataFrame(), pd.DataFrame() return pd.DataFrame(), pd.DataFrame()
@ -542,7 +544,7 @@ def get_local_webring_participants():
return webring_df return webring_df
except Exception: except Exception as err:
print_colors(f'[-] failed reading webring participants file',is_error=True ) print_colors(f'[-] failed reading webring participants file',is_error=True )
return pd.DataFrame() return pd.DataFrame()