import utils
import os
import shutil
import conf
import requests


def download_participant_data(participant):
    """
    Downloads the participants csv files and banner

    Every file named in conf.CSV_FILES is fetched from the participant's url
    and written into the participant's local directory, followed by
    banner.png. If utils.IsBannerValid rejects the downloaded banner, it is
    replaced by the template banner from conf.TEMPLATE_PATH.

    Parameters
    ----------
    participant : str
        The url of the webring participant.

    Returns
    -------
    Boolean
        True if all files downloaded, False if any of them failed
    """
    try:
        utils.print_colors(f"[+] Downloading webring {participant} csv files and banner")

        local_participant_dir = utils.generate_local_participant_dir(participant)
        os.makedirs(local_participant_dir, exist_ok=True)

        # The base url does not depend on the file being fetched, so build it once.
        participant_url = utils.generate_participant_url(participant)

        # NOTE(review): the HTTP status code is not checked, so an error page
        # returned by the participant is written to disk as if it were the
        # csv file - confirm whether raise_for_status() is wanted here.
        for file_name in conf.CSV_FILES:
            csv_res = requests.get(f'{participant_url}{file_name}', proxies=conf.PROXIES, timeout=10)
            with open(f'{local_participant_dir}{file_name}', "w") as file:
                file.write(csv_res.text)

        banner_res = requests.get(f'{participant_url}banner.png', stream=True, proxies=conf.PROXIES, timeout=10)
        banner_path = f'{local_participant_dir}banner.png'
        with open(banner_path, 'wb') as f:
            f.write(banner_res.content)

        # SANITY CHECK ON THE BANNER PNG IMAGE:
        if not utils.IsBannerValid(banner_path):
            # if false, overwrite it with the template banner png file
            os.remove(banner_path)
            shutil.copyfile(f'{conf.TEMPLATE_PATH}banner.png', banner_path)

        utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")
        return True

    except Exception:
        # Best effort: a failing participant is reported to the caller through
        # the return value instead of aborting the whole run.
        utils.print_colors("[-] Downloading webring participant's files failed.", is_error=True)
        return False


def clean_csv(df, blacklist):
    """
    Cleans duplications and blacklisted rows

    A row is dropped when any blacklisted word is a substring of the string
    form of any of its values. The rows that remain are then filtered with
    utils.is_row_valid. If anything raises, the error is logged and the
    dataframe is returned in whatever state it had reached.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe we want to clean.
    blacklist : list
        The blacklisted words.

    Returns
    -------
    pd.DataFrame
        Cleaned dataframe.
    """
    try:
        if not df.empty:
            df = utils.remove_duplications(df)

            df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]

            # The blacklist filter may have removed every row; only validate
            # when something is left.
            if not df.empty:
                df = df[df.apply(utils.is_row_valid, axis=1)]

    except Exception:
        # print_colors lives in utils; calling it unqualified raised NameError
        # from inside this handler and hid the original failure.
        utils.print_colors("[-] cleaning dataframe failed", is_error=True)

    return df


def mark_sensitive(df, sensitive_list):
    """
    Marks rows as sensitive

    The 'Sensitive' column is set to 'YES' for every row in which any
    sensitive word is a substring of the string form of any value, and to
    'NO' for all other rows. The dataframe is modified in place and also
    returned.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe we want to mark.
    sensitive_list : list
        The sensitive words.

    Returns
    -------
    pd.DataFrame
        Marked dataframe.
    """
    try:
        if not df.empty:
            sensitive_rows = df.apply(lambda row: any(word in str(value) for word in sensitive_list for value in row), axis=1)

            df.loc[sensitive_rows, 'Sensitive'] = 'YES'
            df.loc[~sensitive_rows, 'Sensitive'] = 'NO'

    except Exception:
        # print_colors lives in utils; calling it unqualified raised NameError
        # from inside this handler and hid the original failure.
        utils.print_colors("[-] Marking sensitive words failed.", is_error=True)

    return df


def mark_webring_participant_trusted(webring_df, participant_id, trustworthy):
    """
    Marks a webring to be trusted or not

    Writes "YES" or "NO" into the 'Trusted' column of the row at position
    participant_id. The dataframe is modified in place and also returned.
    Failures are logged and not re-raised.

    Parameters
    ----------
    webring_df : pd.DataFrame
        dataframe of all the webring participants
    participant_id : int
        the index of the participant
    trustworthy : bool
        is the participant trustworthy or not

    Returns
    -------
    pd.DataFrame
        Marked webring dataframe with trust/untrust.
    """
    try:
        webring_df.iloc[participant_id, webring_df.columns.get_loc('Trusted')] = "YES" if trustworthy else "NO"

    except Exception:
        utils.print_colors("[-] Trusting or untrusting a webring participant failed", is_error = True)

    return webring_df


def mark_webring_participant_blacklist(webring_df, participant_instance, participant_id, blacklisted):
    """
    Marks a webring to be blacklisted or not

    Writes "YES" or "NO" into the 'Blacklisted' column of the row at position
    participant_id, then passes participant_instance to
    utils.add_word_to_blacklist or utils.remove_word_from_blacklist.
    Unlike the other marking helpers, a failure is logged and then re-raised.

    Parameters
    ----------
    webring_df : pd.DataFrame
        dataframe of all the webring participants
    participant_instance
        value handed to the utils blacklist helpers
        (presumably the participant's url - TODO confirm against the caller)
    participant_id : int
        the index of the participant
    blacklisted : bool
        is the participant set to be blacklisted or not

    Returns
    -------
    pd.DataFrame
        Marked webring dataframe with blacklist/unblacklist.
    """
    try:
        blacklisted_column = webring_df.columns.get_loc('Blacklisted')

        if blacklisted:
            webring_df.iloc[participant_id, blacklisted_column] = "YES"
            utils.print_colors('[+] Adding new word to blacklist')
            utils.add_word_to_blacklist(participant_instance)
        else:
            webring_df.iloc[participant_id, blacklisted_column] = "NO"
            utils.print_colors('[+] Removing word from blacklist')
            utils.remove_word_from_blacklist(participant_instance)

    except Exception:
        utils.print_colors("[-] Blacklisting or unblacklisting a webring participlant failed", is_error = True)
        # Bare raise keeps the original traceback intact for the caller.
        raise

    return webring_df