import os
import shutil

import requests

import conf
import utils


def download_participant_data(participant):
    """
    Downloads the participant's csv files and banner

    Parameters:
        participant (str): The url of the webring participant.

    Returns:
        Boolean: True if all files downloaded, False if any of them failed
    """
    try:
        utils.print_colors(f"[+] Downloading webring {participant} csv files and banner")

        local_participant_dir = utils.generate_local_participant_dir(participant)

        os.makedirs(local_participant_dir, exist_ok=True)

        for file_name in conf.CSV_FILES:

            csv_res = requests.get(f'{utils.generate_participant_url(participant)}{file_name}', proxies=conf.PROXIES, timeout=10)

            with open(f'{local_participant_dir}{file_name}', "w") as file:
                file.write(csv_res.text)

        banner_res = requests.get(f'{utils.generate_participant_url(participant)}banner.png', stream=True, proxies=conf.PROXIES, timeout=10)

        banner_path = f'{local_participant_dir}banner.png'

        with open(banner_path, 'wb') as f:
            f.write(banner_res.content)

        # SANITY CHECK ON THE BANNER PNG IMAGE:
        if not utils.IsBannerValid(banner_path):
            # if invalid, overwrite it with the template banner png file
            os.remove(banner_path)
            shutil.copyfile(f'{conf.TEMPLATE_PATH}banner.png', banner_path)

        utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")

        return True

    except Exception as err:
        utils.print_colors("[-] Downloading webring participant's files failed.", is_error=True)

        return False


def clean_csv(df, blacklist):
    """
    Cleans duplications and blacklisted rows

    Parameters:
        df (dataframe): The dataframe we want to clean.
        blacklist (list): The blacklisted words.

    Returns:
        Dataframe: Cleaned dataframe.
    """
    try:
        if not df.empty:
            df = utils.remove_duplications(df)

            df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]

        if not df.empty:
            df = df[df.apply(utils.is_row_valid, axis=1)]

    except Exception as err:
        utils.print_colors("[-] Cleaning dataframe failed.", is_error=True)

    return df


def mark_sensitive(df, sensitive_list):
    """
    Marks rows as sensitive

    Parameters:
        df (dataframe): The dataframe we want to mark.
        sensitive_list (list): The sensitive words.

    Returns:
        Dataframe: Marked dataframe.
    """
    try:
        if not df.empty:
            sensitive_rows = df.apply(lambda row: any(word in str(value) for word in sensitive_list for value in row), axis=1)

            df.loc[sensitive_rows, 'Sensitive'] = 'YES'
            df.loc[~sensitive_rows, 'Sensitive'] = 'NO'

    except Exception as err:
        utils.print_colors("[-] Marking sensitive words failed.", is_error=True)

    return df
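

# A minimal usage sketch of the helpers above, guarded so it only runs when this
# module is executed directly. The participant address and word lists below are
# placeholders, and it assumes conf.CSV_FILES, conf.PROXIES and the utils helpers
# are configured as in the rest of this module.
if __name__ == "__main__":
    import pandas as pd

    participant = "example.onion"                       # hypothetical participant address
    blacklist = ["example-blacklisted-word"]            # hypothetical blacklisted words
    sensitive_list = ["example-sensitive-word"]         # hypothetical sensitive words

    if download_participant_data(participant):
        local_dir = utils.generate_local_participant_dir(participant)
        for file_name in conf.CSV_FILES:
            # read one of the downloaded csv files, clean it and mark sensitive rows
            df = pd.read_csv(f'{local_dir}{file_name}')
            df = clean_csv(df, blacklist)
            df = mark_sensitive(df, sensitive_list)
            print(df)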