Merge pull request 'fix_option_4_and_10_removing_self_added_rows' (#88) from fix_option_4_and_10_removing_self_added_rows into main

Reviewed-on: http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern/pulls/88
2025-07-01 19:06:41 +00:00 · 2025-06-11 07:59:47 +02:00 · 2025-06-11 07:59:47 +02:00 · 11c1a14266
commit 11c1a14266
parent 94fcec124b 2d12280dc9
5 changed files with 45 additions and 15 deletions
--- a/scripts/conf.py
+++ b/scripts/conf.py
@@ -9,6 +9,7 @@ OFFICIAL_PARTICIPANTS_FILE = STATIC_PATH + '.official_participants'
 WEBRING_CSV_FILE = 'webring-participants.csv'
 LOCAL_DIR = '' # Assign on script startup
 LOCAL_INSTANCE = '' # Assign on script startup
 PROXIES = {
    'http': 'socks5h://127.0.0.1:9050',
--- a/scripts/lantern.py
+++ b/scripts/lantern.py
@@ -153,7 +153,7 @@ def main():
    if 'Blacklisted' not in webpdf.columns:
        webpdf['Blacklisted'] = 'NO'
-        webpdf.to_csv(webpcsvfile, index=False)
+        save_dataframe(webpdf, webpcsvfile)
 	##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
    if len(sys.argv) == 2 and sys.argv[1] == "4":
--- a/scripts/logic/lantern_logic.py
+++ b/scripts/logic/lantern_logic.py
@@ -69,6 +69,8 @@ def clean_csv(df, blacklist):
    try:
        if not df.empty:
            df = utils.sort_instances(df, 'Instance', conf.LOCAL_INSTANCE)
            df = utils.remove_duplications(df)
            df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]
--- a/scripts/logic/options.py
+++ b/scripts/logic/options.py
@@ -15,10 +15,8 @@ def run_option_4():
        utils.print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
        utils.print_colors('[+] Syncing official webrings to local webrings')
        current_instance = utils.get_current_instance()
-        webring_df = utils.get_local_webring_participants(current_instance)
+        webring_df = utils.get_local_webring_participants(conf.LOCAL_INSTANCE)
        utils.print_colors('[+] Reading local blacklist and sensitive words')
        local_blacklist_df = utils.get_local_blacklist()
@@ -27,13 +25,9 @@ def run_option_4():
        utils.print_colors('[+] Reading local verified and unverified')
        local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified()
        #Remove all rows
        local_unverified_df = utils.renew_csv(local_unverified_df, current_instance)
        local_verified_df = utils.renew_csv(local_verified_df, current_instance)
        for participant in webring_df.itertuples(index=False, name='columns'):
            # Check if the participant is my instance
-            if current_instance in participant:
+            if conf.LOCAL_INSTANCE in participant:
                continue
            if participant.Blacklisted == 'YES':
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -36,7 +36,8 @@ def get_current_instance():
        return ""
 #Set the local dir on script run
-conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/'
+conf.LOCAL_INSTANCE = get_current_instance()
 conf.LOCAL_DIR = conf.PARTICIPANT_DIR + conf.LOCAL_INSTANCE + '/'
 ###################### Validations ######################
@@ -356,6 +357,39 @@ def merge_verification_df(receiving_df, merging_df):
    except Exception as err:
        return receiving_df
 def sort_instances(df, sort_by, preferred=None):
    """
    Sort a dataframe by one column, optionally pinning one value to the top.

    The dataframe passed in is never modified; sorting works on a copy.

    Parameters
    ----------
        df : pd.DataFrame
            The dataframe to sort
        sort_by : str
            The column to sort by (ascending)
        preferred(optional) : str
            A value of `sort_by` whose rows are placed at the start of the
            result, ahead of the ascending order. Ignored when falsy.

    Returns
    -------
        pd.DataFrame
            The sorted dataframe, or the dataframe as received if sorting failed
    """
    try:
        if preferred:
            # Pick a helper column name that cannot collide with a real column,
            # so an existing 'priority' column is neither overwritten nor dropped
            flag = 'priority'
            while flag in df.columns:
                flag = '_' + flag
            # assign() builds a new frame: the caller's dataframe never gains the helper column
            flagged = df.assign(**{flag: (df[sort_by] == preferred).astype(int)})
            # Preferred rows (flag == 1) first, then ascending by the sort column
            df = flagged.sort_values(by=[flag, sort_by], ascending=[False, True]).drop(columns=flag)
        else:
            df = df.sort_values(by=sort_by)
    except Exception:
        # Best effort: report the failure and hand back the dataframe unsorted
        print_colors('[-] Sorting failed',is_error=True)
    return df
 def remove_duplications(df):
    """
    Remove url and name duplications from the dataframe
@@ -370,9 +404,10 @@ def remove_duplications(df):
        pd.DataFrame
            The dataframe after all duplications were removed
    """
    try:
-        df = df.drop_duplicates(subset='Name')
+        df = df.drop_duplicates(subset='Name', keep='first')
-        df = df.drop_duplicates(subset='URL')
+        df = df.drop_duplicates(subset='URL', keep='first')
    except Exception as err:
        print_colors('[-] Removing duplication failed',is_error=True)
@@ -704,10 +739,8 @@ def get_official_participants():
    """
    try:
        current_instance = get_current_instance()
        with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
-            return [line.strip() for line in file if current_instance not in line]
+            return [line.strip() for line in file if conf.LOCAL_INSTANCE not in line]
    except Exception as err:
        print_colors('[-] Couldn\'t read official webring participants file',is_error=True )