diff --git a/scripts/conf.py b/scripts/conf.py index 9b79107..e256729 100644 --- a/scripts/conf.py +++ b/scripts/conf.py @@ -9,6 +9,7 @@ OFFICIAL_PARTICIPANTS_FILE = STATIC_PATH + '.official_participants' WEBRING_CSV_FILE = 'webring-participants.csv' LOCAL_DIR = '' # Assign on script startup +LOCAL_INSTANCE = '' # Assign on script startup PROXIES = { 'http': 'socks5h://127.0.0.1:9050', diff --git a/scripts/lantern.py b/scripts/lantern.py index cbcad6a..845e187 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -153,7 +153,7 @@ def main(): if 'Blacklisted' not in webpdf.columns: webpdf['Blacklisted'] = 'NO' - webpdf.to_csv(webpcsvfile, index=False) + save_dataframe(webpdf, webpcsvfile) ##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE ##### if len(sys.argv) == 2 and sys.argv[1] == "4": diff --git a/scripts/logic/lantern_logic.py b/scripts/logic/lantern_logic.py index f3174a7..68387d0 100644 --- a/scripts/logic/lantern_logic.py +++ b/scripts/logic/lantern_logic.py @@ -69,6 +69,8 @@ def clean_csv(df, blacklist): try: if not df.empty: + df = utils.sort_instances(df, 'Instance', conf.LOCAL_INSTANCE) + df = utils.remove_duplications(df) df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)] diff --git a/scripts/logic/options.py b/scripts/logic/options.py index fe8cc47..4b4c05b 100644 --- a/scripts/logic/options.py +++ b/scripts/logic/options.py @@ -15,10 +15,8 @@ def run_option_4(): utils.print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files") utils.print_colors('[+] Syncing official webrings to local webrings') - - current_instance = utils.get_current_instance() - webring_df = utils.get_local_webring_participants(current_instance) + webring_df = utils.get_local_webring_participants(conf.LOCAL_INSTANCE) utils.print_colors('[+] Reading local blacklist and sensitive words') local_blacklist_df = utils.get_local_blacklist() @@ -27,13 +25,9 @@ def 
run_option_4(): utils.print_colors('[+] Reading local verified and unverified') local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified() - #Remove all rows - local_unverified_df = utils.renew_csv(local_unverified_df, current_instance) - local_verified_df = utils.renew_csv(local_verified_df, current_instance) - for participant in webring_df.itertuples(index=False, name='columns'): # Check if the participant is my instance - if current_instance in participant: + if conf.LOCAL_INSTANCE in participant: continue if participant.Blacklisted == 'YES': diff --git a/scripts/utils.py b/scripts/utils.py index 497ed19..15edffc 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -36,7 +36,8 @@ def get_current_instance(): return "" #Set the local dir on script run -conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/' +conf.LOCAL_INSTANCE = get_current_instance() +conf.LOCAL_DIR = conf.PARTICIPANT_DIR + conf.LOCAL_INSTANCE + '/' ###################### Validations ###################### @@ -356,6 +357,37 @@ def merge_verification_df(receiving_df, merging_df): except Exception as err: return receiving_df +def sort_instances(df, sort_by, preferred=None): + """ + Sorts a dataframe by a column, optionally placing a preferred value first + + Parameters + ---------- + df : pd.DataFrame + The dataframe to sort + sort_by : str + The column to sort by + preferred(optional) : str + Value of the sort_by column whose rows are placed at the start of the sorted dataframe + + Returns + ------- + pd.DataFrame + The sorted dataframe + """ + + try: + df = df.sort_values(by=sort_by) + + if preferred: + df['priority'] = (df[sort_by] == preferred).astype(int) + df = df.sort_values(by=['priority', sort_by], ascending=[False, True]).drop(columns='priority') + + except Exception as err: + print_colors('[-] Sorting failed',is_error=True) + + return df + def remove_duplications(df): """ Remove url and name duplications from the dataframe @@ -370,9 +402,10 @@ def remove_duplications(df):
pd.DataFrame The dataframe after all duplications were removed """ + try: - df = df.drop_duplicates(subset='Name') - df = df.drop_duplicates(subset='URL') + df = df.drop_duplicates(subset='Name', keep='first') + df = df.drop_duplicates(subset='URL', keep='first') except Exception as err: print_colors('[-] Removing duplication failed',is_error=True) @@ -704,10 +737,8 @@ def get_official_participants(): """ try: - current_instance = get_current_instance() - with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file: - return [line.strip() for line in file if current_instance not in line] + return [line.strip() for line in file if conf.LOCAL_INSTANCE not in line] except Exception as err: print_colors('[-] Couldn\'t read official webring participants file',is_error=True )