mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 19:06:41 +00:00
Merge pull request 'fix_option_4_and_10_removing_self_added_rows' (#88) from fix_option_4_and_10_removing_self_added_rows into main
Reviewed-on: http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern/pulls/88
This commit is contained in:
commit
11c1a14266
5 changed files with 45 additions and 15 deletions
|
@ -9,6 +9,7 @@ OFFICIAL_PARTICIPANTS_FILE = STATIC_PATH + '.official_participants'
|
||||||
WEBRING_CSV_FILE = 'webring-participants.csv'
|
WEBRING_CSV_FILE = 'webring-participants.csv'
|
||||||
|
|
||||||
LOCAL_DIR = '' # Assign on script startup
|
LOCAL_DIR = '' # Assign on script startup
|
||||||
|
LOCAL_INSTANCE = '' # Assign on script startup
|
||||||
|
|
||||||
PROXIES = {
|
PROXIES = {
|
||||||
'http': 'socks5h://127.0.0.1:9050',
|
'http': 'socks5h://127.0.0.1:9050',
|
||||||
|
|
|
@ -153,7 +153,7 @@ def main():
|
||||||
if 'Blacklisted' not in webpdf.columns:
|
if 'Blacklisted' not in webpdf.columns:
|
||||||
webpdf['Blacklisted'] = 'NO'
|
webpdf['Blacklisted'] = 'NO'
|
||||||
|
|
||||||
webpdf.to_csv(webpcsvfile, index=False)
|
save_dataframe(webpdf, webpcsvfile)
|
||||||
|
|
||||||
##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
|
##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
|
||||||
if len(sys.argv) == 2 and sys.argv[1] == "4":
|
if len(sys.argv) == 2 and sys.argv[1] == "4":
|
||||||
|
|
|
@ -69,6 +69,8 @@ def clean_csv(df, blacklist):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
|
df = utils.sort_instances(df, 'Instance', conf.LOCAL_INSTANCE)
|
||||||
|
|
||||||
df = utils.remove_duplications(df)
|
df = utils.remove_duplications(df)
|
||||||
|
|
||||||
df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]
|
df = df[~df.apply(lambda row: any(word in str(value) for word in blacklist for value in row), axis=1)]
|
||||||
|
|
|
@ -15,10 +15,8 @@ def run_option_4():
|
||||||
utils.print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
|
utils.print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
|
||||||
|
|
||||||
utils.print_colors('[+] Syncing official webrings to local webrings')
|
utils.print_colors('[+] Syncing official webrings to local webrings')
|
||||||
|
|
||||||
current_instance = utils.get_current_instance()
|
|
||||||
|
|
||||||
webring_df = utils.get_local_webring_participants(current_instance)
|
webring_df = utils.get_local_webring_participants(conf.LOCAL_INSTANCE)
|
||||||
|
|
||||||
utils.print_colors('[+] Reading local blacklist and sensitive words')
|
utils.print_colors('[+] Reading local blacklist and sensitive words')
|
||||||
local_blacklist_df = utils.get_local_blacklist()
|
local_blacklist_df = utils.get_local_blacklist()
|
||||||
|
@ -27,13 +25,9 @@ def run_option_4():
|
||||||
utils.print_colors('[+] Reading local verified and unverified')
|
utils.print_colors('[+] Reading local verified and unverified')
|
||||||
local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified()
|
local_verified_df, local_unverified_df = utils.get_local_verified_and_unverified()
|
||||||
|
|
||||||
#Remove all rows
|
|
||||||
local_unverified_df = utils.renew_csv(local_unverified_df, current_instance)
|
|
||||||
local_verified_df = utils.renew_csv(local_verified_df, current_instance)
|
|
||||||
|
|
||||||
for participant in webring_df.itertuples(index=False, name='columns'):
|
for participant in webring_df.itertuples(index=False, name='columns'):
|
||||||
# Check if the participant is my instance
|
# Check if the participant is my instance
|
||||||
if current_instance in participant:
|
if conf.LOCAL_INSTANCE in participant:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if participant.Blacklisted == 'YES':
|
if participant.Blacklisted == 'YES':
|
||||||
|
|
|
@ -36,7 +36,8 @@ def get_current_instance():
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
#Set the local dir on script run
|
#Set the local dir on script run
|
||||||
conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/'
|
conf.LOCAL_INSTANCE = get_current_instance()
|
||||||
|
conf.LOCAL_DIR = conf.PARTICIPANT_DIR + conf.LOCAL_INSTANCE + '/'
|
||||||
|
|
||||||
###################### Validations ######################
|
###################### Validations ######################
|
||||||
|
|
||||||
|
@ -356,6 +357,39 @@ def merge_verification_df(receiving_df, merging_df):
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return receiving_df
|
return receiving_df
|
||||||
|
|
||||||
|
def sort_instances(df, sort_by, preferred=None):
    """
    Sort a dataframe by one column, optionally pinning one value to the top

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to sort. It is never modified, a new dataframe is returned
    sort_by : str
        The column to sort by (ascending)
    preferred(optional) : str
        When given, rows whose `sort_by` value equals it are placed at the start
        of the dataframe, followed by the remaining rows in ascending order

    Returns
    -------
    pd.DataFrame
        The sorted dataframe, or the dataframe as it was received if sorting failed
    """
    try:
        if preferred:
            # Build the helper column on a copy (assign) under a private name, so the
            # caller's dataframe never gains a stray column and a real 'priority'
            # column, if one exists, is neither overwritten nor dropped.
            is_preferred = (df[sort_by] == preferred).astype(int)
            df = (
                df.assign(_sort_priority=is_preferred)
                .sort_values(by=['_sort_priority', sort_by], ascending=[False, True])
                .drop(columns='_sort_priority')
            )
        else:
            df = df.sort_values(by=sort_by)

    except Exception as err:
        # Best effort: report the failure and fall through to return the input as-is
        print_colors('[-] Sorting failed',is_error=True)

    return df
|
||||||
|
|
||||||
def remove_duplications(df):
|
def remove_duplications(df):
|
||||||
"""
|
"""
|
||||||
Remove url and name duplications from the dataframe
|
Remove url and name duplications from the dataframe
|
||||||
|
@ -370,9 +404,10 @@ def remove_duplications(df):
|
||||||
pd.DataFrame
|
pd.DataFrame
|
||||||
The dataframe after all duplications were removed
|
The dataframe after all duplications were removed
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
df = df.drop_duplicates(subset='Name')
|
df = df.drop_duplicates(subset='Name', keep='first')
|
||||||
df = df.drop_duplicates(subset='URL')
|
df = df.drop_duplicates(subset='URL', keep='first')
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print_colors('[-] Removing duplication failed',is_error=True)
|
print_colors('[-] Removing duplication failed',is_error=True)
|
||||||
|
@ -704,10 +739,8 @@ def get_official_participants():
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_instance = get_current_instance()
|
|
||||||
|
|
||||||
with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
|
with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
|
||||||
return [line.strip() for line in file if current_instance not in line]
|
return [line.strip() for line in file if conf.LOCAL_INSTANCE not in line]
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
|
print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue