mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 18:56:40 +00:00
fix 4 and 10: CSVs are now sorted, and when duplicates are compared the local instance is always treated as the origin
This commit is contained in:
parent
50f6a637cd
commit
d78ed2bc86
5 changed files with 43 additions and 15 deletions
|
@ -36,7 +36,8 @@ def get_current_instance():
|
|||
return ""
|
||||
|
||||
#Set the local dir on script run
|
||||
conf.LOCAL_DIR = conf.PARTICIPANT_DIR + get_current_instance() + '/'
|
||||
conf.LOCAL_INSTANCE = get_current_instance()
|
||||
conf.LOCAL_DIR = conf.PARTICIPANT_DIR + conf.LOCAL_INSTANCE + '/'
|
||||
|
||||
###################### Validations ######################
|
||||
|
||||
|
@ -356,6 +357,37 @@ def merge_verification_df(receiving_df, merging_df):
|
|||
except Exception as err:
|
||||
return receiving_df
|
||||
|
||||
def sort_instances(df, sort_by, preferred=None):
    """
    Sort a dataframe by one column, optionally putting a preferred value first.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to sort. It is not modified; a sorted copy is returned.
    sort_by : str
        The column to sort by (ascending).
    preferred : str, optional
        If given (and truthy), rows whose ``sort_by`` value equals this value
        are moved to the start of the dataframe; the remaining rows keep
        their ascending order.

    Returns
    -------
    pd.DataFrame
        The sorted dataframe. If sorting fails, an error is printed and the
        dataframe is returned in whatever state it had reached (the original
        input if the first sort failed).
    """

    try:
        df = df.sort_values(by=sort_by)

        if preferred:
            # Stable partition: preferred rows (False == 0) sort ahead of the
            # others while both groups keep the order from the sort above.
            # No helper column is added, so an existing column (for example
            # one literally named 'priority') can never be clobbered.
            is_other = (df[sort_by] != preferred).to_numpy()
            df = df.iloc[is_other.argsort(kind='stable')]

    except Exception:
        print_colors('[-] Sorting failed',is_error=True)

    return df
|
||||
|
||||
def remove_duplications(df):
|
||||
"""
|
||||
Remove url and name duplications from the dataframe
|
||||
|
@ -370,9 +402,10 @@ def remove_duplications(df):
|
|||
pd.DataFrame
|
||||
The dataframe after all duplications were removed
|
||||
"""
|
||||
|
||||
try:
|
||||
df = df.drop_duplicates(subset='Name')
|
||||
df = df.drop_duplicates(subset='URL')
|
||||
df = df.drop_duplicates(subset='Name', keep='first')
|
||||
df = df.drop_duplicates(subset='URL', keep='first')
|
||||
|
||||
except Exception as err:
|
||||
print_colors('[-] Removing duplication failed',is_error=True)
|
||||
|
@ -704,10 +737,8 @@ def get_official_participants():
|
|||
"""
|
||||
|
||||
try:
|
||||
current_instance = get_current_instance()
|
||||
|
||||
with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
|
||||
return [line.strip() for line in file if current_instance not in line]
|
||||
return [line.strip() for line in file if conf.LOCAL_INSTANCE not in line]
|
||||
|
||||
except Exception as err:
|
||||
print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue