mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 22:26:41 +00:00
Finished refactoring options 9 and 10
This commit is contained in:
parent
19e582203b
commit
2a827c0b8b
2 changed files with 84 additions and 97 deletions
|
@ -561,9 +561,12 @@ Maintenance:
|
||||||
|
|
||||||
participant_url = generate_local_participant_dir(participant.URL)
|
participant_url = generate_local_participant_dir(participant.URL)
|
||||||
|
|
||||||
print_colors('[+] Reading webrring participant\'s verified and unverified, and removing unverified and blacklisted rows')
|
print_colors('[+] Reading webrring participant\'s verified and unverified')
|
||||||
participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist)
|
participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_url)
|
||||||
participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist)
|
|
||||||
|
print_colors('[+] Removing unvalidated and blacklisted rows')
|
||||||
|
participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
|
||||||
|
participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
|
||||||
|
|
||||||
print_colors('[+] Marking sensitive rows')
|
print_colors('[+] Marking sensitive rows')
|
||||||
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
|
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
|
||||||
|
@ -585,7 +588,6 @@ Maintenance:
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
|
print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
|
||||||
raise err
|
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -1006,10 +1008,13 @@ Maintenance:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
|
print_colors('[+] Reading local verified and unverified')
|
||||||
verified_df, unverified_df = get_local_verified_and_unverified()
|
verified_df, unverified_df = get_local_verified_and_unverified()
|
||||||
|
|
||||||
|
print_colors('[+] Removing cross dataframe replications')
|
||||||
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
|
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
|
||||||
|
|
||||||
|
print_colors('[+] Saving local verified and unverified')
|
||||||
save_local_verified_and_unverified(verified_df, unverified_df)
|
save_local_verified_and_unverified(verified_df, unverified_df)
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
@ -1020,80 +1025,30 @@ Maintenance:
|
||||||
case 10:
|
case 10:
|
||||||
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
|
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
|
||||||
|
|
||||||
|
try:
|
||||||
|
print_colors('[+] Reading local blacklist and sensitive words')
|
||||||
|
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
|
||||||
|
|
||||||
|
for participant in os.listdir(conf.PARTICIPANT_DIR):
|
||||||
|
participant_local_dir = conf.PARTICIPANT_DIR + participant + '/'
|
||||||
|
|
||||||
print_colors('[+] Reading local blacklist and sensitive words')
|
print_colors('[+] Reading webrring participant\'s verified and unverified')
|
||||||
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
|
participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_local_dir)
|
||||||
|
|
||||||
|
print_colors('[+] Removing unverified and blacklisted rows')
|
||||||
|
participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
|
||||||
|
participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
|
||||||
|
|
||||||
|
print_colors('[+] Marking sensitive rows')
|
||||||
|
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
|
||||||
|
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
|
||||||
|
|
||||||
participantspath = rootpath+'www/participants/'
|
print_colors('[+] Saving local participant verified and unverified')
|
||||||
for participant in os.listdir(participantspath):
|
save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir)
|
||||||
print_colors(f"Participant: {participant}")
|
|
||||||
#read=input("Continue?")
|
|
||||||
participantdir= participantspath+participant
|
|
||||||
################ BEGIN SANITY CHECKS FOR EACH PARTICIPANTS ##############
|
|
||||||
# iterate through the participant's verified.csv and unverified.csv files
|
|
||||||
for w in ['verified.csv','unverified.csv']:
|
|
||||||
csvfilepath=participantdir+'/'+w
|
|
||||||
print_colors(f"{csvfilepath}")
|
|
||||||
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
|
|
||||||
rows2delete= [] # it is an empty list at first
|
|
||||||
for i,j in csvdf.iterrows():
|
|
||||||
row=csvdf.loc[i,:].values.tolist()
|
|
||||||
#print_colors(f"{row}")
|
|
||||||
|
|
||||||
|
except Exception as err:
|
||||||
|
print_colors("[-] Option 10 failed suddently, please try again", is_error=True)
|
||||||
|
|
||||||
|
|
||||||
################################ SANITY CHECKS ####################################
|
|
||||||
### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
|
|
||||||
if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES" :
|
|
||||||
csvdf.at[i, 'Status'] = "YES"
|
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
|
||||||
else:
|
|
||||||
csvdf.at[i, 'Status'] = "NO"
|
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
|
||||||
|
|
||||||
if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES" :
|
|
||||||
csvdf.at[i, 'Sensitive'] = "YES"
|
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
|
||||||
else:
|
|
||||||
csvdf.at[i, 'Sensitive'] = "NO"
|
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
|
||||||
|
|
||||||
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
|
|
||||||
if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
|
|
||||||
if i not in rows2delete:
|
|
||||||
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
|
|
||||||
#print_colors(f"{row}")
|
|
||||||
print(IsURLValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsURLValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
|
|
||||||
rows2delete.append(i)
|
|
||||||
read=input("Continue?")
|
|
||||||
|
|
||||||
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
|
|
||||||
for k,l in bldf.iterrows():
|
|
||||||
blword=bldf.at[k, 'blacklisted-words']
|
|
||||||
if any(blword in str(x) for x in row) == True:
|
|
||||||
if i not in rows2delete:
|
|
||||||
print_colors(f"Marking row {i} for deletion, as it matches with the blacklisted word {blword}")
|
|
||||||
rows2delete.append(i)
|
|
||||||
#read=input("Continue?")
|
|
||||||
### SANITY CHECK 3: Mark all rows that match sensitive words to be sensitive = YES
|
|
||||||
for k,l in sedf.iterrows():
|
|
||||||
seword=sedf.at[k, 'sensitive-words']
|
|
||||||
if any(seword in str(x) for x in row) == True:
|
|
||||||
print_colors(f"Marking row {i} as sensitive, as it matches with the sensitive word {seword}")
|
|
||||||
csvdf.at[i, 'Sensitive']="YES"
|
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
|
||||||
#read=input("Continue?")
|
|
||||||
|
|
||||||
|
|
||||||
for i in rows2delete:
|
|
||||||
row=csvdf.loc[i,:].values.tolist()
|
|
||||||
print_colors(f'[+] REMOVING ROW : {i} {row}')
|
|
||||||
csvdf.drop(i, inplace= True)
|
|
||||||
csvdf.to_csv(csvfilepath, index=False)
|
|
||||||
#read=input("Continue?")
|
|
||||||
break
|
break
|
||||||
|
|
||||||
case 11:
|
case 11:
|
||||||
|
|
|
@ -60,7 +60,7 @@ def IsXFTPServerValid(url: str) -> bool:
|
||||||
Returns True if URL is a valid SimpleX XFTP Server URL
|
Returns True if URL is a valid SimpleX XFTP Server URL
|
||||||
False otherwise
|
False otherwise
|
||||||
"""
|
"""
|
||||||
return conf.RecognizeSimplexType(url) == 'xftp'
|
return RecognizeSimplexType(url) == 'xftp'
|
||||||
|
|
||||||
# stub function
|
# stub function
|
||||||
def IsSMPServerValid(url: str) -> bool:
|
def IsSMPServerValid(url: str) -> bool:
|
||||||
|
@ -68,7 +68,7 @@ def IsSMPServerValid(url: str) -> bool:
|
||||||
Returns True if URL is a valid SimpleX SMP Server URL
|
Returns True if URL is a valid SimpleX SMP Server URL
|
||||||
False otherwise
|
False otherwise
|
||||||
"""
|
"""
|
||||||
return conf.RecognizeSimplexType(url) == 'smp'
|
return RecognizeSimplexType(url) == 'smp'
|
||||||
|
|
||||||
def IsClearnetLinkValid(url: str) -> bool:
|
def IsClearnetLinkValid(url: str) -> bool:
|
||||||
"""
|
"""
|
||||||
|
@ -242,7 +242,7 @@ def IsNameValid(name: str) -> bool:
|
||||||
Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
|
Check the parameter name only contains [a-zA-Z0-9] and is 64 chars long.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return bool(VALID_NAME_PATTERN.fullmatch(name.strip()))
|
return bool(conf.VALID_NAME_PATTERN.fullmatch(name.strip()))
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -278,10 +278,10 @@ def is_row_valid(row):
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return (
|
return (
|
||||||
IsUrlValid(row['Instance']) and
|
IsURLValid(row['Instance']) and
|
||||||
IsCategoryValid(row['Category']) and
|
IsCategoryValid(row['Category']) and
|
||||||
IsNameValid(row['Name']) and
|
IsNameValid(row['Name']) and
|
||||||
IsUrlValid(row['URL']) and
|
IsURLValid(row['URL']) and
|
||||||
IsStatusValid(row['Sensitive']) and
|
IsStatusValid(row['Sensitive']) and
|
||||||
IsDescriptionValid(row['Description']) and
|
IsDescriptionValid(row['Description']) and
|
||||||
IsStatusValid(row['Status']) and
|
IsStatusValid(row['Status']) and
|
||||||
|
@ -339,6 +339,17 @@ def remove_duplications(df):
|
||||||
return df
|
return df
|
||||||
|
|
||||||
def remove_cross_dataframe_replications(main_df, sub_df):
|
def remove_cross_dataframe_replications(main_df, sub_df):
|
||||||
|
"""
|
||||||
|
remove replications from sub_df that exist in main_df
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
main_df (Dataframe): the dataframe to keep replications
|
||||||
|
sub_df (Dataframe): the dataframe to remove replications
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dataframe: the main_df with removed duplications
|
||||||
|
Dataframe: the sub_df with removed duplications and removed replications
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
|
|
||||||
main_df = remove_duplications(main_df)
|
main_df = remove_duplications(main_df)
|
||||||
|
@ -350,7 +361,6 @@ def remove_cross_dataframe_replications(main_df, sub_df):
|
||||||
|
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print_colors('[-] Removing cross dataframe duplications failed',is_error=True)
|
print_colors('[-] Removing cross dataframe duplications failed',is_error=True)
|
||||||
raise err #REMOVE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
||||||
|
|
||||||
|
|
||||||
return main_df, sub_df
|
return main_df, sub_df
|
||||||
|
@ -365,7 +375,7 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
|
||||||
unverified_df (Dataframe): local unverified rows dataframe
|
unverified_df (Dataframe): local unverified rows dataframe
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dataframe: the combined dataframe will be returned
|
bool: True if successful, False if not
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
current_instance = get_current_instance() + '/'
|
current_instance = get_current_instance() + '/'
|
||||||
|
@ -382,6 +392,32 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
|
||||||
print_colors('[-] Saving verified and unverified failed',is_error=True)
|
print_colors('[-] Saving verified and unverified failed',is_error=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def save_local_participant_verified_and_unverified(verified_df, unverified_df, participant):
    """
    Write a participant's verified and unverified dataframes to its local csv files.

    Parameters:
        verified_df (Dataframe): rows written to '<participant>verified.csv'
        unverified_df (Dataframe): rows written to '<participant>unverified.csv'
        participant (str): participant's local path prefix; the file names are
            appended to it directly, no separator is inserted

    Returns:
        bool: True if both files were written, False if anything failed
    """
    # verified is written first, then unverified
    outputs = (
        (verified_df, 'verified.csv'),
        (unverified_df, 'unverified.csv'),
    )

    try:
        for frame, filename in outputs:
            frame.to_csv(f'{participant}{filename}', index=False)

        print_colors('[+] Verified and unverified saved successfully')

        return True

    except Exception:
        # best-effort save: report the failure and let the caller decide what to do
        print_colors('[-] Saving verified and unverified failed',is_error=True)
        return False
|
||||||
|
|
||||||
###################### Getters/Generators ######################
|
###################### Getters/Generators ######################
|
||||||
def generate_participant_url(participant):
|
def generate_participant_url(participant):
|
||||||
"""
|
"""
|
||||||
|
@ -414,7 +450,7 @@ def get_participant_local_verified_and_unverified(participant):
|
||||||
reads the local verified csv and the local unverified csv of a participant
|
reads the local verified csv and the local unverified csv of a participant
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
participant (str): participant's onion address/instance
|
participant (str): participant's local files path
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
verified_df(Dataframe): verified.csv as dataframe
|
verified_df(Dataframe): verified.csv as dataframe
|
||||||
|
@ -422,25 +458,21 @@ def get_participant_local_verified_and_unverified(participant):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_instance = get_current_instance() + '/'
|
verified_df = pd.read_csv(f'{participant}verified.csv')
|
||||||
try:
|
|
||||||
verified_df = pd.read_csv(f'{participant}verified.csv')
|
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print_colors("[-] File not found: verified.csv", is_error=True)
|
print_colors("[-] File not found: verified.csv", is_error=True)
|
||||||
|
return pd.Dataframe(), pd.Dataframe()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
unverified_df = pd.read_csv(f'{participant}unverified.csv')
|
unverified_df = pd.read_csv(f'{participant}unverified.csv')
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print_colors("[-] Participant File not found: unverified.csv", is_error=True)
|
print_colors("[-] Participant File not found: unverified.csv", is_error=True)
|
||||||
|
return pd.Dataframe(), pd.Dataframe()
|
||||||
|
|
||||||
return verified_df, unverified_df
|
return verified_df, unverified_df
|
||||||
|
|
||||||
except Exception as err:
|
|
||||||
print_colors('[-] Failed reading the verified and unverified files',is_error=True)
|
|
||||||
|
|
||||||
return pd.DataFrame(), pd.DataFrame()
|
|
||||||
|
|
||||||
def get_official_participants():
|
def get_official_participants():
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue