Fixed conflicts

This commit is contained in:
doctor_dev 2025-06-06 20:27:52 +00:00
commit 35f2c500b8
No known key found for this signature in database
GPG key ID: F12F7F71CB84AEAA
5 changed files with 215 additions and 146 deletions

2
.gitignore vendored
View file

@ -6,4 +6,6 @@ __pycache__/
env/
submissions/submission.csv
venv/
local_testing/*
!your_folder/.gitkeep

View file

@ -533,53 +533,61 @@ Maintenance:
case 4:
print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
print_colors('[+] Syncing official webrings to local webrings')
try:
webring_df = get_local_webring_participants()
print_colors('[+] Syncing official webrings to local webrings')
current_instance = get_current_instance()
webring_df = get_local_webring_participants()
for participant in webring_df.itertuples(index=False, name='columns'):
# Check if the participant is my instance
if current_instance in participant:
continue
current_instance = get_current_instance()
if not is_participant_reachable(participant.URL):
print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue
for participant in webring_df.itertuples(index=False, name='columns'):
# Check if the participant is my instance
if current_instance in participant:
continue
print_colors('[+] Downloading participant\'s files to store locally')
lantern.download_participant_data(participant.URL)
if not is_participant_reachable(participant.URL):
print_colors("[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
print_colors('[+] Downloading participant\'s files to store locally')
lantern.download_participant_data(participant.URL)
print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
participant_url = generate_local_participant_dir(participant.URL)
print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()
print_colors('[+] Reading webrring participant\'s verified and unverified, and removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist)
participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist)
participant_url = generate_local_participant_dir(participant.URL)
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
print_colors('[+] Reading webrring participant\'s verified and unverified')
participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_url)
if participant.Trusted == 'YES':
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
print_colors('[+] Removing unvalidated and blacklisted rows')
participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
if participant.Trusted == 'YES':
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df)
else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)
print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df)
except Exception as err:
print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
break
@ -997,93 +1005,50 @@ Maintenance:
case 9:
print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
for w in ['verified.csv', 'unverified.csv']:
csvfilepath = os.path.join(instancepath, w)
print_colors(f"Processing file: {csvfilepath}")
try:
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print_colors(f"Removing duplicates in {csvfilepath}")
#print_colors(f"{csvdf[['URL']]}")
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
#print_colors(f"{csvdf[['URL']]}")
csvdf.to_csv(csvfilepath, index=False)
print_colors(f"Cleaned data:\n{csvdf[['URL']]}")
except FileNotFoundError:
print_colors(f"File not found: {csvfilepath}")
except Exception as e:
print_colors(f"An error occurred while processing {csvfilepath}: {e}")
break
try:
print_colors('[+] Reading local verified and unverified')
verified_df, unverified_df = get_local_verified_and_unverified()
print_colors('[+] Removing cross dataframe replications')
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(verified_df, unverified_df)
except Exception as err:
print_colors("[-] Option 9 failed suddently, please try again", is_error=True)
break
case 10:
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
participantspath = rootpath+'www/participants/'
for participant in os.listdir(participantspath):
print_colors(f"Participant: {participant}")
#read=input("Continue?")
participantdir= participantspath+participant
################ BEGIN SANITY CHECKS FOR EACH PARTICIPANTS ##############
# iterate through the participant's verified.csv and unverified.csv files
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print_colors(f"{csvfilepath}")
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
row=csvdf.loc[i,:].values.tolist()
#print_colors(f"{row}")
try:
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()
for participant in os.listdir(conf.PARTICIPANT_DIR):
participant_local_dir = conf.PARTICIPANT_DIR + participant + '/'
################################ SANITY CHECKS ####################################
### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES" :
csvdf.at[i, 'Status'] = "YES"
csvdf.to_csv(csvfilepath, index=False)
else:
csvdf.at[i, 'Status'] = "NO"
csvdf.to_csv(csvfilepath, index=False)
print_colors('[+] Reading webrring participant\'s verified and unverified')
participant_verified_df, participant_unverified_df = get_participant_local_verified_and_unverified(participant_local_dir)
if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES" :
csvdf.at[i, 'Sensitive'] = "YES"
csvdf.to_csv(csvfilepath, index=False)
else:
csvdf.at[i, 'Sensitive'] = "NO"
csvdf.to_csv(csvfilepath, index=False)
print_colors('[+] Removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(participant_verified_df, local_blacklist)
participant_unverified_df = lantern.clean_csv(participant_unverified_df, local_blacklist)
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
if IsURLValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsURLValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
#print_colors(f"{row}")
print(IsURLValid(csvdf.at[i, 'Instance']), IsCategoryValid(csvdf.at[i, 'Category']), IsNameValid(csvdf.at[i, 'Name']), IsURLValid(csvdf.at[i, 'URL']), IsStatusValid(csvdf.at[i, 'Sensitive']), IsDescriptionValid(csvdf.at[i, 'Description']), IsStatusValid(csvdf.at[i, 'Status']), IsScoreValid(csvdf.at[i, 'Score']))
rows2delete.append(i)
read=input("Continue?")
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
for k,l in bldf.iterrows():
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it matches with the blacklisted word {blword}")
rows2delete.append(i)
#read=input("Continue?")
### SANITY CHECK 3: Mark all rows that match sensitive words to be sensitive = YES
for k,l in sedf.iterrows():
seword=sedf.at[k, 'sensitive-words']
if any(seword in str(x) for x in row) == True:
print_colors(f"Marking row {i} as sensitive, as it matches with the sensitive word {seword}")
csvdf.at[i, 'Sensitive']="YES"
csvdf.to_csv(csvfilepath, index=False)
#read=input("Continue?")
print_colors('[+] Saving local participant verified and unverified')
save_local_participant_verified_and_unverified(participant_verified_df, participant_unverified_df, participant_local_dir)
except Exception as err:
print_colors("[-] Option 10 failed suddently, please try again", is_error=True)
for i in rows2delete:
row=csvdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW : {i} {row}')
csvdf.drop(i, inplace= True)
csvdf.to_csv(csvfilepath, index=False)
#read=input("Continue?")
break
case 11:

View file

View file

@ -43,7 +43,7 @@ def download_participant_data(participant):
utils.print_colors(f"[+] Downloaded webring {participant} csv files and banner")
except Exception:
except Exception as err:
print_colors("[-] Downloading webring participant's files failed.", is_error=True)
def clean_csv(df, blacklist):
@ -66,7 +66,7 @@ def clean_csv(df, blacklist):
if not df.empty:
df = df[df.apply(utils.is_row_valid, axis=1)]
except Exception:
except Exception as err:
print_colors("[-] cleaning dataframe failed", is_error=True)
return df
@ -90,7 +90,7 @@ def mark_sensitive(df, sensitive_list):
df.loc[sensitive_rows, 'Sensitive'] = 'YES'
df.loc[~sensitive_rows, 'Sensitive'] = 'NO'
except Exception:
except Exception as err:
print_colors("[-] MArking sensitive words failed.", is_error=True)
return df

View file

@ -152,7 +152,7 @@ def is_participant_reachable(instance):
status = requests.get(f'{url}{file_name}',proxies=conf.PROXIES, timeout=10).status_code
if status != 200:
return False
except Exception:
except Exception as err:
return False
return True
@ -278,17 +278,17 @@ def is_row_valid(row):
"""
try:
return (
IsUrlValid(row['Instance']) and
IsURLValid(row['Instance']) and
IsCategoryValid(row['Category']) and
IsNameValid(row['Name']) and
IsUrlValid(row['URL']) and
IsURLValid(row['URL']) and
IsStatusValid(row['Sensitive']) and
IsDescriptionValid(row['Description']) and
IsStatusValid(row['Status']) and
IsScoreValid(row['Score'])
)
except Exception:
except Exception as err:
return False
###################### General ######################
@ -316,7 +316,7 @@ def merge_verification_df(receiving_df, merging_df):
else:
return pd.concat([receiving_df, filtered_df], ignore_index=True)
except Exception:
except Exception as err:
return receiving_df
def remove_duplications(df):
@ -333,11 +333,38 @@ def remove_duplications(df):
df = df.drop_duplicates(subset='Name')
df = df.drop_duplicates(subset='URL')
except Exception:
pass
except Exception as err:
print_colors('[-] Removing duplication failed',is_error=True)
return df
def remove_cross_dataframe_replications(main_df, sub_df):
    """
    remove rows from sub_df that already exist in main_df

    Both dataframes are first passed through remove_duplications; afterwards
    every sub_df row whose URL or Name also appears in main_df is dropped.

    Parameters:
        main_df (Dataframe): the dataframe whose rows are kept
        sub_df (Dataframe): the dataframe to remove replications from

    Returns:
        Dataframe: the main_df with removed duplications
        Dataframe: the sub_df with removed duplications and removed replications
    """
    try:
        main_df = remove_duplications(main_df)
        sub_df = remove_duplications(sub_df)

        # A frame without rows cannot replicate anything, and a column-less
        # pd.DataFrame() has no 'URL'/'Name' column to look up, so only run the
        # cross check when both sides actually hold rows.
        if not main_df.empty and not sub_df.empty:
            mask = sub_df['URL'].isin(main_df['URL']) | sub_df['Name'].isin(main_df['Name'])
            sub_df = sub_df[~mask]

    except Exception as err:
        # Best effort: report the failure and hand back whatever was computed so far
        print_colors('[-] Removing cross dataframe duplications failed',is_error=True)

    return main_df, sub_df
###TODO: can later remove the inputs and have a "global" local verified and unverified or a class of the local(lantern host) participant
def save_local_verified_and_unverified(verified_df, unverified_df):
"""
@ -348,7 +375,7 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
unverified_df (Dataframe): local unverified rows dataframe
Returns:
Dataframe: the combined dataframe will be returned
bool: True if successful, False if not
"""
try:
current_instance = get_current_instance() + '/'
@ -357,10 +384,38 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)
print_colors('[+] Verified and unverified saved successfully')
return True
except Exception:
print_colors('[-] Saving verified and unverified failed',is_error=True )
except Exception as err:
print_colors('[-] Saving verified and unverified failed',is_error=True)
return False
def save_local_participant_verified_and_unverified(verified_df, unverified_df, participant):
    """
    writes a participant's verified and unverified dataframes to its local csv files

    Parameters:
        verified_df (Dataframe): rows written to <participant>verified.csv
        unverified_df (Dataframe): rows written to <participant>unverified.csv
        participant (str): participant's local path, used as a prefix (file names are appended directly)

    Returns:
        bool: True if both files were written, False if anything raised
    """
    targets = (
        (verified_df, 'verified.csv'),
        (unverified_df, 'unverified.csv'),
    )

    try:
        # verified is written first, then unverified; no index column is stored
        for frame, file_name in targets:
            frame.to_csv(f'{participant}{file_name}', index=False)

        print_colors('[+] Verified and unverified saved successfully')
        saved = True

    except Exception as err:
        print_colors('[-] Saving verified and unverified failed',is_error=True)
        saved = False

    return saved
###################### Getters/Generators ######################
@ -390,6 +445,35 @@ def generate_local_participant_dir(participant):
return f'{conf.PARTICIPANT_DIR}{participant}/'
def get_participant_local_verified_and_unverified(participant):
    """
    reads the local verified csv and the local unverified csv of a participant

    Parameters:
        participant (str): participant's local files path, used as a prefix (file names are appended directly)

    Returns:
        verified_df(Dataframe): verified.csv as dataframe
        unverified_df(Dataframe): unverified.csv as dataframe

    If either file is missing, an error is printed and two empty dataframes
    are returned. Any other read error propagates to the caller.
    """
    try:
        verified_df = pd.read_csv(f'{participant}verified.csv')

    except FileNotFoundError:
        print_colors("[-] File not found: verified.csv", is_error=True)
        # The pandas class is spelled DataFrame; pd.Dataframe raises AttributeError
        return pd.DataFrame(), pd.DataFrame()

    try:
        unverified_df = pd.read_csv(f'{participant}unverified.csv')

    except FileNotFoundError:
        print_colors("[-] Participant File not found: unverified.csv", is_error=True)
        return pd.DataFrame(), pd.DataFrame()

    return verified_df, unverified_df
def get_official_participants():
"""
reads all the official webring participants
@ -404,7 +488,7 @@ def get_official_participants():
with open(conf.OFFICIAL_PARTICIPANTS_FILE, 'r') as file:
return [line.strip() for line in file if current_instance not in line]
except Exception:
except Exception as err:
print_colors('[-] Couldn\'t read official webring participants file',is_error=True )
def get_local_blacklist_and_sensitive():
@ -417,22 +501,32 @@ def get_local_blacklist_and_sensitive():
"""
try:
current_instance = get_current_instance() + '/'
try:
blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
blacklist = blacklist_df.iloc[:, 0].tolist()
blacklist_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}blacklist.csv')
blacklist = blacklist_df.iloc[:, 0].tolist()
except FileNotFoundError:
print_colors("[-] File not found: blacklist.csv", is_error=True)
try:
sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
sensitive_list = sensitive_df.iloc[:, 0].tolist()
except FileNotFoundError:
print_colors("[-] File not found: sensitive.csv", is_error=True)
sensitive_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}sensitive.csv')
sensitive_list = sensitive_df.iloc[:, 0].tolist()
return blacklist, sensitive_list
except Exception:
print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True )
return [], []
except Exception as err:
print_colors('[-] Failed reading the blacklist and sensitive words file',is_error=True)
return [], []
def get_local_verified_and_unverified():
"""
reads the local verified csv and the local unverified csv
reads the local verified csv and the local unverified csv of the instance
Returns:
verified_df(Dataframe): verified.csv as dataframe
@ -441,16 +535,24 @@ def get_local_verified_and_unverified():
try:
current_instance = get_current_instance() + '/'
try:
verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
verified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}verified.csv')
except FileNotFoundError:
print_colors("[-] File not found: verified.csv", is_error=True)
unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
try:
unverified_df = pd.read_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv')
except FileNotFoundError:
print_colors("[-] File not found: unverified.csv", is_error=True)
return verified_df, unverified_df
except Exception:
print_colors('[-] Failed reading the verified and unverified files',is_error=True )
return pd.DataFrame(), pd.DataFrame()
except Exception as err:
print_colors('[-] Failed reading the verified and unverified files',is_error=True)
return pd.DataFrame(), pd.DataFrame()
def get_local_webring_participants():
"""
@ -474,7 +576,7 @@ def get_local_webring_participants():
return webring_df
except Exception:
except Exception as err:
print_colors(f'[-] failed reading webring participants file',is_error=True )
return pd.DataFrame()