Finished option 9 (duplicate cleanup); ready for testing

This commit is contained in:
doctor_dev 2025-05-30 17:59:12 +00:00
parent b234d9d1d0
commit c4ebef10a4
No known key found for this signature in database
GPG key ID: F12F7F71CB84AEAA
4 changed files with 61 additions and 62 deletions

1
.gitignore vendored
View file

@ -6,4 +6,5 @@ __pycache__/
env/
submissions/submission.csv
venv/
local_testing/*

View file

@ -1,3 +1,5 @@
import re
ROOT_PATH = '/srv/darknet-lantern/'
STATIC_PATH = ROOT_PATH + 'www/'
TEMPLATE_PATH = ROOT_PATH + 'templates/'

View file

@ -533,6 +533,8 @@ Maintenance:
case 4:
print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
try:
print_colors('[+] Syncing official webrings to local webrings')
webring_df = verify_official_participants_registered()
@ -583,6 +585,9 @@ Maintenance:
break
except Exception:
print_colors("[-] Option 4 failed suddently, please try again", is_error=True)
case 5:
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
webring_participant_url = ''
@ -998,25 +1003,16 @@ Maintenance:
case 9:
print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
try:
verified_df, unverified_df = utils.get_local_verified_and_unverified()
verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
save_local_verified_and_unverified(verified_df, unverified_df)
for w in ['verified.csv', 'unverified.csv']:
csvfilepath = os.path.join(instancepath, w)
print_colors(f"Processing file: {csvfilepath}")
try:
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print_colors(f"Removing duplicates in {csvfilepath}")
#print_colors(f"{csvdf[['URL']]}")
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
#print_colors(f"{csvdf[['URL']]}")
csvdf.to_csv(csvfilepath, index=False)
print_colors(f"Cleaned data:\n{csvdf[['URL']]}")
except Exception as e:
print_colors(f"An error occurred while processing {csvfilepath}: {e}")
break
break
except Exception:
print_colors("[-] Option 9 failed suddently, please try again", is_error=True)
case 10:
print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")

View file

@ -334,7 +334,7 @@ def remove_duplications(df):
df = df.drop_duplicates(subset='URL')
except Exception:
pass
print_colors('[-] Removing duplication failed',is_error=True)
return df
@ -348,10 +348,8 @@ def remove_cross_dataframe_replications(main_df, sub_df):
sub_df = sub_df[~mask]
return sub_df
except:
pass
print_colors('[-] Removing cross dataframe duplications failed',is_error=True)
return main_df, sub_df
@ -374,10 +372,12 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)
print_colors('[+] Verified and unverified saved successfully')
return True
except Exception:
print_colors('[-] Saving verified and unverified failed',is_error=True )
print_colors('[-] Saving verified and unverified failed',is_error=True)
return False
###################### Getters/Generators ######################