Mirror of http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git (synced 2025-07-01 19:06:41 +00:00)
finished option 9 ready for test

parent b234d9d1d0
commit c4ebef10a4

4 changed files with 61 additions and 62 deletions
.gitignore (vendored): 1 change
@@ -6,4 +6,5 @@ __pycache__/
env/
submissions/submission.csv
venv/
local_testing/*
@@ -1,3 +1,5 @@
import re

ROOT_PATH = '/srv/darknet-lantern/'
STATIC_PATH = ROOT_PATH + 'www/'
TEMPLATE_PATH = ROOT_PATH + 'templates/'
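Note: a minimal sketch of how path constants like these are typically combined into concrete file paths; the joined filenames below are assumptions for illustration, not paths from this commit.

# Illustrative only: the constants mirror the ones added above, the filenames are assumed.
import os

ROOT_PATH = '/srv/darknet-lantern/'
STATIC_PATH = ROOT_PATH + 'www/'
TEMPLATE_PATH = ROOT_PATH + 'templates/'

template_file = os.path.join(TEMPLATE_PATH, 'index.html')                     # assumed filename
participant_csv = os.path.join(STATIC_PATH, 'participants', 'verified.csv')   # assumed layout

print(template_file)    # /srv/darknet-lantern/templates/index.html
print(participant_csv)  # /srv/darknet-lantern/www/participants/verified.csv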
@@ -533,6 +533,8 @@ Maintenance:
case 4:
    print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")
    try:
        print_colors('[+] Syncing official webrings to local webrings')

        webring_df = verify_official_participants_registered()
@@ -583,6 +585,9 @@ Maintenance:
            break

    except Exception:
        print_colors("[-] Option 4 failed suddenly, please try again", is_error=True)

case 5:
    print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
    webring_participant_url = ''
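Note: the body of option 4 between verify_official_participants_registered() and the final break is not part of these hunks. The following is a rough, non-authoritative sketch of the idea the menu text describes (pull each participant's links into the local csv files); the 'URL' column, the directory layout, and the sync_participant_links name are all assumptions.

# Sketch only, not the project's sync routine. Assumptions: webring_df has a
# 'URL' column naming each participant, every participant's files are already
# mirrored under <participant_dir>/<participant>/verified.csv, and local rows
# are keyed by 'URL' as in option 9.
import os
import pandas as pd

def sync_participant_links(webring_df, participant_dir, local_unverified_csv):
    local_df = pd.read_csv(local_unverified_csv, on_bad_lines='skip')
    for participant in webring_df['URL']:
        remote_csv = os.path.join(participant_dir, participant, 'verified.csv')
        if not os.path.exists(remote_csv):
            continue
        remote_df = pd.read_csv(remote_csv, on_bad_lines='skip')
        # Keep only links that are not already known locally.
        new_rows = remote_df[~remote_df['URL'].isin(local_df['URL'])]
        local_df = pd.concat([local_df, new_rows], ignore_index=True)
    local_df.to_csv(local_unverified_csv, index=False)
    return local_df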
@@ -998,25 +1003,16 @@ Maintenance:
case 9:
    print_colors("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
    try:
        verified_df, unverified_df = utils.get_local_verified_and_unverified()
        verified_df, unverified_df = remove_cross_dataframe_replications(verified_df, unverified_df)
        save_local_verified_and_unverified(verified_df, unverified_df)

        for w in ['verified.csv', 'unverified.csv']:
            csvfilepath = os.path.join(instancepath, w)
            print_colors(f"Processing file: {csvfilepath}")
            try:
                csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                print_colors(f"Removing duplicates in {csvfilepath}")
                #print_colors(f"{csvdf[['URL']]}")
                csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
                #print_colors(f"{csvdf[['URL']]}")
                csvdf.to_csv(csvfilepath, index=False)
                print_colors(f"Cleaned data:\n{csvdf[['URL']]}")
            except Exception as e:
                print_colors(f"An error occurred while processing {csvfilepath}: {e}")
            break
        break
    except Exception:
        print_colors("[-] Option 9 failed suddenly, please try again", is_error=True)

case 10:
    print_colors("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)")
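Note: the net effect of this hunk is that option 9 replaces its inline pandas loop with three helper calls. Below is a standalone illustration of the same de-duplication idea, using only the pandas calls visible in this commit; the sample data is invented.

# In-file de-duplication plus cross-file de-duplication, as option 9 now does via helpers.
import pandas as pd

verified_df = pd.DataFrame({'URL': ['a.onion', 'b.onion', 'b.onion']})
unverified_df = pd.DataFrame({'URL': ['b.onion', 'c.onion', 'c.onion']})

# In-file de-duplication (what the old inline loop did with drop_duplicates).
verified_df = verified_df.drop_duplicates(subset=['URL'], keep='first')
unverified_df = unverified_df.drop_duplicates(subset=['URL'], keep='first')

# Cross-file de-duplication (what remove_cross_dataframe_replications appears to do,
# judging by the utils hunks below): an unverified URL that is already verified is dropped.
unverified_df = unverified_df[~unverified_df['URL'].isin(verified_df['URL'])]

print(verified_df['URL'].tolist())    # ['a.onion', 'b.onion']
print(unverified_df['URL'].tolist())  # ['c.onion']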
@@ -334,7 +334,7 @@ def remove_duplications(df):
        df = df.drop_duplicates(subset='URL')

    except Exception:
        pass
        print_colors('[-] Removing duplication failed', is_error=True)

    return df
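Note: only the tail of remove_duplications is visible here. A sketch of the whole helper consistent with the visible lines, assuming the try opens just before drop_duplicates; print() stands in for the project's print_colors.

import pandas as pd

def remove_duplications(df):
    # Drop rows that share a URL; on any pandas error, report it and hand the
    # dataframe back unchanged so the caller can continue.
    try:
        df = df.drop_duplicates(subset='URL')
    except Exception:
        print('[-] Removing duplication failed')  # real code uses print_colors(..., is_error=True)
    return df

# Example:
df = pd.DataFrame({'URL': ['a.onion', 'a.onion', 'b.onion']})
print(remove_duplications(df)['URL'].tolist())  # ['a.onion', 'b.onion']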
@@ -348,10 +348,8 @@ def remove_cross_dataframe_replications(main_df, sub_df):
        sub_df = sub_df[~mask]

        return sub_df

    except:
        pass
        print_colors('[-] Removing cross dataframe duplications failed', is_error=True)

    return main_df, sub_df
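Note: the construction of mask falls outside this hunk, so the reconstruction below is an assumption: the usual pandas idiom for "sub_df rows whose URL already appears in main_df" is an isin() mask, which matches the sub_df[~mask] line above. print() again stands in for print_colors.

import pandas as pd

def remove_cross_dataframe_replications(main_df, sub_df):
    # Keep only sub_df rows whose URL is not already present in main_df.
    try:
        mask = sub_df['URL'].isin(main_df['URL'])  # assumed; the mask line is not in this hunk
        sub_df = sub_df[~mask]
    except Exception:
        print('[-] Removing cross dataframe duplications failed')  # real code uses print_colors
    return main_df, sub_df

# Example:
main_df = pd.DataFrame({'URL': ['a.onion', 'b.onion']})
sub_df = pd.DataFrame({'URL': ['b.onion', 'c.onion']})
_, cleaned = remove_cross_dataframe_replications(main_df, sub_df)
print(cleaned['URL'].tolist())  # ['c.onion']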
@@ -374,6 +372,8 @@ def save_local_verified_and_unverified(verified_df, unverified_df):
        unverified_df.to_csv(f'{conf.PARTICIPANT_DIR}{current_instance}unverified.csv', index=False)

        print_colors('[+] Verified and unverified saved successfully')

        return True

    except Exception:
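Note: only the unverified half of the save helper and its success path are visible. A sketch of the symmetric whole, with an out_dir parameter standing in for conf.PARTICIPANT_DIR plus current_instance (neither is resolved in this hunk) and an assumed failure branch after the truncated except.

import os
import pandas as pd

def save_local_verified_and_unverified(verified_df, unverified_df,
                                       out_dir='participants/myinstance.onion/'):
    # out_dir stands in for conf.PARTICIPANT_DIR + current_instance, which this
    # hunk does not resolve. Only the unverified write and the success message
    # are confirmed by the diff above.
    try:
        os.makedirs(out_dir, exist_ok=True)  # sketch-only convenience, not in the original
        verified_df.to_csv(f'{out_dir}verified.csv', index=False)    # assumed, mirrors the unverified line
        unverified_df.to_csv(f'{out_dir}unverified.csv', index=False)
        print('[+] Verified and unverified saved successfully')      # real code uses print_colors
        return True
    except Exception:
        print('[-] Saving verified and unverified failed')           # assumed failure branch
        return False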