mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-07-01 18:56:40 +00:00
Refactored option 4 + added conf.py + added some TODO comments for review
This commit is contained in:
parent 4b33e51d11
commit b07ac08547
6 changed files with 429 additions and 281 deletions
@@ -1,6 +1,9 @@
###TODO: importing * is bad practice should import just utils and use it like in lantern_logic.py
from utils import *
import logic.lantern_logic as lantern
from dotenv import load_dotenv

import os, pwd
import pandas as pd
import requests
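The TODO above asks for the wildcard import to be replaced with a plain `import utils`. A minimal sketch of what that could look like, assuming the helpers called in this file (print_colors, CheckUrl, IsBannerValid, ...) really are defined in utils, as the wildcard import suggests:

import utils
import logic.lantern_logic as lantern

# Helpers are then called through the module namespace instead of relying on *, e.g.:
# utils.print_colors("[+] example message")
# reachable = utils.CheckUrl("http://example.onion/participants/example.onion/verified.csv")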
@@ -532,259 +535,59 @@ Maintenance:
#####################################################

#check if it works when you have a second webring participant
case 4:
print_colors("4) Synchronize new links from existing webring participants, into your unverified.csv file")
participantsdir=rootpath+'www/participants/'
name=''
desc=''
trusted=''
status=''
score=''
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
for participant in os.listdir(participantsdir):
participantdir=participantsdir+participant
print_colors("4) Synchronize new links from new or existing webring participants, into your local csv files")

# NOTE check if the webring participant is yourself, if it is, then skip it
if participant != myinstance: # prod: don't use your own instance
#if participant == myinstance: # preprod testing only on your own instance
#overwrite the existing files in the participant's directory, with their version (download all the csv files from them again)
basewurl='http://'+participant+'/participants/'+participant+'/'
print_colors(f"{basewurl}")
print_colors(f"[+] Downloading the files of: {participant} ")
w_vcsv=basewurl+'verified.csv'
w_uvcsv=basewurl+'unverified.csv'
w_blcsv=basewurl+'blacklist.csv'
w_scsv=basewurl+'sensitive.csv'
w_webcsv=basewurl+'webring-participants.csv'
print_colors('[+] Syncing official webrings to local webrings')

# verify that their verified.csv file exists at basewurl+'verified.csv'
if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
print_colors("[-] Webring Participant isn't reachable, skipping", is_error=True)
else: #if the webring participant is reachable, proceed
print_colors("[+] Webring Participant is reachable, updating their csv files:")
for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
# FOR EACH CSV FILE TO GET:
# URL: basewurl / FILE.CSV
# PATH: participantdir / FILE.CSV
# download the external csv file and save it into the "text" variable:
#response = urllib.request.urlopen(basewurl+i)
response = requests.get(basewurl+i, proxies=proxies)
#data = response.read() # a `bytes` object
#text = data.decode('utf-8')
text = response.text
# save the text variable into the destination file:
csvfilepath=participantdir+'/'+i
with open(csvfilepath, "w") as file:
file.write(text)
f = open(csvfilepath,"r")
webring_df = verify_official_participants_registered()

# download the banner.png image:
current_instance = get_current_instance()

for participant in webring_df.itertuples(index=False, name='columns'):
# Check if the participant is my instance
if current_instance in participant:
continue

bannerurl=basewurl+'banner.png'
bannerpath=participantdir+'/banner.png'
r = requests.get(bannerurl, stream=True, proxies=proxies)
with open(bannerpath, 'wb') as f:
r.raw.decode_content = True
shutil.copyfileobj(r.raw, f)
if not is_participant_reachable(participant.URL):
print_colors(f"[-] Webring {participant.URL} isn't reachable, skipping", is_error=True)
continue

print_colors('[+] Downloading participant\'s files to store locally')
lantern.download_participant_data(participant.URL)

# SANITY CHECK ON THE BANNER PNG IMAGE:
if IsBannerValid(bannerpath):
pass
else:
# if false, overwrite it with the template banner png file
os.remove(bannerpath)
# copy templates/banner.png to bannerpath
bannertemplatepath=templatepath+'banner.png'
shutil.copyfile(bannertemplatepath, bannerpath)
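IsBannerValid is another helper defined outside this diff. A minimal sketch of what such a banner sanity check might do, assuming Pillow is available and that "valid" means a readable PNG below a size cap — these criteria are assumptions for illustration and may not match the project's actual checks:

import os
from PIL import Image

def is_banner_valid_sketch(path, max_bytes=512 * 1024):
    # Sketch: accept only a file that is a readable PNG and smaller than max_bytes.
    try:
        if os.path.getsize(path) > max_bytes:
            return False
        with Image.open(path) as img:
            img.verify()  # raises an exception if the file is corrupt
            return img.format == 'PNG'
    except Exception:
        return False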
print_colors('[+] Reading local blacklist and sensitive words')
local_blacklist, local_sensitive = get_local_blacklist_and_sensitive()

print_colors('[+] Reading local verified and unverified')
local_verified_df, local_unverified_df = get_local_verified_and_unverified()

participant_url = generate_local_participant_dir(participant.URL)

# check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
# and display only the matching entries in unverified.csv in an array format (display it in CLI).
filter_wdf = wdf[wdf.URL.str.contains(participant,na=False)]
# check if there are no results, don't proceed if there are none!
if filter_wdf.size == 0: #skip if webring participant is already listed, otherwise proceed
newrow=[name,participant,desc,trusted,status,score]
wdf.loc[-1] = newrow # adding a row
wdf.index = wdf.index + 1 # shifting index
wdf = wdf.sort_index() # sorting by index
wdf.to_csv(webringcsvfile, index=False)
else:
pass
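The three-step "add a row, shift the index, sort by index" pattern above (used again for unverified.csv further down) prepends the new entry to the DataFrame. A shorter, equivalent sketch using pd.concat, assuming the goal is simply to put the new row first before writing the file back out:

import pandas as pd

# Sketch: prepend one new row without manipulating the index by hand.
new_entry = pd.DataFrame([newrow], columns=wdf.columns)
wdf = pd.concat([new_entry, wdf], ignore_index=True)
wdf.to_csv(webringcsvfile, index=False)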
print_colors('[+] Reading webring participant\'s verified and unverified, and removing unverified and blacklisted rows')
participant_verified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}verified.csv'), local_blacklist)
participant_unverified_df = lantern.clean_csv(pd.read_csv(f'{participant_url}unverified.csv'), local_blacklist)

# iterate through the participant's verified.csv and unverified.csv files
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print_colors(f"{csvfilepath}")
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print_colors('[+] Marking sensitive rows')
participant_verified_df = lantern.mark_sensitive(participant_verified_df, local_sensitive)
participant_unverified_df = lantern.mark_sensitive(participant_unverified_df, local_sensitive)

if participant.Trusted == 'YES':
print_colors('[+] This participant is trusted, copying participant\'s verified to local verified')
local_verified_df = merge_verification_df(local_verified_df, participant_verified_df)

else:
print_colors('[+] This participant is not trusted, copying participant\'s verified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_verified_df)

print_colors('[+] Copying participant\'s unverified to local unverified')
local_unverified_df = merge_verification_df(local_unverified_df, participant_unverified_df)

print("[+] Removing the participant's duplicate entries... ")
# REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
csvdf = csvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
csvdf.to_csv(csvfilepath, index=False)

csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

bldf[['blacklisted-words']].iterrows()
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
row=csvdf.loc[i,:].values.tolist()
# check the number of columns in said row,
# print('rowcolnum:',len(row),' colnum:',len(csvdf.columns))
# print_colors(f"{row}")

################################ SANITY CHECKS ####################################
### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES" :
csvdf.at[i, 'Status'] = "YES"
csvdf.to_csv(csvfilepath, index=False)
else:
csvdf.at[i, 'Status'] = "NO"
csvdf.to_csv(csvfilepath, index=False)

if csvdf.at[i, 'Sensitive'] == "✔️" or csvdf.at[i, 'Sensitive'] == "YES" :
csvdf.at[i, 'Sensitive'] = "YES"
csvdf.to_csv(csvfilepath, index=False)
else:
csvdf.at[i, 'Sensitive'] = "NO"
csvdf.to_csv(csvfilepath, index=False)
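SANITY CHECK 0 above rewrites Status and Sensitive row by row and saves the whole CSV on every iteration. A vectorized sketch that does the same normalization once per file and writes a single time, assuming the same "✔️ or YES means YES, everything else means NO" semantics:

# Sketch: same normalization, done once per file instead of once per row.
normalize = lambda v: 'YES' if v in ('YES', '✔️') else 'NO'
csvdf['Status'] = csvdf['Status'].apply(normalize)
csvdf['Sensitive'] = csvdf['Sensitive'].apply(normalize)
csvdf.to_csv(csvfilepath, index=False)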
### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
#mark the row for deletion as it has invalid inputs
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
print(row)
rows2delete.append(i) #mark the row for deletion if not already done

### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
for k,l in bldf.iterrows():
blword=bldf.at[k, 'blacklisted-words']
if any(blword in str(x) for x in row) == True:
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it matches with a blacklisted word")
rows2delete.append(i) #mark the row for deletion if not already done
else:
if i not in rows2delete:
# not a blacklisted link, therefore it is suitable to be added to your own csv files:
################################ CHECKING FOR DUPLICATES! #########################
# for each link in the participant's verified/unverified csv files,
# check if the link is already listed in your own verified.csv or unverified.csv
filterterm=csvdf.at[i, 'URL']
#print('1)',filterterm)
filter_vdf= vdf[vdf.URL.str.contains(filterterm,na=False)]
filter_vdf2= vdf[vdf.Name.str.contains(filterterm,na=False)] # do not accept the new link if the name already exists in verified.csv
#print('2)',filter_vdf)
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# TODO DELETE ALL DUPLICATES OF UVDF !
uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
uvdf = uvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
filter_uvdf2= uvdf[uvdf.Name.str.contains(filterterm,na=False)] # do not accept the new link if the name already exists in unverified.csv
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0 and len(filter_uvdf2.index) == 0 and len(filter_vdf2.index) == 0 :
newrow=row
uvdf.loc[-1] = newrow # adding a row
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
uvdf.to_csv(unverifiedcsvfile, index=False)

print("[+] NEW ROW =",newrow)
print_colors("[+] New row added to your own unverified.csv file!")
else:
pass
#print_colors(f'[-] Skipping row as it is already added in {w} {row}',is_error=True)


###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE ###################

### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
for k,l in sedf.iterrows():
seword=sedf.at[k, 'sensitive-words']
if any(seword in str(x) for x in row) == True:
if csvdf.at[i, 'Sensitive'] != 'NO':
print_colors(f"Marking row {i} as sensitive, as it matches with a sensitive word")
csvdf.at[i, 'Sensitive']='YES'

#print_colors(f'[-] Rows to delete: {rows2delete}', is_error=True)
# only delete rows after you've gone through all the unverified.csv OR verified.csv rows
# check for NAME duplicates and mark them for deletion:
# remove name duplicates that are in unverified.csv yet exist in verified.csv (as verified.csv takes the priority)
if w == 'unverified.csv':
try:
# check if the given row Name already exists in verified.csv
filterterm=csvdf.at[i, 'Name']
filter_vdf= vdf[vdf.Name.str.contains(filterterm,na=False)]
print('[+] CHECKING FOR DUPLICATES: ',filterterm)
if len(filter_vdf.index) != 0:
# drop the unverified.csv row if its name already exists in verified.csv
print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ',row)
rows2delete.append(i) #mark the row for deletion if not already done
except:
pass

for i in rows2delete:
row=csvdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i}{row}')
csvdf.drop(i, inplace= True)
csvdf.to_csv(csvfilepath, index=False)
rows2delete= [] # it is an empty list at first

# fill missing description in our unverified.csv that other participants' verified.csv have filled
if w == 'verified.csv':
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# merge participant's verified.csv on our unverified.csv on URL
merged_df = uvdf.merge(csvdf[['URL', 'Description']],
on='URL',
how='left',
suffixes=('', '_participant'))
# filter empty description that has participant's description
no_descr_filter = ((merged_df['Description'].isna()) | (merged_df['Description'].str.strip() == '')) & \
(~merged_df['Description_participant'].isna()) & (merged_df['Description_participant'].str.strip() != '')
no_descr_filter_count = no_descr_filter.sum()
# update our empty description if the participant has any filled description
if no_descr_filter_count > 0:
merged_df.loc[no_descr_filter, 'Description'] = merged_df.loc[no_descr_filter, 'Description_participant']
# keep only original columns
uvdf_updated = merged_df[uvdf.columns]
uvdf_updated.to_csv(unverifiedcsvfile, index=False)
print(f'[+] Updated {no_descr_filter_count} empty description(s) in your unverified.csv found on participant\'s {w}')
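The description backfill above left-merges the participant's verified.csv onto the local unverified.csv by URL and only copies a description where the local one is empty. A tiny self-contained illustration of that behavior, with made-up data (the column names match the block above; everything else is invented for the example):

import pandas as pd

# Toy data: local unverified.csv with one empty description, participant's verified.csv with a match.
uvdf = pd.DataFrame({'URL': ['a.onion', 'b.onion'], 'Description': ['', 'kept as is']})
csvdf = pd.DataFrame({'URL': ['a.onion'], 'Description': ['filled in from the participant']})

merged = uvdf.merge(csvdf[['URL', 'Description']], on='URL', how='left',
                    suffixes=('', '_participant'))
mask = ((merged['Description'].isna()) | (merged['Description'].str.strip() == '')) & \
       merged['Description_participant'].notna() & (merged['Description_participant'].str.strip() != '')
merged.loc[mask, 'Description'] = merged.loc[mask, 'Description_participant']
print(merged[uvdf.columns])
# 'a.onion' now carries the participant's description, 'b.onion' keeps its own.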
# remove all name duplicates from your own unverified.csv file:
for i,j in uvdf.iterrows():
row=uvdf.loc[i,:].values.tolist()
# check if the given row Name already exists in verified.csv
filterterm=uvdf.at[i, 'Name']
filter_vdf= vdf[vdf.Name.str.contains(filterterm,na=False)]
print('[+] CHECKING FOR DUPLICATES: ',filterterm)
if len(filter_vdf.index) != 0:
# drop the unverified.csv row if its name already exists in verified.csv
print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ',row)
rows2delete.append(i) #mark the row for deletion if not already done
for i in rows2delete:
row=uvdf.loc[i,:].values.tolist()
print_colors(f'[+] REMOVING ROW: {i}{row}')
uvdf.drop(i, inplace= True)
uvdf.to_csv(unverifiedcsvfile, index=False)
rows2delete= [] # it is an empty list at first
print_colors('[+] Saving local verified and unverified')
save_local_verified_and_unverified(local_verified_df, local_unverified_df)

break

case 5:
print_colors("[+] Add a new webring participant (and download their files into their directory (without trusting them yet!))")
webring_participant_url = ''