Mirror of http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
Synced 2025-05-16 20:26:58 +00:00
Make sure that SimpleX chatroom links with a ';' aren't accepted, and that option 4 removes duplicates before iterating
This commit is contained in:
parent 075ea091d4
commit 22489e571e

2 changed files with 35 additions and 23 deletions
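Note on the recurring change below: pandas' read_csv accepts on_bad_lines='skip' (available since pandas 1.3), which drops rows that have more fields than the header instead of raising a ParserError. A minimal sketch of that behavior, using a hypothetical two-column CSV:

import io
import pandas as pd

# Hypothetical CSV where the second data row carries an extra field,
# as happens when a stray separator ends up inside a URL.
bad_csv = io.StringIO(
    "name,URL\n"
    "good,http://example.onion\n"
    "broken,http://example.onion,EXTRA-FIELD\n"
)

# Without on_bad_lines='skip' this raises pandas.errors.ParserError;
# with it, the malformed row is dropped and parsing continues.
df = pd.read_csv(bad_csv, on_bad_lines='skip')
print(df)  # only the 'good' row survives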
@@ -144,11 +144,11 @@ def main():
             src=templatepath+i
             shutil.copyfile(src, filepath)
     # now that they exist, get vdf and uvdf and the rest
-    vdf = pd.read_csv(verifiedcsvfile)
-    uvdf = pd.read_csv(unverifiedcsvfile)
-    bldf = pd.read_csv(blcsvfile)
-    sedf = pd.read_csv(secsvfile)
-    webpdf = pd.read_csv(webpcsvfile)
+    vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+    uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
+    bldf = pd.read_csv(blcsvfile, on_bad_lines='skip')
+    sedf = pd.read_csv(secsvfile, on_bad_lines='skip')
+    webpdf = pd.read_csv(webpcsvfile, on_bad_lines='skip')
     print_colors(f"[+] file exists, your Webring URL is {instance}")
 
     ##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
@@ -257,8 +257,8 @@ Maintenance:
         case 2:
             print_colors("[+] Trust/Untrust/Blacklist a Website entry (move an entry from unverified to verified.csv)")
             while True:
-                vdf = pd.read_csv(verifiedcsvfile)
-                uvdf = pd.read_csv(unverifiedcsvfile)
+                vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+                uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
                 # search for a word
                 name=''
 
@@ -385,7 +385,7 @@ Maintenance:
             for w in ['verified.csv','unverified.csv']:
                 csvfilepath=instancepath+'/'+w
                 print_colors(f"{csvfilepath}")
-                csvdf = pd.read_csv(csvfilepath)
+                csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                 rows2delete= [] # it is an empty list at first
                 for i,j in csvdf.iterrows():
                     row=csvdf.loc[i,:].values.tolist()
@@ -535,7 +535,7 @@ Maintenance:
             status=''
             score=''
             webringcsvfile=instancepath+'/'+'webring-participants.csv'
-            wdf = pd.read_csv(webringcsvfile)
+            wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
             for participant in os.listdir(participantsdir):
                 participantdir=participantsdir+participant
 
@@ -610,7 +610,15 @@ Maintenance:
                 for w in ['verified.csv','unverified.csv']:
                     csvfilepath=participantdir+'/'+w
                     print_colors(f"{csvfilepath}")
-                    csvdf = pd.read_csv(csvfilepath)
+                    csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
+
+                    print("[+] Removing the participant's duplicate entries... ")
+                    # REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
+                    csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
+                    csvdf.to_csv(csvfilepath, index=False)
+
+                    csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
+
                     bldf[['blacklisted-words']].iterrows()
                     rows2delete= [] # it is an empty list at first
                     for i,j in csvdf.iterrows():
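As used above, drop_duplicates(subset=['URL'], keep="first") keeps only the first row seen for each URL, so duplicates are gone before the sanity checks iterate. A small sketch with hypothetical data:

import pandas as pd

df = pd.DataFrame({
    'name': ['a', 'b', 'c'],
    'URL': ['http://x.onion', 'http://x.onion', 'http://y.onion'],
})

# keep="first" retains the first occurrence of each URL and drops the
# rest; inplace=False returns a new DataFrame instead of mutating df.
deduped = df.drop_duplicates(subset=['URL'], keep="first", inplace=False)
print(deduped)  # rows 'a' and 'c' remain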
@@ -640,6 +648,7 @@ Maintenance:
                             #mark the row for deletion as it has invalid inputs
                             if i not in rows2delete:
                                 print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
+                                print(row)
                                 rows2delete.append(i) #mark the row for deletion if not already done
 
                     ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
@@ -667,6 +676,7 @@ Maintenance:
                     uvdf.index = uvdf.index + 1 # shifting index
                     uvdf = uvdf.sort_index() # sorting by index
                     uvdf.to_csv(unverifiedcsvfile, index=False)
+                    print("[+] NEW ROW =",newrow)
                     print_colors("[+] New row added to your own unverified.csv file!")
                 else:
                     pass
@@ -736,7 +746,7 @@ Maintenance:
                     score=''
                     newrow=[name,webring_participant_url,desc,trusted,status,score]
                     webringcsvfile=instancepath+'/'+'webring-participants.csv'
-                    wdf = pd.read_csv(webringcsvfile)
+                    wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
                     wdf.loc[-1] = newrow # adding a row
                     wdf.index = wdf.index + 1 # shifting index
                     wdf = wdf.sort_index() # sorting by index
@@ -783,7 +793,7 @@ Maintenance:
                     ########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
                     for w in ['verified.csv','unverified.csv']:
                         csvfilepath=participantdir+'/'+w
-                        csvdf = pd.read_csv(csvfilepath)
+                        csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
 
                         #print_colors(bldf[['blacklisted-words']])
                         bldf[['blacklisted-words']].iterrows()
@@ -852,7 +862,7 @@ Maintenance:
             while True:
                 print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
                 webringcsvfile=instancepath+'/'+'webring-participants.csv'
-                wdf = pd.read_csv(webringcsvfile)
+                wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
                 print_colors(f'{wdf[["URL","Trusted"]]}')
                 try:
                     index = int(input("What is the index of the webring participant that you want to edit? -1 to exit ").strip())
@@ -1120,7 +1130,7 @@ Maintenance:
                 csvfilepath = os.path.join(instancepath, w)
                 print_colors(f"Processing file: {csvfilepath}")
                 try:
-                    csvdf = pd.read_csv(csvfilepath)
+                    csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                     print_colors(f"Removing duplicates in {csvfilepath}")
                     #print_colors(f"{csvdf[['URL']]}")
                     csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
@@ -1146,7 +1156,7 @@ Maintenance:
             for w in ['verified.csv','unverified.csv']:
                 csvfilepath=participantdir+'/'+w
                 print_colors(f"{csvfilepath}")
-                csvdf = pd.read_csv(csvfilepath)
+                csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
                 rows2delete= [] # it is an empty list at first
                 for i,j in csvdf.iterrows():
                     row=csvdf.loc[i,:].values.tolist()
@@ -1208,10 +1218,10 @@ Maintenance:
         case 11:
             #review the submitted websites:
             try:
-                submission_df = pd.read_csv(submission_file_abs_path)
-                verified_csv_df = pd.read_csv(verifiedcsvfile)
-                unverified_csv_df = pd.read_csv(unverifiedcsvfile)
-                blacklist_df = pd.read_csv(blcsvfile)
+                submission_df = pd.read_csv(submission_file_abs_path, on_bad_lines='skip')
+                verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+                unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
+                blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
                 blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
                 for i, row in submission_df.iterrows():
                     link = row['link']
@@ -1290,10 +1300,10 @@ Maintenance:
             # review the crawled websites
             try:
                 print(crawled_file_abs_path)
-                crawled_df = pd.read_csv(crawled_file_abs_path)
-                verified_csv_df = pd.read_csv(verifiedcsvfile)
-                unverified_csv_df = pd.read_csv(unverifiedcsvfile)
-                blacklist_df = pd.read_csv(blcsvfile)
+                crawled_df = pd.read_csv(crawled_file_abs_path, on_bad_lines='skip')
+                verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+                unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
+                blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
                 blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
                 for i, row in crawled_df.iterrows():
                     link = row['URL']
@@ -150,6 +150,8 @@ def IsUrlValid(url:str)->bool:
     else:
         if not url.__contains__('.'):
             return False
+        if url.__contains__(';'):
+            return False #required otherwise lantern thinks there are extra columns
         if pattern.fullmatch(url) is None:
             return False
         return True
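To illustrate the new guard in isolation, here is a standalone sketch of just the ';' check (the real IsUrlValid also enforces the '.' check and the regex pattern); the SimpleX-style URL is illustrative, not an exact invite link:

# Hypothetical, minimal version of the new ';' guard in IsUrlValid.
def has_no_semicolon(url: str) -> bool:
    # a ';' in a URL would later be misread by lantern as extra CSV columns
    return ';' not in url

print(has_no_semicolon("http://example.onion/"))                 # True
print(has_no_semicolon("https://simplex.chat/contact#/?v=2;x"))  # False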