mirror of http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-05-16 20:26:58 +00:00

commit 22489e571e (parent 075ea091d4)
make sure that simplex chatroom links with a ; aren't accepted, and that option 4 removes duplicates before iterating

2 changed files with 35 additions and 23 deletions
@@ -144,11 +144,11 @@ def main():
 src=templatepath+i
 shutil.copyfile(src, filepath)
 # now that they exist, get vdf and uvdf and the rest
-vdf = pd.read_csv(verifiedcsvfile)
-uvdf = pd.read_csv(unverifiedcsvfile)
-bldf = pd.read_csv(blcsvfile)
-sedf = pd.read_csv(secsvfile)
-webpdf = pd.read_csv(webpcsvfile)
+vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
+bldf = pd.read_csv(blcsvfile, on_bad_lines='skip')
+sedf = pd.read_csv(secsvfile, on_bad_lines='skip')
+webpdf = pd.read_csv(webpcsvfile, on_bad_lines='skip')
 print_colors(f"[+] file exists, your Webring URL is {instance}")

 ##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
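The recurring change in this commit adds on_bad_lines='skip' to every pd.read_csv() call, so a malformed CSV row (one with extra separators, for instance) is dropped instead of raising a ParserError and aborting the menu option. A minimal sketch of the difference, using an invented in-memory CSV rather than the lantern's real files:

import io
import pandas as pd

# Invented CSV: the last row has an extra comma, i.e. one column too many.
bad_csv = "name,URL\nexample,http://example.onion\nbroken,http://x.onion,oops\n"

# Default behaviour: pandas raises ParserError on the malformed row.
try:
    pd.read_csv(io.StringIO(bad_csv))
except pd.errors.ParserError as err:
    print("default read_csv fails:", err)

# With on_bad_lines='skip' (pandas >= 1.3) the bad row is dropped instead.
df = pd.read_csv(io.StringIO(bad_csv), on_bad_lines='skip')
print(df)  # only the 'example' row survives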
@@ -257,8 +257,8 @@ Maintenance:
 case 2:
 print_colors("[+] Trust/Untrust/Blacklist a Website entry (move an entry from unverified to verified.csv)")
 while True:
-vdf = pd.read_csv(verifiedcsvfile)
-uvdf = pd.read_csv(unverifiedcsvfile)
+vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
 # search for a word
 name=''

@@ -385,7 +385,7 @@ Maintenance:
 for w in ['verified.csv','unverified.csv']:
 csvfilepath=instancepath+'/'+w
 print_colors(f"{csvfilepath}")
-csvdf = pd.read_csv(csvfilepath)
+csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
 rows2delete= [] # it is an empty list at first
 for i,j in csvdf.iterrows():
 row=csvdf.loc[i,:].values.tolist()
@@ -535,7 +535,7 @@ Maintenance:
 status=''
 score=''
 webringcsvfile=instancepath+'/'+'webring-participants.csv'
-wdf = pd.read_csv(webringcsvfile)
+wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
 for participant in os.listdir(participantsdir):
 participantdir=participantsdir+participant

@@ -610,7 +610,15 @@ Maintenance:
 for w in ['verified.csv','unverified.csv']:
 csvfilepath=participantdir+'/'+w
 print_colors(f"{csvfilepath}")
-csvdf = pd.read_csv(csvfilepath)
+csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
+
+print("[+] Removing the participant's duplicate entries... ")
+# REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
+csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
+csvdf.to_csv(csvfilepath, index=False)
+
+csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
+
 bldf[['blacklisted-words']].iterrows()
 rows2delete= [] # it is an empty list at first
 for i,j in csvdf.iterrows():
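This hunk is the "option 4 removes duplicates before iterating" half of the commit message: a remote participant's CSV is deduplicated on the URL column, written back, and re-read before the sanity-check loop runs. A sketch of the same pattern on invented data (the file name is hypothetical; the column names match the diff):

import pandas as pd

# Invented stand-in for a participant's verified.csv / unverified.csv.
csvdf = pd.DataFrame({
    "Name": ["a", "b", "c"],
    "URL": ["http://x.onion", "http://x.onion", "http://y.onion"],
})

# keep="first" keeps the earliest occurrence of each URL;
# inplace=False returns a new frame rather than mutating csvdf.
csvdf = csvdf.drop_duplicates(subset=["URL"], keep="first", inplace=False)

# Persist and re-read, as the diff does, so the later iteration (and its
# row indices) operate on the deduplicated file.
csvdf.to_csv("participant.csv", index=False)  # hypothetical path
csvdf = pd.read_csv("participant.csv", on_bad_lines="skip")
print(csvdf)  # rows "a" and "c" remain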
@@ -640,6 +648,7 @@ Maintenance:
 #mark the row for deletion as it has invalid inputs
 if i not in rows2delete:
 print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
+print(row)
 rows2delete.append(i) #mark the row for deletion if not already done

 ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
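The print(row) added here slots into the rows2delete pattern that recurs throughout the file: offending indices are collected while iterating and dropped in one pass afterwards, since deleting rows mid-iteration is unsafe. A self-contained sketch of that pattern (the validity test is an invented stand-in):

import pandas as pd

csvdf = pd.DataFrame({"Name": ["ok", "bad", "ok2"],
                      "URL": ["http://a.onion", "not a url", "http://b.onion"]})

rows2delete = []  # it is an empty list at first
for i, j in csvdf.iterrows():
    row = csvdf.loc[i, :].values.tolist()
    if '.' not in j["URL"] or ';' in j["URL"]:  # stand-in validity check
        if i not in rows2delete:
            print(f"Marking row {i} for deletion, as it has invalid inputs")
            print(row)
            rows2delete.append(i)  # mark the row for deletion if not already done

csvdf = csvdf.drop(rows2delete)  # drop every marked row in one pass
print(csvdf)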
@@ -667,6 +676,7 @@ Maintenance:
 uvdf.index = uvdf.index + 1 # shifting index
 uvdf = uvdf.sort_index() # sorting by index
 uvdf.to_csv(unverifiedcsvfile, index=False)
+print("[+] NEW ROW =",newrow)
 print_colors("[+] New row added to your own unverified.csv file!")
 else:
 pass
@@ -736,7 +746,7 @@ Maintenance:
 score=''
 newrow=[name,webring_participant_url,desc,trusted,status,score]
 webringcsvfile=instancepath+'/'+'webring-participants.csv'
-wdf = pd.read_csv(webringcsvfile)
+wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
 wdf.loc[-1] = newrow # adding a row
 wdf.index = wdf.index + 1 # shifting index
 wdf = wdf.sort_index() # sorting by index
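Both this hunk and the previous one lean on the same pandas idiom for prepending a row: assign to .loc[-1], shift every index up by one, then sort so the new row lands at position 0. A standalone sketch with invented contents:

import pandas as pd

wdf = pd.DataFrame({"URL": ["http://a.onion"], "Trusted": ["YES"]})
newrow = ["http://b.onion", "NO"]

wdf.loc[-1] = newrow       # adding a row at temporary index -1
wdf.index = wdf.index + 1  # shifting index: -1 becomes 0, 0 becomes 1
wdf = wdf.sort_index()     # sorting by index puts the new row first
print(wdf)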
@@ -783,7 +793,7 @@ Maintenance:
 ########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
 for w in ['verified.csv','unverified.csv']:
 csvfilepath=participantdir+'/'+w
-csvdf = pd.read_csv(csvfilepath)
+csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

 #print_colors(bldf[['blacklisted-words']])
 bldf[['blacklisted-words']].iterrows()
@@ -852,7 +862,7 @@ Maintenance:
 while True:
 print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
 webringcsvfile=instancepath+'/'+'webring-participants.csv'
-wdf = pd.read_csv(webringcsvfile)
+wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
 print_colors(f'{wdf[["URL","Trusted"]]}')
 try:
 index = int(input("What is the index of the webring participant that you want to edit? -1 to exit ").strip())
@@ -1120,7 +1130,7 @@ Maintenance:
 csvfilepath = os.path.join(instancepath, w)
 print_colors(f"Processing file: {csvfilepath}")
 try:
-csvdf = pd.read_csv(csvfilepath)
+csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
 print_colors(f"Removing duplicates in {csvfilepath}")
 #print_colors(f"{csvdf[['URL']]}")
 csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
@@ -1146,7 +1156,7 @@ Maintenance:
 for w in ['verified.csv','unverified.csv']:
 csvfilepath=participantdir+'/'+w
 print_colors(f"{csvfilepath}")
-csvdf = pd.read_csv(csvfilepath)
+csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
 rows2delete= [] # it is an empty list at first
 for i,j in csvdf.iterrows():
 row=csvdf.loc[i,:].values.tolist()
@@ -1208,10 +1218,10 @@ Maintenance:
 case 11:
 #review the submitted websites:
 try:
-submission_df = pd.read_csv(submission_file_abs_path)
-verified_csv_df = pd.read_csv(verifiedcsvfile)
-unverified_csv_df = pd.read_csv(unverifiedcsvfile)
-blacklist_df = pd.read_csv(blcsvfile)
+submission_df = pd.read_csv(submission_file_abs_path, on_bad_lines='skip')
+verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
+blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
 blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
 for i, row in submission_df.iterrows():
 link = row['link']
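Hunks like this one (and its crawled-sites twin below) pull the blacklist column into a plain list before scanning each submission. A sketch of how that list is typically applied (the containment test is an assumption about the loop body, which the diff elides):

import pandas as pd

blacklist_df = pd.DataFrame({"blacklisted-words": ["casino", "scam"]})
submission_df = pd.DataFrame({"link": ["http://good.onion", "http://scam.onion"]})

# Equivalent to blacklist_df['blacklisted-words'].tolist()
blacklisted_words = [word for word in blacklist_df['blacklisted-words']]

for i, row in submission_df.iterrows():
    link = row['link']
    # Assumed check: reject a submission whose link contains a blacklisted word.
    if any(word in link for word in blacklisted_words):
        print(f"rejecting {link}")
    else:
        print(f"keeping {link}")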
@@ -1290,10 +1300,10 @@ Maintenance:
 # review the crawled websites
 try:
 print(crawled_file_abs_path)
-crawled_df = pd.read_csv(crawled_file_abs_path)
-verified_csv_df = pd.read_csv(verifiedcsvfile)
-unverified_csv_df = pd.read_csv(unverifiedcsvfile)
-blacklist_df = pd.read_csv(blcsvfile)
+crawled_df = pd.read_csv(crawled_file_abs_path, on_bad_lines='skip')
+verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
+unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
+blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
 blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
 for i, row in crawled_df.iterrows():
 link = row['URL']

@@ -150,6 +150,8 @@ def IsUrlValid(url:str)->bool:
 else:
 if not url.__contains__('.'):
 return False
+if url.__contains__(';'):
+return False #required otherwise lantern thinks there are extra columns
 if pattern.fullmatch(url) is None:
 return False
 return True
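This last hunk is the SimpleX half of the commit message: SimpleX chatroom invite links can contain a ';', which (per the inline comment) downstream CSV handling reads as an extra column, so IsUrlValid now rejects any URL containing one. A simplified, self-contained sketch of the checks (the regex and surrounding function are assumptions, not the real IsUrlValid):

import re

# Loose stand-in for the module's URL pattern; the real one is stricter.
pattern = re.compile(r"[a-zA-Z0-9./:_-]+")

def is_url_valid(url: str) -> bool:
    """Simplified sketch of the validation path touched by this hunk."""
    if '.' not in url:
        return False
    if ';' in url:
        # SimpleX chatroom links may carry ';', which would be read as an
        # extra CSV column downstream, so they are rejected outright.
        return False
    if pattern.fullmatch(url) is None:
        return False
    return True

print(is_url_valid("http://example.onion"))                # True
print(is_url_valid("http://simplex.example/chat;invite"))  # False: contains ';'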