make sure that SimpleX chatroom links with a ';' aren't accepted, and that option 4 removes duplicates before iterating

root 2025-05-04 23:02:02 +02:00
parent 075ea091d4
commit 22489e571e
2 changed files with 35 additions and 23 deletions
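
The pattern repeated throughout the first changed file is a CSV-hygiene one: every pd.read_csv call gains on_bad_lines='skip' (available since pandas 1.3) so malformed rows are dropped instead of raising a ParserError, and option 4 now removes duplicate URLs before iterating over a participant's entries. A minimal sketch of that pattern, using an illustrative file path rather than lantern's real variables:

import pandas as pd

csvfilepath = 'participant/verified.csv'  # hypothetical path, for illustration only

# Skip malformed rows (e.g. rows with stray delimiters) instead of raising an error
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

# Drop duplicate URLs before iterating, keeping the first occurrence,
# write the cleaned file back, then re-read it as the diff below does
csvdf = csvdf.drop_duplicates(subset=['URL'], keep='first', inplace=False)
csvdf.to_csv(csvfilepath, index=False)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')

for i, row in csvdf.iterrows():
    print(row['URL'])  # each URL now appears at most once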


@@ -144,11 +144,11 @@ def main():
src=templatepath+i
shutil.copyfile(src, filepath)
# now that they exist, get vdf and uvdf and the rest
vdf = pd.read_csv(verifiedcsvfile)
uvdf = pd.read_csv(unverifiedcsvfile)
bldf = pd.read_csv(blcsvfile)
sedf = pd.read_csv(secsvfile)
webpdf = pd.read_csv(webpcsvfile)
vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
bldf = pd.read_csv(blcsvfile, on_bad_lines='skip')
sedf = pd.read_csv(secsvfile, on_bad_lines='skip')
webpdf = pd.read_csv(webpcsvfile, on_bad_lines='skip')
print_colors(f"[+] file exists, your Webring URL is {instance}")
##### CHECK IF ARGUMENTS ARE PASSED TO ENTER PROMPT-LESS MODE #####
@@ -257,8 +257,8 @@ Maintenance:
case 2:
print_colors("[+] Trust/Untrust/Blacklist a Website entry (move an entry from unverified to verified.csv)")
while True:
vdf = pd.read_csv(verifiedcsvfile)
uvdf = pd.read_csv(unverifiedcsvfile)
vdf = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# search for a word
name=''
@@ -385,7 +385,7 @@ Maintenance:
for w in ['verified.csv','unverified.csv']:
csvfilepath=instancepath+'/'+w
print_colors(f"{csvfilepath}")
csvdf = pd.read_csv(csvfilepath)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
row=csvdf.loc[i,:].values.tolist()
@@ -535,7 +535,7 @@ Maintenance:
status=''
score=''
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile)
wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
for participant in os.listdir(participantsdir):
participantdir=participantsdir+participant
@@ -610,7 +610,15 @@ Maintenance:
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print_colors(f"{csvfilepath}")
csvdf = pd.read_csv(csvfilepath)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print("[+] Removing the participant's duplicate entries... ")
# REMOVE DUPLICATES !!! do not accept any duplicate from remote participants
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
csvdf.to_csv(csvfilepath, index=False)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
bldf[['blacklisted-words']].iterrows()
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
@@ -640,6 +648,7 @@ Maintenance:
#mark the row for deletion as it has invalid inputs
if i not in rows2delete:
print_colors(f"Marking row {i} for deletion, as it has invalid inputs")
print(row)
rows2delete.append(i) #mark the row for deletion if not already done
### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
@@ -667,6 +676,7 @@ Maintenance:
uvdf.index = uvdf.index + 1 # shifting index
uvdf = uvdf.sort_index() # sorting by index
uvdf.to_csv(unverifiedcsvfile, index=False)
print("[+] NEW ROW =",newrow)
print_colors("[+] New row added to your own unverified.csv file!")
else:
pass
@@ -736,7 +746,7 @@ Maintenance:
score=''
newrow=[name,webring_participant_url,desc,trusted,status,score]
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile)
wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
wdf.loc[-1] = newrow # adding a row
wdf.index = wdf.index + 1 # shifting index
wdf = wdf.sort_index() # sorting by index
@@ -783,7 +793,7 @@ Maintenance:
########### PERFORM SANITY CHECKS ON the webring participant's verified.csv and unverified.csv ##################
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
csvdf = pd.read_csv(csvfilepath)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
#print_colors(bldf[['blacklisted-words']])
bldf[['blacklisted-words']].iterrows()
@@ -852,7 +862,7 @@ Maintenance:
while True:
print_colors("[+] Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)")
webringcsvfile=instancepath+'/'+'webring-participants.csv'
wdf = pd.read_csv(webringcsvfile)
wdf = pd.read_csv(webringcsvfile, on_bad_lines='skip')
print_colors(f'{wdf[["URL","Trusted"]]}')
try:
index = int(input("What is the index of the webring participant that you want to edit? -1 to exit ").strip())
@@ -1120,7 +1130,7 @@ Maintenance:
csvfilepath = os.path.join(instancepath, w)
print_colors(f"Processing file: {csvfilepath}")
try:
csvdf = pd.read_csv(csvfilepath)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
print_colors(f"Removing duplicates in {csvfilepath}")
#print_colors(f"{csvdf[['URL']]}")
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
@@ -1146,7 +1156,7 @@ Maintenance:
for w in ['verified.csv','unverified.csv']:
csvfilepath=participantdir+'/'+w
print_colors(f"{csvfilepath}")
csvdf = pd.read_csv(csvfilepath)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
rows2delete= [] # it is an empty list at first
for i,j in csvdf.iterrows():
row=csvdf.loc[i,:].values.tolist()
@@ -1208,10 +1218,10 @@ Maintenance:
case 11:
#review the submitted websites:
try:
submission_df = pd.read_csv(submission_file_abs_path)
verified_csv_df = pd.read_csv(verifiedcsvfile)
unverified_csv_df = pd.read_csv(unverifiedcsvfile)
blacklist_df = pd.read_csv(blcsvfile)
submission_df = pd.read_csv(submission_file_abs_path, on_bad_lines='skip')
verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
for i, row in submission_df.iterrows():
link = row['link']
@@ -1290,10 +1300,10 @@ Maintenance:
# review the crawled websites
try:
print(crawled_file_abs_path)
crawled_df = pd.read_csv(crawled_file_abs_path)
verified_csv_df = pd.read_csv(verifiedcsvfile)
unverified_csv_df = pd.read_csv(unverifiedcsvfile)
blacklist_df = pd.read_csv(blcsvfile)
crawled_df = pd.read_csv(crawled_file_abs_path, on_bad_lines='skip')
verified_csv_df = pd.read_csv(verifiedcsvfile, on_bad_lines='skip')
unverified_csv_df = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
blacklist_df = pd.read_csv(blcsvfile, on_bad_lines='skip')
blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
for i, row in crawled_df.iterrows():
link = row['URL']
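
The review loops above (options 11 and the crawled-sites review) read the blacklist into blacklisted_words and then walk each row's link. A hedged sketch of that filter, with made-up data standing in for lantern's CSVs and a plain substring match assumed for the word check:

import pandas as pd

# Illustrative stand-ins for blcsvfile and the submissions file
blacklist_df = pd.DataFrame({'blacklisted-words': ['casino', 'scam']})
submission_df = pd.DataFrame({'link': ['http://example.onion/wiki',
                                       'http://scam-casino.onion/']})

blacklisted_words = [word for word in blacklist_df['blacklisted-words']]

for i, row in submission_df.iterrows():
    link = row['link']
    # substring match is an assumption here; the real check may differ
    if any(word in link for word in blacklisted_words):
        print(f'[-] skipping blacklisted link: {link}')
    else:
        print(f'[+] reviewing link: {link}')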


@@ -150,6 +150,8 @@ def IsUrlValid(url:str)->bool:
else:
if not url.__contains__('.'):
return False
if url.__contains__(';'):
return False #required otherwise lantern thinks there are extra columns
if pattern.fullmatch(url) is None:
return False
return True
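
The added check rejects any URL containing ';' so that SimpleX chatroom invite links cannot be read as extra columns in lantern's CSVs (per the in-code comment). A standalone sketch of the added logic, not the full IsUrlValid, which also runs the regex fullmatch shown above; note that ';' in url is the idiomatic spelling of url.__contains__(';'):

def rejects_semicolon_links(url: str) -> bool:
    # simplified stand-in for the relevant part of IsUrlValid
    if '.' not in url:
        return False
    if ';' in url:
        # SimpleX chatroom links can carry ';', which would otherwise be
        # parsed as extra CSV columns, so they are refused outright
        return False
    return True

print(rejects_semicolon_links('http://example.onion/page'))   # True
print(rejects_semicolon_links('http://example.onion/a;b=c'))  # False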