Mirror of http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git (synced 2025-05-16 04:06:59 +00:00)
finish option12 : listing crawled links
parent 6de26c5fa5
commit 212e44c3a5
3 changed files with 127 additions and 3 deletions
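The new option 12 reuses the pandas row-move pattern that option 11 (submission review) already follows: read the source and destination CSVs, prepend the reviewed row to the destination DataFrame, drop it from the source, and rewrite both files. A minimal sketch of that pattern, with hypothetical paths and placeholder row values (the real script builds the row from user input and derives the paths from the instance directory), assuming the destination CSV has the eight columns that newrow fills, including Category and Score:

    import pandas as pd

    src_path = "crawler/onion_crawler.csv"   # hypothetical stand-in for crawled_file_abs_path
    dst_path = "verified.csv"                # hypothetical stand-in for verifiedcsvfile

    src_df = pd.read_csv(src_path)
    dst_df = pd.read_csv(dst_path)

    i = 0  # index of the crawled row currently under review
    # placeholder values mirroring newrow=[instance,category,name,link,sensi,desc,'YES','100'] in the diff
    newrow = ["example-instance", "Indexes", "Example", "http://example.onion", "NO", "", "YES", "100"]

    # prepend the row the way the diff does: write it at index -1, shift every
    # index up by one, restore index order, then re-sort the listing
    dst_df.loc[-1] = newrow
    dst_df.index = dst_df.index + 1
    dst_df = dst_df.sort_index()
    dst_df = dst_df.sort_values(by=["Category", "Score"], ascending=[True, False])

    # drop the reviewed row from the source and persist both files
    src_df.drop(index=i, inplace=True)
    dst_df.to_csv(dst_path, index=False)
    src_df.to_csv(src_path, index=False)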
BIN  SimpleX/__pycache__/regex_simplexlinks.cpython-311.pyc  Normal file
Binary file not shown.
@@ -130,6 +130,7 @@ def main():
     secsvfile=instancepath+'/sensitive.csv'
     webpcsvfile=instancepath+'/webring-participants.csv'
     submission_file_abs_path = os.path.abspath('submissions/submission.csv')
+    crawled_file_abs_path = os.path.abspath('crawler/onion_crawler.csv')

     if not os.path.exists(instancepath):
         print_colors(f"{rootpath}",is_error=True, bold=True)
@@ -181,11 +182,12 @@ Managing Wordlists:
 Maintenance:
 9) Remove the duplicate URLs for your own instance
 10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
-11) Review submissions (Add to verified.csv/ add to unverified.csv/ delete /blacklist)
+11) Review submissions (Add to verified.csv /add to unverified.csv /delete /blacklist)
+12) Review crawled websites (Add to verified.csv /add to unverified.csv /delete /blacklist)

 0) Exit
 """)
-    option = input("Select an option? (0-11): ").strip()
+    option = input("Select an option? (0-12): ").strip()
     try:
         option = int(option)
     except ValueError:
@@ -1202,6 +1204,7 @@ Maintenance:
                 break

         case 11:
+            #review the submitted websites:
             try:
                 submission_df = pd.read_csv(submission_file_abs_path)
                 verified_csv_df = pd.read_csv(verifiedcsvfile)
@@ -1215,6 +1218,7 @@ Maintenance:
                     print_colors("\n1) Move entry to verified.csv \n2) Move entry from submission.csv to unverified.csv \n3) Delete from submission.csv file \n4) Add to blacklist.csv \n-1) exit")
                     if link in blacklisted_words:
                         print_colors("Black listed entry found", bold=True)
+                        #TODO delete the entry as its already blacklisted
                         continue
                     else:
                         name = row['name']
@@ -1277,7 +1281,127 @@ Maintenance:
                             break

             finally:
-                print_colors("End of file")
+                print_colors("No more submissions to review, exiting.")


+        case 12:
+            # review the crawled websites
+            try:
+                print(crawled_file_abs_path)
+                crawled_df = pd.read_csv(crawled_file_abs_path)
+                verified_csv_df = pd.read_csv(verifiedcsvfile)
+                unverified_csv_df = pd.read_csv(unverifiedcsvfile)
+                blacklist_df = pd.read_csv(blcsvfile)
+                blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
+                for i, row in crawled_df.iterrows():
+                    link = row['URL']
+                    print('\n',row[['URL','Category','Name']])
+                    print('\nLink to verify: ',link)
+                    print_colors("\n1) Move entry to verified.csv \n2) Move entry to unverified.csv \n3) Delete from the crawled list \n4) Add to blacklist.csv \n-1) exit")
+                    if link in blacklisted_words:
+                        print_colors("Black listed entry found", bold=True)
+                        # already blacklisted: drop it from the crawled list
+                        crawled_df.drop(index=i,inplace=True)
+                        crawled_df.to_csv(crawled_file_abs_path, index=False)
+                        continue
+                    else:
+                        name = row['Name']
+                        category = row['Category']
+                        #desc = row['esc']
+                        desc = ''
+                        #sensi = "YES" if row['sensitive'] == 'y' else "NO"
+                        sensi = ''
+                        number = int(input("Enter an option: "))
+                        if number == 1:
+                            # Add to verified.csv
+                            # ask the name if invalid
+                            while(IsNameValid(name) is not True):
+                                name = input("What is the name of the website? ")
+
+                            # ask the category
+                            while((IsCategoryValid(category) != True) or (category == 'Tor Hidden Service')):
+                                category = input("What is the website Category? (ex: Indexes) ")
+                            # ask the sensitivity
+                            choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
+                            if choice == "n":
+                                sensi = 'NO'
+                            else:
+                                sensi = 'YES'
+
+                            # ask if its sensitive or not
+                            # ask the user to write a description
+                            newrow=[instance,category,name,link,sensi,desc,'YES','100']
+                            verified_csv_df.loc[-1] = newrow  # adding a row
+                            verified_csv_df.index = verified_csv_df.index + 1  # shifting index
+                            verified_csv_df = verified_csv_df.sort_index()  # sorting by index
+                            verified_csv_df = verified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
+                            print_colors("[+] New row added! now writing the csv file")
+                            verified_csv_df.to_csv(verifiedcsvfile, index=False)
+                            crawled_df.drop(index=i,inplace=True)
+                            crawled_df.to_csv(crawled_file_abs_path, index=False)
+                        elif number == 2:
+                            # Add to unverified.csv
+                            # consider it as sensitive by default and category must just be 'crawled'
+                            # ask the name if invalid
+                            while(IsNameValid(name) is not True):
+                                name = input("What is the name of the website? ")
+                            # ask the category
+                            print('CATEGORY = ', category)
+                            while((IsCategoryValid(category) != True) or (category == 'Tor Hidden Service')):
+                                category = input("What is the website Category? (ex: Indexes) ")
+                            choice=input("Is the website sensitive ? (ex: related to drugs) (y/n) ")
+                            if choice == "n":
+                                sensi = 'NO'
+                            else:
+                                sensi = 'YES'
+                            # ask for the category, if empty then the category is 'crawled'
+                            # add new row
+                            newrow=[instance,category,name,link,sensi,desc,'YES','100']
+
+                            unverified_csv_df.loc[-1] = newrow  # adding a row
+                            unverified_csv_df.index = unverified_csv_df.index + 1  # shifting index
+                            unverified_csv_df = unverified_csv_df.sort_index()  # sorting by index
+                            unverified_csv_df = unverified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
+                            print_colors("[+] New row added! now writing the csv file")
+                            unverified_csv_df.to_csv(unverifiedcsvfile, index=False)
+                            crawled_df.drop(index=i,inplace=True)
+                            crawled_df.to_csv(crawled_file_abs_path, index=False)
+
+                        elif number == 3:
+                            # Delete from crawled_onion.csv
+                            crawled_df.drop(index=i,inplace=True)
+                            crawled_df.to_csv(crawled_file_abs_path, index=False)
+
+                        elif number == 4:
+                            # Add to blacklist.csv
+                            newrow=[link]
+
+                            blacklist_df.loc[-1] = newrow  # adding a row
+                            blacklist_df.index = blacklist_df.index + 1  # shifting index
+                            blacklist_df = blacklist_df.sort_index()  # sorting by index
+                            print_colors("[+] New row added! now writing the csv file")
+                            blacklist_df.to_csv(blcsvfile, index=False)
+                            crawled_df.drop(index=i,inplace=True)
+                            crawled_df.to_csv(crawled_file_abs_path, index=False)
+
+                        elif number == -1:
+                            break
+
+                        else:
+                            print_colors("Invalid Number",is_error=True)
+                            continue
+
+            except Exception as e:
+                print_colors(f'Try again {e}',is_error=True)
+                break
+
+            finally:
+                print_colors("No more crawled websites to review, exiting.")
+
             break
         case 0:

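Note that case 12 only relies on the crawler output exposing URL, Category and Name columns (row['URL'], row['Category'], row['Name'] above); description and sensitivity start empty and are filled in during review, and a category still set to 'Tor Hidden Service' (apparently the crawler's placeholder) is re-prompted before the entry is written out. A hypothetical crawler/onion_crawler.csv row of that shape:

    URL,Category,Name
    http://examplexyz.onion,Tor Hidden Service,Example Onion Service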
submissions/README.md: 0 changes (Normal file → Executable file)