add blacklist

This commit is contained in:
cynthia 2025-04-05 00:27:54 +00:00
parent c041e5df19
commit fe6b826027

View file

@@ -70,10 +70,17 @@ def get_output_file():
except FileNotFoundError:
return pd.DataFrame(columns=["URL","Name"])
def get_blacklist_file():
    """Load the configured blacklist CSV into a DataFrame.

    Returns:
        pandas.DataFrame with the blacklist contents, or None when the
        file at ``args.blacklist_file`` does not exist (callers treat
        None as "no blacklist filtering").
    """
    try:
        blacklist = pd.read_csv(args.blacklist_file)
    except FileNotFoundError:
        # No blacklist on disk — signal "nothing to filter" to the caller.
        return None
    return blacklist
# get list of .onion links from the verified.csv file
verified_csv_file = pd.read_csv(args.verified_csv)
# Load the crawler results, prior output, and (optional) blacklist;
# blacklist_file is None when no blacklist CSV exists on disk.
crawler_file = get_crawler_file()
output_file = get_output_file()
blacklist_file = get_blacklist_file()
# Accumulators populated while scanning the loaded CSVs below.
vcsv_urls = []
vcsv_hostnames = []
crawled_urls = []
@@ -104,8 +111,16 @@ def add_urls(urls):
if "Content-Type" not in req.headers or "text/html" not in req.headers["Content-Type"]:
output_file.loc[-1] = [url, ""]
else:
forbidden = False
html = BeautifulSoup(req.text, features="lxml")
title = html.title.string if html.title is not None else ""
if blacklist_file is not None:
for word in blacklist_file.loc["blacklisted-words"]:
if word in title:
print_colors('[+] Forbidden word found. Rejecting.')
forbidden = True
if forbidden:
continue
output_file.loc[-1] = [url, title]
output_file.index += 1
output_file = output_file.sort_index()