From c041e5df199dc5234b56b146fa68c5ba826a3497 Mon Sep 17 00:00:00 2001
From: cynthia
Date: Sat, 5 Apr 2025 00:00:47 +0000
Subject: [PATCH] replace default parameters with paths to the current lantern
 instance paths

---
 scripts/crawler.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/scripts/crawler.py b/scripts/crawler.py
index b4cc6d0..783d714 100644
--- a/scripts/crawler.py
+++ b/scripts/crawler.py
@@ -4,11 +4,24 @@ import requests
 import urllib.parse
 import argparse
 import os
+import pwd
 import re
 from utils import print_colors, IsUrlValid
 from dotenv import load_dotenv
 
+# Make default parameters for arguments
+rootpath='/srv/darknet-lantern/'
+urlpath=pwd.getpwuid(os.getuid()).pw_dir+"/.darknet_participant_url"
+instance = ""
+if os.path.isfile(urlpath):
+    with open(urlpath) as f:
+        instance = f.read().rstrip()
+
+instancepath=rootpath+'www/participants/'+instance
+verifiedcsvfile=instancepath+'/verified.csv'
+blcsvfile=instancepath+'/blacklist.csv'
+
 parser = argparse.ArgumentParser(
     prog='Lantern crawler',
     description='Crawls .onion sites for links to more .onion sites')
@@ -19,7 +32,9 @@ parser.add_argument('-o', '--output',
     help='Output CSV file for found .onion links', type=str, default='onion_crawler.csv')
 parser.add_argument('-c', '--crawler-file',
     help='Crawler CSV file to log .onion sites and the amount crawled', type=str, default='crawler.csv')
-parser.add_argument('verified_csv', help='Input file to read for .onion links to crawl', type=str, default='verified.csv')
+parser.add_argument('-b', '--blacklist-file', help='Blacklist CSV files to filter out sites with forbidden words in them',
+    type=str, default=blcsvfile)
+parser.add_argument('verified_csv', help='Input file to read for .onion links to crawl', type=str, default=verifiedcsvfile)
 args = parser.parse_args()
 script_abs_path = os.path.dirname(os.path.abspath(__file__))