add crawler dir for default crawler outputs

This commit is contained in:
cynthia 2025-04-05 14:41:26 +00:00
parent e76d29807d
commit 63d89b9b8b
2 changed files with 9 additions and 3 deletions

3
.gitignore vendored
View file

@@ -1,5 +1,6 @@
.git .git
www/participants/** www/participants/**
crawler/**
scripts/__pycache__/** scripts/__pycache__/**
.env .env
env/ env/

View file

@@ -21,6 +21,9 @@ if os.path.isfile(urlpath):
instancepath=rootpath+'www/participants/'+instance instancepath=rootpath+'www/participants/'+instance
verifiedcsvfile=instancepath+'/verified.csv' verifiedcsvfile=instancepath+'/verified.csv'
blcsvfile=instancepath+'/blacklist.csv' blcsvfile=instancepath+'/blacklist.csv'
crawlerdir=instancepath+'/crawler'
if not os.path.exists(crawlerdir):
os.makedirs(crawlerdir)
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog='Lantern crawler', prog='Lantern crawler',
@@ -29,9 +32,11 @@ parser = argparse.ArgumentParser(
parser.add_argument('-l', '--limit', parser.add_argument('-l', '--limit',
help='Page crawl limit per .onion link.', type=int, default=10) help='Page crawl limit per .onion link.', type=int, default=10)
parser.add_argument('-o', '--output', parser.add_argument('-o', '--output',
help='Output CSV file for found .onion links', type=str, default='onion_crawler.csv') help='Output CSV file for found .onion links', type=str,
default=os.path.join(crawlerdir, 'onion_crawler.csv'))
parser.add_argument('-c', '--crawler-file', parser.add_argument('-c', '--crawler-file',
help='Crawler CSV file to log .onion sites and the amount crawled', type=str, default='crawler.csv') help='Crawler CSV file to log .onion sites and the amount crawled', type=str,
default=os.path.join(crawlerdir, 'crawler.csv'))
parser.add_argument('-b', '--blacklist-file', help='Blacklist CSV files to filter out sites with forbidden words in them', parser.add_argument('-b', '--blacklist-file', help='Blacklist CSV files to filter out sites with forbidden words in them',
type=str, default=blcsvfile) type=str, default=blcsvfile)
parser.add_argument('-V', '--verified-file', help='Input file to read for .onion links to crawl', type=str, default=verifiedcsvfile) parser.add_argument('-V', '--verified-file', help='Input file to read for .onion links to crawl', type=str, default=verifiedcsvfile)