mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-05-17 04:36:57 +00:00
add crawler dir for default crawler outputs
This commit is contained in:
parent
e76d29807d
commit
63d89b9b8b
2 changed files with 9 additions and 3 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,5 +1,6 @@
|
||||||
.git
|
.git
|
||||||
www/participants/**
|
www/participants/**
|
||||||
|
crawler/**
|
||||||
scripts/__pycache__/**
|
scripts/__pycache__/**
|
||||||
.env
|
.env
|
||||||
env/
|
env/
|
|
@ -21,6 +21,9 @@ if os.path.isfile(urlpath):
|
||||||
instancepath=rootpath+'www/participants/'+instance
|
instancepath=rootpath+'www/participants/'+instance
|
||||||
verifiedcsvfile=instancepath+'/verified.csv'
|
verifiedcsvfile=instancepath+'/verified.csv'
|
||||||
blcsvfile=instancepath+'/blacklist.csv'
|
blcsvfile=instancepath+'/blacklist.csv'
|
||||||
|
crawlerdir=instancepath+'/crawler'
|
||||||
|
if not os.path.exists(crawlerdir):
|
||||||
|
os.makedirs(crawlerdir)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog='Lantern crawler',
|
prog='Lantern crawler',
|
||||||
|
@ -29,9 +32,11 @@ parser = argparse.ArgumentParser(
|
||||||
parser.add_argument('-l', '--limit',
|
parser.add_argument('-l', '--limit',
|
||||||
help='Page crawl limit per .onion link.', type=int, default=10)
|
help='Page crawl limit per .onion link.', type=int, default=10)
|
||||||
parser.add_argument('-o', '--output',
|
parser.add_argument('-o', '--output',
|
||||||
help='Output CSV file for found .onion links', type=str, default='onion_crawler.csv')
|
help='Output CSV file for found .onion links', type=str,
|
||||||
|
default=os.path.join(crawlerdir, 'onion_crawler.csv'))
|
||||||
parser.add_argument('-c', '--crawler-file',
|
parser.add_argument('-c', '--crawler-file',
|
||||||
help='Crawler CSV file to log .onion sites and the amount crawled', type=str, default='crawler.csv')
|
help='Crawler CSV file to log .onion sites and the amount crawled', type=str,
|
||||||
|
default=os.path.join(crawlerdir, 'crawler.csv'))
|
||||||
parser.add_argument('-b', '--blacklist-file', help='Blacklist CSV files to filter out sites with forbidden words in them',
|
parser.add_argument('-b', '--blacklist-file', help='Blacklist CSV files to filter out sites with forbidden words in them',
|
||||||
type=str, default=blcsvfile)
|
type=str, default=blcsvfile)
|
||||||
parser.add_argument('-V', '--verified-file', help='Input file to read for .onion links to crawl', type=str, default=verifiedcsvfile)
|
parser.add_argument('-V', '--verified-file', help='Input file to read for .onion links to crawl', type=str, default=verifiedcsvfile)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue