mirror of
http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/darknet-lantern.git
synced 2025-05-16 20:26:58 +00:00
option 7 and 8 done
This commit is contained in:
parent
c863f71951
commit
95474141f4
4 changed files with 114 additions and 45 deletions
10
README.md
10
README.md
|
@ -9,11 +9,15 @@ DONE:
|
||||||
-php : if valid make it filter your own verified.csv and unverified.csv files
|
-php : if valid make it filter your own verified.csv and unverified.csv files
|
||||||
-py : option 9) cleanup all duplicates in your own unverified.csv and verified.csv
|
-py : option 9) cleanup all duplicates in your own unverified.csv and verified.csv
|
||||||
-py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted)
|
-py : option 10) perform sanity checks on all csv files (to mark them as sensitive or remove the ones that are blacklisted)
|
||||||
|
-py : option 7) Add/Remove words in the sensitive list
|
||||||
|
-py : option 8) Add/Remove words in the blacklist
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
-py : option 7) Add/Remove words in the sensitive list (assigned to anon)
|
-manual work: fit all the existing links into the current format one by one
|
||||||
-py : option 8) Add/Remove words in the blacklist (assigned to anon)
|
-php/css: make the search page pretty
|
||||||
|
-doc: redo the documentation for the project
|
||||||
|
-doc: finish the blogpost about it
|
||||||
|
-release it
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -103,12 +103,12 @@ Managing Webring Participants:
|
||||||
6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
|
6) Trust/UnTrust/Blacklist a webring participant (Potentially dangerous)
|
||||||
|
|
||||||
Managing Wordlists:
|
Managing Wordlists:
|
||||||
7) Add/Remove words or links in the sensitive list (ex: drug)
|
7) Add/Remove Words/URLs in the sensitive list (ex: drug)
|
||||||
8) Add/Remove words or links in the blacklist (ex: porn)
|
8) Add/Remove Words/URLs or links in the blacklist (ex: porn)
|
||||||
|
|
||||||
Maintenance:
|
Maintenance:
|
||||||
9) remove the duplicate URLs for your own instance
|
9) Remove the duplicate URLs for your own instance
|
||||||
10) perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
|
10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
|
||||||
|
|
||||||
0) Exit
|
0) Exit
|
||||||
""")
|
""")
|
||||||
|
@ -712,73 +712,133 @@ Maintenance:
|
||||||
|
|
||||||
################### MANAGING WORDLISTS #################
|
################### MANAGING WORDLISTS #################
|
||||||
#Managing Wordlists:
|
#Managing Wordlists:
|
||||||
# 7) Add/Remove words or links in the sensitive list (ex: drug)
|
# 7) Add/Remove Words/URLs in the sensitive list (ex: drug)
|
||||||
# 8) Add/Remove words or links in the blacklist (ex: porn)
|
# 8) Add/Remove words or links in the blacklist (ex: porn)
|
||||||
|
|
||||||
#Maintenance:
|
#Maintenance:
|
||||||
#9) remove the duplicate URLs for your own instance
|
# 9) remove the duplicate URLs for your own instance
|
||||||
#10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
|
# 10) perform sanity checks on all csv files (all instances) (to mark them as sensitive / or remove the ones that are blacklisted)
|
||||||
#########################################################
|
#########################################################
|
||||||
|
|
||||||
case "7":
|
case "7":
|
||||||
print("[+] Add/Remove words in the sensitive list (ex: drug)")
|
print("[+] Add/Remove Words/URLs in the sensitive list (ex: drug)")
|
||||||
print("do you want to 1) add words or 2) remove words ?")
|
#secsvfile=instancepath+'/sensitive.csv' #fyi
|
||||||
|
#sedf = pd.read_csv(secsvfile) #fyi
|
||||||
option="0"
|
option="0"
|
||||||
|
|
||||||
done = False
|
done = False
|
||||||
while(done == False):
|
while(done == False):
|
||||||
while option != "1" and option != "2" and option != "exit":
|
while option != "1" and option != "2" and option != "-1":
|
||||||
option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)")
|
option=input("[+] do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
|
||||||
# TODO display the contents of sensitive.csv file
|
if option == "1":
|
||||||
if option == 1:
|
word=input("[+] which Sensitive word do you want to add? (write -1 to exit) ")
|
||||||
word=input("which word do you want to add? (write 0 to exit")
|
if word == "-1":
|
||||||
if word == "0":
|
|
||||||
done = True
|
done = True
|
||||||
#True to get out of the while loop
|
#True to get out of the while loop
|
||||||
else:
|
else:
|
||||||
print("checking if word is valid")
|
print("[+] checking if the Word/URL is valid: ")
|
||||||
# TODO check if word is valid in a while loop (dont check if
|
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
|
||||||
# TODO if invalid! remove word at index
|
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
|
||||||
else:
|
print('[+] Word/URL is valid, adding the word into the sensitive wordlist ')
|
||||||
index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
|
# add it to the sensitive wordlist
|
||||||
if index == "exit":
|
newrow=[word]
|
||||||
|
print("[+] NEWROW=",newrow)
|
||||||
|
# (rest is automatic: status, score, instance is = '' because it is your own instance)
|
||||||
|
# check if the entry doesn't already exist in verified.csv and in unverified.csv
|
||||||
|
# if it doesnt exist, add it into unverified.csv
|
||||||
|
sedf.loc[-1] = newrow # adding a row
|
||||||
|
sedf.index = sedf.index + 1 # shifting index
|
||||||
|
sedf = sedf.sort_index() # sorting by index
|
||||||
|
print("[+] New row added! now writing the csv file: ")
|
||||||
|
sedf.to_csv(secsvfile, index=False)
|
||||||
|
|
||||||
|
|
||||||
|
if option == "2":
|
||||||
|
print(sedf)
|
||||||
|
index=""
|
||||||
|
while (index not in sedf.index) and index != -1:
|
||||||
|
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
|
||||||
|
if index == -1:
|
||||||
done = True
|
done = True
|
||||||
#True to get out of the while loop
|
#True to get out of the while loop
|
||||||
else:
|
else:
|
||||||
print("checking if index is valid")
|
if (index in sedf.index):
|
||||||
# TODO check if index is valid or not
|
#if index exists, remove it
|
||||||
# TODO if valid! remove word at index
|
print("[+] removing selected index: ")
|
||||||
# TODO if invalid! just pass to ask for another word
|
sedf.drop(index, inplace= True)
|
||||||
|
sedf.to_csv(secsvfile, index=False)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print('[-] Error, invalid index')
|
||||||
|
|
||||||
|
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
case "8":
|
case "8":
|
||||||
print("[+] Add/Remove words in the blacklist list (ex: porn)")
|
print("[+] Add/Remove words in the blacklist list (ex: porn)")
|
||||||
|
#blcsvfile=instancepath+'/sensitive.csv' #fyi
|
||||||
|
#bldf = pd.read_csv(secsvfile) #fyi
|
||||||
option="0"
|
option="0"
|
||||||
|
|
||||||
done = False
|
done = False
|
||||||
while(done == False):
|
while(done == False):
|
||||||
while option != "1" and option != "2" and option != "exit":
|
while option != "1" and option != "2" and option != "-1":
|
||||||
option=input("do you want to 1) add words or 2) remove words ? (type exit to exit)")
|
option=input("[+] Do you want to 1) add or 2) remove Words/URLs? (type exit to exit) ")
|
||||||
# TODO display the contents of blacklist.csv file
|
if option == "1":
|
||||||
if option == 1:
|
word=input("[+] Which Sensitive word do you want to add? (write -1 to exit) ")
|
||||||
word=input("which word do you want to add? (write 0 to exit")
|
if word == "-1":
|
||||||
if word == "0":
|
|
||||||
done = True
|
done = True
|
||||||
#True to get out of the while loop
|
#True to get out of the while loop
|
||||||
else:
|
else:
|
||||||
print("checking if word is valid")
|
print("[+] Checking if the Word/URL is valid: ")
|
||||||
# TODO check if word is valid in a while loop (dont check if
|
if IsUrlValid(word) or IsOnionValid(word) or IsDescriptionValid(word):
|
||||||
# TODO if invalid! remove word at index
|
print(IsUrlValid(word), IsOnionValid(word), IsDescriptionValid(word))
|
||||||
else:
|
print('[+] Word/URL is valid, adding the word into the blacklist ')
|
||||||
index=input("which word do you want to remove? (index 0 to (max index) (write exit to exit)")
|
# add it to the sensitive wordlist
|
||||||
if index == "exit":
|
newrow=[word]
|
||||||
|
print("[+] NEWROW=",newrow)
|
||||||
|
# (rest is automatic: status, score, instance is = '' because it is your own instance)
|
||||||
|
# check if the entry doesn't already exist in verified.csv and in unverified.csv
|
||||||
|
# if it doesnt exist, add it into unverified.csv
|
||||||
|
bldf.loc[-1] = newrow # adding a row
|
||||||
|
bldf.index = bldf.index + 1 # shifting index
|
||||||
|
bldf = bldf.sort_index() # sorting by index
|
||||||
|
print("[+] New row added! now writing the csv file: ")
|
||||||
|
bldf.to_csv(blcsvfile, index=False)
|
||||||
|
|
||||||
|
|
||||||
|
if option == "2":
|
||||||
|
print(bldf)
|
||||||
|
index=""
|
||||||
|
while (index not in bldf.index) and index != -1:
|
||||||
|
index=int(input("which word do you want to remove? (index 0 to (max index) (write -1 to exit) "))
|
||||||
|
if index == -1:
|
||||||
done = True
|
done = True
|
||||||
#True to get out of the while loop
|
#True to get out of the while loop
|
||||||
else:
|
else:
|
||||||
print("checking if index is valid")
|
if (index in bldf.index):
|
||||||
# TODO check if index is valid or not
|
#if index exists, remove it
|
||||||
# TODO if valid! remove word at index
|
print("[+] removing selected index: ")
|
||||||
# TODO if invalid! just pass to ask for another word
|
bldf.drop(index, inplace= True)
|
||||||
|
bldf.to_csv(blcsvfile, index=False)
|
||||||
|
|
||||||
|
else:
|
||||||
|
print('[-] Error, invalid index')
|
||||||
|
|
||||||
|
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
|
# CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file)
|
||||||
case "9":
|
case "9":
|
||||||
print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
|
print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)")
|
||||||
|
@ -977,6 +1037,9 @@ def IsUrlValid(url:str)->bool:
|
||||||
#if not : return False
|
#if not : return False
|
||||||
pattern = re.compile("^[A-Za-z0-9:/.]+$")
|
pattern = re.compile("^[A-Za-z0-9:/.]+$")
|
||||||
url = str(url)
|
url = str(url)
|
||||||
|
if len(url) < 4:
|
||||||
|
#print("Status: Got more than one character or nothing.")
|
||||||
|
return False
|
||||||
if url.endswith('.onion'):
|
if url.endswith('.onion'):
|
||||||
return IsOnionValid(url)
|
return IsOnionValid(url)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,2 +1,5 @@
|
||||||
blacklisted-words
|
blacklisted-words
|
||||||
|
pr0n
|
||||||
|
pron
|
||||||
|
teen
|
||||||
porn
|
porn
|
||||||
|
|
|
|
@ -2,4 +2,3 @@ sensitive-words
|
||||||
Market
|
Market
|
||||||
market
|
market
|
||||||
drug
|
drug
|
||||||
|
|
||||||
|
|
|
Loading…
Add table
Add a link
Reference in a new issue