#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####

# Patterns are compiled once at import time because the validators below are
# called for every CSV field. They are always used with ``fullmatch``, so no
# ``^``/``$`` anchors are needed.
_HOST_CHARS_RE = re.compile(r"[A-Za-z0-9.]+")
_URL_CHARS_RE = re.compile(r"[A-Za-z0-9:/.]+")
# The hyphen is placed last in the class so it is unambiguously a literal.
_DESCRIPTION_RE = re.compile(r"[A-Za-z0-9.,' -]+")
_PLAIN_TEXT_RE = re.compile(r"[A-Za-z0-9 ]+")

_URL_SCHEMES = ("http://", "https://")
# Shortest accepted onion host: a 56-character v3 address plus ".onion".
_MIN_ONION_HOST_LENGTH = 62
_MAX_DESCRIPTION_LENGTH = 256
_MAX_CATEGORY_LENGTH = 64
_MAX_NAME_LENGTH = 64
_VALID_STATUSES = ("v", "x")


def _host_of(url: str) -> str:
    """Return *url* without its http(s):// prefix and without any path."""
    for scheme in _URL_SCHEMES:
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    return url.split("/", 1)[0]


def IsOnionValid(url: str) -> bool:
    """
    Check that url is ``name.onion`` or ``subdomain.name.onion``.

    An optional ``http://`` or ``https://`` prefix and a trailing path are
    tolerated, and surrounding whitespace is ignored (callers should store
    the stripped value). The reason for a rejection is printed.

    Args:
        url: Onion domain or URL taken from the CSV.

    Returns:
        True when the host is an acceptable onion domain, otherwise False.
    """
    if not isinstance(url, str):
        print("Domain not valid")
        return False

    url = url.strip()
    host = _host_of(url)

    # The whole URL is checked as well as the host, so that a path cannot
    # carry characters (commas, quotes, ...) that would corrupt the CSV.
    if _URL_CHARS_RE.fullmatch(url) is None or _HOST_CHARS_RE.fullmatch(host) is None:
        print("Domain contains invalid character.")
        print(host)
        return False

    labels = host.split(".")
    if len(labels) > 3:
        # Only "subdomain.name.onion" is allowed: name and "onion" are not subdomains.
        print(f"This domain have more than one subdomain. There are {len(labels) - 2} subdomains")
        return False

    if len(host) < _MIN_ONION_HOST_LENGTH:
        print("Domain length is less than 62.")
        return False

    # The ".onion" suffix is mandatory, and empty labels ("a..onion") are malformed.
    if len(labels) < 2 or "" in labels or labels[-1].lower() != "onion":
        print("Domain not valid")
        return False

    return True


def IsUrlValid(url: str) -> bool:
    """
    Check that url is an acceptable darknet or clearnet URL.

    The URL must contain a dot and only characters from ``[a-zA-Z0-9.:/]``.
    When its host ends with ``.onion`` it must also pass ``IsOnionValid``.
    Surrounding whitespace is ignored (callers should store the stripped
    value). The reason for a rejection is printed.

    Args:
        url: URL taken from the CSV.

    Returns:
        True when the URL is acceptable, otherwise False.
    """
    if not isinstance(url, str):
        print('Url contains invalid chars')
        return False

    url = url.strip()
    if "." not in url:
        print("No (DOT) in url")
        return False
    if _URL_CHARS_RE.fullmatch(url) is None:
        print('Url contains invalid chars')
        return False

    # Decide on the host rather than on the end of the whole URL, so that a
    # trailing slash or a path cannot bypass the onion-specific checks.
    if _host_of(url).lower().endswith(".onion"):
        return IsOnionValid(url)
    return True


def IsStatusValid(status: str) -> bool:
    """
    Check that status is exactly one character, either ``v`` or ``x``.

    Verbose only if False is returned.

    Args:
        status: Status flag taken from the CSV.

    Returns:
        True when status is ``"v"`` or ``"x"``, otherwise False.
    """
    if not isinstance(status, str) or len(status) != 1:
        print("Got more than one character or nothing.")
        return False
    if status not in _VALID_STATUSES:
        print("Got an invalid character")
        return False
    return True


def IsDescriptionValid(desc: str) -> bool:
    """
    Check that desc only contains ``[a-zA-Z0-9.,' -]``, max 256 chars.

    Surrounding whitespace is ignored (callers should store the stripped
    value); a description that is empty after stripping is rejected.
    The reason for a rejection is printed.

    Args:
        desc: Description taken from the CSV.

    Returns:
        True when the description is acceptable, otherwise False.
    """
    if not isinstance(desc, str):
        print('Got an empty desc or invalid chars')
        return False

    desc = desc.strip()
    if _DESCRIPTION_RE.fullmatch(desc) is None:
        print('Got an empty desc or invalid chars')
        return False
    if len(desc) > _MAX_DESCRIPTION_LENGTH:
        print("desc is greater than 256 chars")
        return False
    return True


def IsCategoryValid(categories: list) -> bool:
    """
    Check that every category only contains ``[a-zA-Z0-9 ]``, max 64 chars.

    Every item is checked; one bad item rejects the whole list. A single
    string is accepted and treated as a one-item list. Surrounding
    whitespace of each item is ignored (callers should store the stripped
    values). The reason for a rejection is printed.

    Args:
        categories: Category names taken from the CSV.

    Returns:
        True when the list is non-empty and every category is acceptable,
        otherwise False.
    """
    # A bare string would otherwise be validated one character at a time.
    if isinstance(categories, str):
        categories = [categories]
    if not categories:
        print('Got an empty list or invalid chars')
        return False

    for category in categories:
        if not isinstance(category, str):
            print('Got an empty list or invalid chars')
            return False
        category = category.strip()
        if _PLAIN_TEXT_RE.fullmatch(category) is None:
            print('Got an empty list or invalid chars')
            return False
        if len(category) > _MAX_CATEGORY_LENGTH:
            print('Category is too long')
            return False
    return True


def IsNameValid(name: str) -> bool:
    """
    Check that name only contains ``[a-zA-Z0-9 ]`` and is at most 64 chars.

    Surrounding whitespace is ignored (callers should store the stripped
    value); a name that is empty after stripping is rejected.
    The reason for a rejection is printed.

    Args:
        name: Site name taken from the CSV.

    Returns:
        True when the name is acceptable, otherwise False.
    """
    if not isinstance(name, str):
        print("Got an invalid character or nothing")
        return False

    name = name.strip()
    if _PLAIN_TEXT_RE.fullmatch(name) is None:
        print("Got an invalid character or nothing")
        return False
    if len(name) > _MAX_NAME_LENGTH:
        print(f'Got a name lenght greater than 64. {len(name)}')
        return False
    return True