make it cool now

2025-06-30 22:06:40 +00:00 · 2025-01-19 17:53:52 +01:00 · 2025-01-19 17:53:52 +01:00 · 4d4567caf0
commit 4d4567caf0
parent f23a6e3ac8
40 changed files with 504 additions and 611 deletions
--- a/scripts/tests/csvwork2.py
+++ b/scripts/tests/csvwork2.py
@ -1,255 +0,0 @@
-import csv, json, pandas as pd, glob, re
-
-def main():
-    """Sanitise unverified.csv against the blacklist and sensitive word lists.
-
-    Reads unverified.csv, blacklist.csv and sensitive.csv from the current
-    working directory, then for every row of unverified.csv:
-
-    1. marks the row for deletion when any field fails its Is*Valid check
-       (a falsy result, i.e. False or None, counts as a failure);
-    2. marks the row for deletion when any cell contains a blacklisted word;
-    3. sets the 'Sensitive' column to '✔️' when any cell contains a
-       sensitive word.
-
-    The cleaned table is written back to unverified.csv exactly once, after
-    every row has been examined, so sensitive markings are saved even when no
-    row is deleted.
-
-    Returns:
-        True once unverified.csv has been rewritten.
-    """
-    unverifiedcsvfile = "unverified.csv"
-    uvdf = pd.read_csv(unverifiedcsvfile)
-    # Empty cells load as NaN; drop them so the substring tests below never
-    # compare a float against a string.
-    blwords = pd.read_csv("blacklist.csv")['blacklisted-words'].dropna().astype(str).tolist()
-    sewords = pd.read_csv("sensitive.csv")['sensitive-words'].dropna().astype(str).tolist()
-
-    # (validator, column) pairs, evaluated lazily and in this order.
-    checks = (
-        (IsUrlValid, 'Instance'),
-        (IsCategoryValid, 'Category'),
-        (IsNameValid, 'Name'),
-        (IsUrlValid, 'URL'),
-        (IsStatusValid, 'Sensitive'),
-        (IsDescriptionValid, 'Description'),
-        (IsStatusValid, 'Status'),
-        (IsScoreValid, 'Score'),
-    )
-
-    rows2delete = []
-    for i in uvdf.index:
-        # Snapshot the cells as text before the row is modified below.
-        cells = [str(x) for x in uvdf.loc[i, :].values.tolist()]
-
-        ### SANITY CHECK 1: mark rows with incorrectly formatted fields ###
-        if any(not check(uvdf.at[i, column]) for check, column in checks):
-            print("Marking row", i,"for deletion, as it has invalid inputs")
-            rows2delete.append(i)
-
-        ### SANITY CHECK 2: mark rows that contain a blacklisted word ###
-        if i not in rows2delete and any(word in cell for word in blwords for cell in cells):
-            print("Marking row", i,"for deletion, as it matches with a blacklisted word")
-            rows2delete.append(i)
-
-        ### SANITY CHECK 3: flag rows that contain a sensitive word ###
-        if uvdf.at[i, 'Sensitive'] != '✔️' and any(word in cell for word in sewords for cell in cells):
-            print("Marking row", i,"as sensitive, as it matches with a sensitive word")
-            uvdf.at[i, 'Sensitive']='✔️'
-
-    print('[-] Rows to delete: ',rows2delete)
-    for i in rows2delete:
-        print('[+] REMOVING ROW :',i,uvdf.loc[i,:].values.tolist())
-
-    # Drop everything in one go and write the file a single time.
-    uvdf.drop(rows2delete, inplace=True)
-    uvdf.to_csv(unverifiedcsvfile, index=False)
-    return True
-	##############################################################################
-
-
-#### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
-
-def IsOnionValid(url: str)-> bool:
-    """
-    Check whether url names a well-formed .onion host.
-
-    Accepts either a bare host ("host.onion", "sub.host.onion") or the same
-    host behind an "http://" scheme, optionally followed by a path. Any other
-    scheme is rejected because ':' and '/' are not valid host characters.
-
-    Args:
-        url: the address to check; surrounding whitespace and one trailing
-            '/' are ignored.
-
-    Returns:
-        True when the host contains only [A-Za-z0-9.], ends in ".onion", has
-        at most one subdomain and is at least 62 characters long; False
-        otherwise, including when url is not a string.
-    """
-    if not isinstance(url, str):
-        # e.g. NaN coming from an empty CSV cell
-        return False
-    url = url.strip().removesuffix('/')
-    # Keep only the host part when the http:// scheme is present.
-    if url.startswith('http://'):
-        host = url.split('/')[2]
-    else:
-        host = url
-    # The ".onion" suffix is mandatory.
-    if re.fullmatch(r"[A-Za-z0-9.]+\.onion", host) is None:
-        return False
-    # Only "host.onion" or "subdomain.host.onion" are allowed.
-    if len(host.split('.')) > 3:
-        return False
-    # A v3 onion host is 56 characters plus ".onion", i.e. 62 in total.
-    return len(host) >= 62
-
-
-
-def IsUrlValid(url:str)->bool:
-    """
-    Check whether url is an acceptable clearnet or .onion address.
-
-    Addresses ending in ".onion" are delegated to IsOnionValid. Anything else
-    must contain at least one '.' and consist only of [A-Za-z0-9:/.].
-
-    Args:
-        url: the address to check.
-
-    Returns:
-        True when the address passes the checks above; False otherwise,
-        including when url is not a string (e.g. NaN from an empty CSV cell).
-    """
-    if not isinstance(url, str):
-        return False
-    if url.endswith('.onion'):
-        # bool() so a None from the onion check is still reported as False
-        return bool(IsOnionValid(url))
-    if '.' not in url:
-        return False
-    return re.fullmatch(r"[A-Za-z0-9:/.]+", url) is not None
-
-def IsStatusValid(status: str)-> bool:
-    """
-    Check that status is one of the accepted status markers.
-
-    The value is converted to text and stripped of surrounding whitespace
-    before being compared, so an empty pandas cell (NaN, rendered as 'nan')
-    is accepted.
-
-    Args:
-        status: the cell value to check; any type, it is passed through str().
-
-    Returns:
-        True for 'y', 'n', '✔️', '❌', '' or 'nan'; False otherwise.
-    """
-    allowed = ('y', 'n', '✔️', '❌', '', 'nan')
-    return str(status).strip() in allowed
-
-def IsScoreValid(score:str)->bool:
-    """
-    Check that score holds only digits, '.' and ',' and is at most 8 chars.
-
-    The value is converted to text and stripped of surrounding whitespace
-    before being checked. An empty value is rejected, and so is an empty
-    pandas cell (NaN, rendered as 'nan').
-
-    Args:
-        score: the cell value to check; any type, it is passed through str().
-
-    Returns:
-        True when the stripped text matches [0-9.,]+ and is no longer than
-        8 characters; False otherwise.
-    """
-    score = str(score).strip()
-    if len(score) > 8:
-        return False
-    return re.fullmatch(r"[0-9.,]+", score) is not None
-
-
-def IsDescriptionValid(desc:str)->bool:
-    """
-    Check that a description only contains [A-Za-z0-9.,' -], 256 chars max.
-
-    The description is optional: a missing value (None, or NaN from an empty
-    CSV cell) and an empty or whitespace-only string are accepted. The
-    placeholder text "DEFAULT" is rejected.
-
-    Args:
-        desc: the cell value to check; surrounding whitespace is ignored.
-
-    Returns:
-        True when the description is missing, empty, or made only of the
-        allowed characters and at most 256 characters long; False otherwise.
-    """
-    if desc is None or (isinstance(desc, float) and desc != desc):
-        # desc != desc is only true for NaN, i.e. an empty CSV cell.
-        return True
-    if not isinstance(desc, str):
-        return False
-    desc = desc.strip()
-    if desc == "":
-        return True
-    if desc == "DEFAULT" or len(desc) > 256:
-        return False
-    return re.fullmatch(r"[A-Za-z0-9.,' -]+", desc) is not None
-
-def IsCategoryValid(categories: list)-> bool:
-    """
-    Check that every category only contains [A-Za-z0-9 ], 64 chars max each.
-
-    Args:
-        categories: either a single category name given as a string, or a
-            list/tuple/set of category names. Surrounding whitespace of each
-            name is ignored.
-
-    Returns:
-        True when at least one category is given and all of them are valid;
-        False otherwise, including for an empty collection, an empty name,
-        or a value that is neither a string nor a collection of strings
-        (e.g. NaN from an empty CSV cell).
-    """
-    if isinstance(categories, str):
-        # A plain string is one category, not a sequence of characters.
-        categories = [categories]
-    elif not isinstance(categories, (list, tuple, set)):
-        return False
-    if not categories:
-        return False
-    for category in categories:
-        if not isinstance(category, str):
-            return False
-        category = category.strip()
-        if len(category) > 64:
-            return False
-        if re.fullmatch(r"[A-Za-z0-9 ]+", category) is None:
-            return False
-    # Only reached when every category passed.
-    return True
-
-def IsNameValid(name: str)->bool:
-    """
-    Check that name only contains [A-Za-z0-9 ] and is 1 to 64 chars long.
-
-    Args:
-        name: the cell value to check; surrounding whitespace is ignored.
-
-    Returns:
-        True when the stripped name is non-empty, at most 64 characters long
-        and made only of letters, digits and spaces; False otherwise,
-        including when name is not a string (e.g. NaN from an empty CSV
-        cell).
-    """
-    if not isinstance(name, str):
-        return False
-    name = name.strip()
-    if len(name) > 64:
-        return False
-    return re.fullmatch(r"[A-Za-z0-9 ]+", name) is not None
-
-
-	
-# Run the CSV sanity checks only when this file is executed as a script.
-if __name__ == '__main__':
-	main()