From cff061f9f2d2ca6ee79ab78429413708626222da Mon Sep 17 00:00:00 2001 From: root Date: Wed, 15 Jan 2025 13:45:49 +0100 Subject: [PATCH] updated --- scripts/darknet_exploration.py | 99 ++++++++++++++++++- .../unverified.csv | 16 --- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/scripts/darknet_exploration.py b/scripts/darknet_exploration.py index e30c2b8..76a2c90 100644 --- a/scripts/darknet_exploration.py +++ b/scripts/darknet_exploration.py @@ -782,14 +782,103 @@ Maintenance: # TODO CASE 9 : cleanup all duplicates in unverified + verified.csv, based on the url (check if each url appears more than once, and if they do, remove them + write to csv file) case "9": print("[+] 9) Cleaning up all duplicates in your own unverified + verified.csv (based on the url)") - # TODO for unverified.csv, and verified.csv - # TODO iterate through each row of the csv file - # TODO get the index of that row (save it as indextocheck), and the url in that row - # TODO for unverified.csv, and verified.csv - # check if that URL appears more than once in both unverified.csv and verified.csv, # ignore it if the index is "indextocheck" and if the index is already listed in rows2delete # else: add the index to "rows2delete" # go drop the rows by their index listed in "rows2delete" +################################ CHECKING FOR DUPLICATES! ######################### + # for unverified.csv, and verified.csv + for w in ['verified.csv','unverified.csv']: + #instancepath=rootpath+'www/participants/'+instance # fyi + csvfilepath=instancepath+'/'+w + print(csvfilepath) + csvdf = pd.read_csv(csvfilepath) + rows2deletevdf= [] # it is an empty list at first + rows2deleteuvdf= [] # it is an empty list at first + # iterate through each row of the csv file + for i,j in csvdf.iterrows(): + #print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description']) + #print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]]) + #row=uvdf.iloc[[i]] #it displays the index + row=csvdf.loc[i,:].values.tolist() + print(row) + + # for each link in the participant's verified/unverified csv files, + # check if the link is already listed in your own verified.csv or unverified.csv + filterterm=csvdf.at[i, 'URL'] + filter_vdf= vdf[vdf.URL.str.contains(filterterm)] + filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm)] + # for unverified.csv, and verified.csv ; + if len(filter_vdf.index) > 1: + # if link exists more than once in verified.csv, remove the duplicates in verified.csv + for m,n in filter_vdf.iterrows(): + if m != i and m not in rows2deletevdf: + rows2deletevdf.append(m) #mark the DUPLICATE ROWS for deletion, meaning the ones that are not i!!! if not already done + #TODO make sure it doesnt mark the previous i indexes for deletion (as it is a duplicate of the next row) + if len(filter_vdf.index) == 1: + # if link exists ONCE in verified.csv check that it doesnt in unverified.csv: + if len(filter_uvdf.index) >= 1: + # if link exists ONCE in verified.csv AND in unverified.csv, cleanup the duplicates in unverified.csv - KO + for m,n in filter_uvdf.iterrows(): + if m != i and m not in rows2deleteuvdf: + rows2deleteuvdf.append(m) #mark the DUPLICATE ROWS for deletion, meaning the ones that are not i!!! if not already done + #TODO make sure it doesnt mark the previous i indexes for deletion (as it is a duplicate of the next row) + + #else: + # link only exists in verified.csv, and not in unverified.csv - OK + if len(filter_vdf.index) == 0: + # if link does not exist in verified.csv, check if it exists in unverified.csv: + if len(filter_uvdf.index) > 1: + # link exists more than once in unverified.csv, get rid of the duplicates in unverified.csv - KO + for m,n in filter_uvdf.iterrows(): + if m != i and m not in rows2deleteuvdf: + rows2deleteuvdf.append(m) #mark the DUPLICATE ROWS for deletion, meaning the ones that are not i!!! if not already done + #TODO make sure it doesnt mark the previous i indexes for deletion (as it is a duplicate of the next row) + #else: + # link either exists once or doesnt exist in unverified.csv, therefore OK + #rows2deletevdf.sort() + #reverse it so that it starts removing the last elements first + #rows2deletevdf = rows2deletevdf[::-1] + print("ROWS TO DELETE IN VERIFIED.CSV:", rows2deletevdf) + if rows2deletevdf != []: + for p in rows2deletevdf: + row=vdf.loc[p,:].values.tolist() + print('[+] REMOVING ROW :',p,row) + vdf.drop(p, inplace= True) + vdf.to_csv(verifiedcsvfile, index=False) + rows2deletevdf= [] # it is an empty list at first + + + #rows2deleteuvdf.sort() + #reverse it so that it starts removing the last elements first + #rows2deleteuvdf = rows2deleteuvdf[::-1] + print("ROWS TO DELETE IN UNVERIFIED.CSV:", rows2deleteuvdf) + if rows2deleteuvdf != []: + for p in rows2deleteuvdf: + row=uvdf.loc[p,:].values.tolist() + print('[+] REMOVING ROW :',p,row) + uvdf.drop(p, inplace= True) + uvdf.to_csv(unverifiedcsvfile, index=False) + rows2deleteuvdf= [] # it is an empty list at first + + + + + + + #if len(filter_uvdf.index) == 1 and len(filter_vdf.index) == 1: + # if link exists only ONCE in verified.csv or unverified.csv, then skip + # if link doesnt exist in either of your verified/unverified csv files, + # then add it to your own unverified.csv file: + # newrow=row + # uvdf.loc[-1] = newrow # adding a row + # uvdf.index = uvdf.index + 1 # shifting index + # uvdf = uvdf.sort_index() # sorting by index + # uvdf.to_csv(unverifiedcsvfile, index=False) + # print("[+] ") + #else: + # print('[-] Skipping row as it is already added in ',w,row,) + + case "10": print("[+] 10) perform sanity checks on all csv files (to mark them as sensitive / or remove the ones that are blacklisted)") # TODO find the list of all csv files (in www/participants/*/*.csv) (templates should remain empty by default) diff --git a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/unverified.csv b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/unverified.csv index 3933f56..aaa4ba0 100644 --- a/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/unverified.csv +++ b/www/participants/webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion/unverified.csv @@ -1,18 +1,2 @@ Instance,Category,Name,URL,Sensitive,Description,Status,Score -uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0 -uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0 -uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0 uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzldruga77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,✔️,List of links to go to popular darknet places,❌,0.0 -uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Hackliberty,Hackliberty Gitea,http://vkp7367tcjpqdwwckigrdrvmwvispvbpg5rlsr2chjxvppfg7hipagyd.onion,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Hackliberty,Hackliberty Gitea,http://vkp7367tcjpqdwwckigrdrvmwvispvbpg5rlsr2chjxvppfg7hipagyd.onion,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Hackliberty,Hackliberty Gitea,http://vkp7367tcjpqdwwckigrdrvmwvispvbpg5rlsr2chjxvppfg7hipagyd.onion,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Hackliberty,Hackliberty Gitea,http://vkp7367tcjpqdwwckigrdrvmwvispvbpg5rlsr2chjxvppfg7hipagyd.onion,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Forums,Hackliberty Forum,http://yw7nc56v4nsudvwewhmhhwltxpncedfuc43qbubj4nmwhdhwtiu4o6yd.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Forums,Hackliberty Forum,http://yw7nc56v4nsudvwewhmhhwltxpncedfuc43qbubj4nmwhdhwtiu4o6yd.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Forums,Hackliberty Forum,http://yw7nc56v4nsudvwewhmhhwltxpncedfuc43qbubj4nmwhdhwtiu4o6yd.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Forums,Hackliberty Forum,http://yw7nc56v4nsudvwewhmhhwltxpncedfuc43qbubj4nmwhdhwtiu4o6yd.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Communities,Hackliberty main website,http://kj3wvs3wyfhm3uhhuqxlrhhcp6dneuau4mmvptlor27ghmrqx63fqnid.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Communities,Hackliberty main website,http://kj3wvs3wyfhm3uhhuqxlrhhcp6dneuau4mmvptlor27ghmrqx63fqnid.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Communities,Hackliberty main website,http://kj3wvs3wyfhm3uhhuqxlrhhcp6dneuau4mmvptlor27ghmrqx63fqnid.onion/,,,✔️,100.0 -webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Communities,Hackliberty main website,http://kj3wvs3wyfhm3uhhuqxlrhhcp6dneuau4mmvptlor27ghmrqx63fqnid.onion/,,,✔️,100.0