From 49e39f481cf0f7d57789cba2a06824cab0a0d845 Mon Sep 17 00:00:00 2001
From: root
Date: Sun, 12 Jan 2025 18:14:21 +0100
Subject: [PATCH] fix the synchronization feature

---
 scripts/darknet_exploration.py   | 248 +++++++++++-------
 .../unverified.csv               |   3 +
 .../webring-participants.csv     |   1 +
 3 files changed, 162 insertions(+), 90 deletions(-)

diff --git a/scripts/darknet_exploration.py b/scripts/darknet_exploration.py
index efeb26f..d3a079c 100644
--- a/scripts/darknet_exploration.py
+++ b/scripts/darknet_exploration.py
@@ -259,105 +259,169 @@ Maintenance:
     for participant in os.listdir(participantsdir):
         participantdir=participantsdir+participant
         #print(participant)
-        # TODO check if the webring participant is yourself, if it is, then skip it
+
+        # NOTE: check if the webring participant is yourself; if it is, skip it
         if participant != myinstance: # prod: don't use your own instance
             #if participant == myinstance: # preprod testing only on your own instance
-            #print("[+] Webring Participant is valid, adding it if it's not already added.")
-            print('[+] PARTICIPANT=',participant)
-            # check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
-            # and display only the matching entries in unverified.csv in an array format (display it in CLI).
-            filter_wdf = wdf[wdf.URL.str.contains(participant)]
-            #print(filter_wdf[['Name','URL']])
-            # check if there are no results, dont proceed if there are none!
-            if filter_wdf.size == 0: #skip if webring participant is already listed, otherwise proceed
-                newrow=[name,participant,desc,trusted,status,score]
-                #print("[+] NEWROW=",newrow)
-                wdf.loc[-1] = newrow # adding a row
-                wdf.index = wdf.index + 1 # shifting index
-                wdf = wdf.sort_index() # sorting by index
-                print("[+] New row added! now writing the csv file:",webringcsvfile)
-                wdf.to_csv(webringcsvfile, index=False)
-            else:
-                print('[+] Webring participant is already listed in your own webring-participants.csv file!')
+            # TODO: overwrite the existing files in the participant's directory with their upstream versions (re-download all of their csv files)
+            basewurl='http://'+participant+'/participants/'+participant+'/'
+            print(basewurl)
+            print('[+] Downloading the files of ',participant, ": ")
+            w_vcsv=basewurl+'verified.csv'
+            w_uvcsv=basewurl+'unverified.csv'
+            #print(CheckUrl(w_uvcsv))
+            w_blcsv=basewurl+'blacklist.csv'
+            #print(CheckUrl(w_blcsv))
+            w_scsv=basewurl+'sensitive.csv'
+            #print(CheckUrl(w_scsv))
+            w_webcsv=basewurl+'webring-participants.csv'
+            #print(CheckUrl(w_webcsv))
 
-            # iterate through the participant's verified.csv and unverified.csv files
-            for w in ['verified.csv','unverified.csv']:
-                csvfilepath=participantdir+'/'+w
-                print(csvfilepath)
-                csvdf = pd.read_csv(csvfilepath)
-                #print(bldf[['blacklisted-words']])
-                bldf[['blacklisted-words']].iterrows()
-                rows2delete= [] # it is an empty list at first
-                for i,j in csvdf.iterrows():
-                    #print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
-                    #print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
-                    #row=uvdf.iloc[[i]] #it displays the index
-                    row=csvdf.loc[i,:].values.tolist()
-                    print(row)
-                    #print(i,row)
+            # verify that all of the participant's csv files are reachable at basewurl
+            if CheckUrl(w_vcsv) is False or CheckUrl(w_uvcsv) is False or CheckUrl(w_blcsv) is False or CheckUrl(w_scsv) is False or CheckUrl(w_webcsv) is False:
+                print("[-] Webring Participant isn't reachable, skipping")
+                #return False # don't do anything if the webring participant isn't reachable
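+                # NOTE: an unreachable participant is only skipped for this run;
+                # the csv files synced on a previous run are left untouched.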
+
+            else: # if the webring participant is reachable, proceed
+                print("[+] Webring Participant is reachable, updating their csv files:")
+                for i in ['verified.csv','unverified.csv','blacklist.csv','sensitive.csv','webring-participants.csv']:
+                    # FOR EACH CSV FILE TO GET:
+                    # URL: basewurl / FILE.CSV
+                    # PATH: participantdir / FILE.CSV
+                    #print('[+] DOWNLOADING ',basewurl+i)
+                    # download the external csv file and save it into the "text" variable:
+                    #response = urllib.request.urlopen(basewurl+i)
+                    response = requests.get(basewurl+i, proxies=proxies)
+                    #data = response.read() # a `bytes` object
+                    #text = data.decode('utf-8')
+                    text = response.text
+                    # save the text variable into the destination file:
+                    #print('[+] SAVING IT INTO ',participantdir+'/'+i)
+                    csvfilepath=participantdir+'/'+i
+                    with open(csvfilepath, "w") as file:
+                        file.write(text)
+                    #print("[+] file written, let's read it")
+                    #f = open(csvfilepath,"r") # (debug only; left open, this leaked a file handle)
+                    #print(f.read())
+
+                # download the banner.png image:
+
+                bannerurl=basewurl+'banner.png'
+                bannerpath=participantdir+'/banner.png'
+                r = requests.get(bannerurl, stream=True, proxies=proxies)
+                with open(bannerpath, 'wb') as f:
+                    r.raw.decode_content = True
+                    shutil.copyfileobj(r.raw, f)
+
+                # SANITY CHECK ON THE BANNER PNG IMAGE:
+                if IsBannerValid(bannerpath):
+                    #print('[+] Banner is valid')
+                    pass
+                else:
+                    # if invalid, overwrite it with the template banner png file
+                    #print('[-] Banner is not valid, replacing it with the default banner')
+                    os.remove(bannerpath)
+                    # copy templates/banner.png to bannerpath
+                    bannertemplatepath=templatepath+'banner.png'
+                    shutil.copyfile(bannertemplatepath, bannerpath)
+
+
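+                # with all files mirrored locally, register the participant in your own
+                # webring-participants.csv (if they are new) and merge their link rows: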
+                #print("[+] Webring Participant is valid, adding it if it's not already added.")
+                #print('[+] PARTICIPANT=',participant)
+                # check if the participant is already listed in webring-participants.csv or not, and add them if not already listed
+                # and display only the matching entries in unverified.csv in an array format (display it in CLI).
+                filter_wdf = wdf[wdf.URL.str.contains(participant, regex=False)] # regex=False: match the address literally
+                #print(filter_wdf[['Name','URL']])
+                # no matches means the participant is not listed yet
+                if filter_wdf.size == 0: # only add the webring participant if they are not already listed
+                    newrow=[name,participant,desc,trusted,status,score]
+                    #print("[+] NEWROW=",newrow)
+                    wdf.loc[-1] = newrow # adding a row
+                    wdf.index = wdf.index + 1 # shifting index
+                    wdf = wdf.sort_index() # sorting by index
+                    #print("[+] New row added! now writing the csv file:",webringcsvfile)
+                    wdf.to_csv(webringcsvfile, index=False)
+                else:
+                    pass
+                    #print('[+] Webring participant is already listed in your own webring-participants.csv file!')
+
+                # iterate through the participant's verified.csv and unverified.csv files
+                for w in ['verified.csv','unverified.csv']:
+                    csvfilepath=participantdir+'/'+w
+                    print(csvfilepath)
+                    csvdf = pd.read_csv(csvfilepath)
+                    #print(bldf[['blacklisted-words']])
+                    #bldf[['blacklisted-words']].iterrows() # (dead code) the result was never used
+                    rows2delete = [] # start with an empty list of rows to delete
+                    for i,j in csvdf.iterrows():
+                        #print("[+] Unverified.csv ROW=",i, uvdf.at[i, 'Instance'], uvdf.at[i, 'Category'], uvdf.at[i, 'Name'], uvdf.at[i, 'URL'], uvdf.at[i, 'Description'])
+                        #print("[+] Unverified.csv ROW=",i, uvdf.iloc[[i]])
+                        #row=uvdf.iloc[[i]] #it displays the index
+                        row=csvdf.loc[i,:].values.tolist()
+                        print(row)
+                        #print(i,row)
 
-                    ################################ SANITY CHECKS ####################################
-                    ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion###
-                    #print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
-                    if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
-                        #mark the row for deletion as it has invalid inputs
-                        if i not in rows2delete:
-                            print("Marking row", i,"for deletion, as it has invalid inputs")
-                            rows2delete.append(i) #mark the row for deletion if not already done
-
-                    ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
-                    for k,l in bldf.iterrows():
-                        #print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
-                        blword=bldf.at[k, 'blacklisted-words']
-                        if any(blword in str(x) for x in row) == True:
-                            #print("found blacklisted word! marking row for deletion")
-                            if i not in rows2delete:
-                                print("Marking row", i,"for deletion, as it matches with a blacklisted word")
-                                rows2delete.append(i) #mark the row for deletion if not already done
-                        else:
-                            # not a blacklisted link, therefore it is suitable to be added to your own csv files:
-                            ################################ CHECKING FOR DUPLICATES! #########################
-                            # for each link in the participant's verified/unverified csv files,
-                            # check if the link is already listed in your own verified.csv or unverified.csv
-                            filterterm=csvdf.at[i, 'URL']
-                            filter_vdf= vdf[vdf.URL.str.contains(filterterm)]
-                            filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm)]
-                            if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
-                                #if link doesnt exist in either of your verified/unverified csv files,
-                                # then add it to your own unverified.csv file:
-                                newrow=row
-                                uvdf.loc[-1] = newrow # adding a row
-                                uvdf.index = uvdf.index + 1 # shifting index
-                                uvdf = uvdf.sort_index() # sorting by index
-                                uvdf.to_csv(unverifiedcsvfile, index=False)
-                                print("[+] New row added to your own unverified.csv file!")
-                            else:
-                                print('[-] Skipping row as it is already added in ',w,row,)
-
-                    ###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE###################
-
-                    ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
-                    for k,l in sedf.iterrows():
-                        #print("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
-                        seword=sedf.at[k, 'sensitive-words']
-                        if any(seword in str(x) for x in row) == True:
-                            if csvdf.at[i, 'Sensitive'] != '✔️':
-                                print("Marking row", i,"as sensitive, as it matches with a sensitive word")
-                                csvdf.at[i, 'Sensitive']='✔️'
-
-                print('[-] Rows to delete: ',rows2delete)
-                # only delete rows after you've gone through all the unverified.csv OR verified.csv rows'
-                for i in rows2delete:
-                    row=csvdf.loc[i,:].values.tolist()
-                    print('[+] REMOVING ROW :',i,row)
-                    csvdf.drop(i, inplace= True)
-                csvdf.to_csv(csvfilepath, index=False)
-                rows2delete= [] # it is an empty list at first
+                        ################################ SANITY CHECKS ####################################
+                        ### SANITY CHECK 1: Mark all the rows that have incorrect formatting for deletion ###
+                        #print("[+] ROW=",i,"ROW CONTENTS=", IsUrlValid(uvdf.at[i, 'Instance']), IsCategoryValid(uvdf.at[i, 'Category']), IsNameValid(uvdf.at[i, 'Name']), IsUrlValid(uvdf.at[i, 'URL']), IsStatusValid(uvdf.at[i, 'Sensitive']), IsDescriptionValid(uvdf.at[i, 'Description']), IsStatusValid(uvdf.at[i, 'Status']), IsScoreValid(uvdf.at[i, 'Score']))
+                        if IsUrlValid(csvdf.at[i, 'Instance']) is False or IsCategoryValid(csvdf.at[i, 'Category']) is False or IsNameValid(csvdf.at[i, 'Name']) is False or IsUrlValid(csvdf.at[i, 'URL']) is False or IsStatusValid(csvdf.at[i, 'Sensitive']) is False or IsDescriptionValid(csvdf.at[i, 'Description']) is False or IsStatusValid(csvdf.at[i, 'Status']) is False or IsScoreValid(csvdf.at[i, 'Score']) is False:
+                            # mark the row for deletion as it has invalid inputs
+                            if i not in rows2delete:
+                                print("Marking row", i,"for deletion, as it has invalid inputs")
+                                rows2delete.append(i) # mark the row for deletion if not already done
+
+                        ### SANITY CHECK 2: Mark all rows that are not allowed (blacklist) for deletion ###
+                        for k,l in bldf.iterrows():
+                            #print("[+] Blacklisted word=",k, bldf.at[k, 'blacklisted-words'])
+                            blword=bldf.at[k, 'blacklisted-words']
+                            if any(blword in str(x) for x in row):
+                                #print("found blacklisted word! marking row for deletion")
+                                if i not in rows2delete:
+                                    print("Marking row", i,"for deletion, as it matches with a blacklisted word")
+                                    rows2delete.append(i) # mark the row for deletion if not already done
+                            else:
+                                # not a blacklisted link, therefore it is suitable to be added to your own csv files:
+                                ################################ CHECKING FOR DUPLICATES! #########################
+                                # for each link in the participant's verified/unverified csv files,
+                                # check if the link is already listed in your own verified.csv or unverified.csv
+                                filterterm=csvdf.at[i, 'URL']
+                                filter_vdf = vdf[vdf.URL.str.contains(filterterm, regex=False)] # regex=False: match the url literally
+                                filter_uvdf = uvdf[uvdf.URL.str.contains(filterterm, regex=False)]
+                                if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
+                                    # if the link doesn't exist in either of your verified/unverified csv files,
+                                    # then add it to your own unverified.csv file:
+                                    newrow=row
+                                    uvdf.loc[-1] = newrow # adding a row
+                                    uvdf.index = uvdf.index + 1 # shifting index
+                                    uvdf = uvdf.sort_index() # sorting by index
+                                    uvdf.to_csv(unverifiedcsvfile, index=False)
+                                    print("[+] New row added to your own unverified.csv file!")
+                                else:
+                                    print('[-] Skipping row as it is already listed in', w, row)
+
+                        ###################### APPENDING TO YOUR OWN UNVERIFIED.CSV FILE ###################
+
+                        ### SANITY CHECK 3: Mark all the rows that are supposed to be sensitive ###
+                        for k,l in sedf.iterrows():
+                            #print("[+] Sensitive word=",k, sedf.at[k, 'sensitive-words'])
+                            seword=sedf.at[k, 'sensitive-words']
+                            if any(seword in str(x) for x in row):
+                                if csvdf.at[i, 'Sensitive'] != '✔️':
+                                    print("Marking row", i,"as sensitive, as it matches with a sensitive word")
+                                    csvdf.at[i, 'Sensitive']='✔️'
+
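+                    # deletion is deferred until after the full pass, so that dropping
+                    # rows does not shift the dataframe indices mid-iteration: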
+                    print('[-] Rows to delete: ',rows2delete)
+                    # only delete rows after going through all of the csv file's rows
+                    for i in rows2delete:
+                        row=csvdf.loc[i,:].values.tolist()
+                        print('[+] REMOVING ROW :',i,row)
+                        csvdf.drop(i, inplace=True)
+                    csvdf.to_csv(csvfilepath, index=False)
+                    rows2delete = [] # reset the list for the next csv file
@@ -682,18 +746,18 @@ def CheckUrl(url):
     }
     try:
         status = requests.get(url,proxies=proxies, timeout=5).status_code
-        print('[+]',url,status)
+        #print('[+]',url,status)
         if status != 502:
-            print(url,"✔️")
+            #print(url,"✔️")
            return True
         else:
-            print(url,"❌")
+            #print(url,"❌")
             return False
     except requests.ConnectionError as e:
-        print(url,"❌")
+        #print(url,"❌")
         return False
     except requests.exceptions.ReadTimeout as e:
-        print(url,"❌")
+        #print(url,"❌")
         return False
 
 #### PROTECTIONS AGAINST MALICIOUS CSV INPUTS ####
@@ -825,6 +889,10 @@ def IsScoreValid(score:str)->bool:
     pattern = re.compile("^[0-9.,]+$")
     score = str(score)
     score.strip()
+    #pattern = ['','nan']
+    if score in ['','nan']:
+        # the score can be empty when a row is first added
+        return True
     if pattern.fullmatch(score) is None:
     # empty description is fine as it's optional
         return False
diff --git a/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/unverified.csv b/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/unverified.csv
index 1ff856e..2eeab70 100644
--- a/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/unverified.csv
+++ b/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/unverified.csv
@@ -1,4 +1,7 @@
 Instance,Category,Name,URL,Sensitive,Description,Status,Score
+webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Forums,Hackliberty Forum,http://yw7nc56v4nsudvwewhmhhwltxpncedfuc43qbubj4nmwhdhwtiu4o6yd.onion/,,,,
+webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Communities,Hackliberty main website,http://kj3wvs3wyfhm3uhhuqxlrhhcp6dneuau4mmvptlor27ghmrqx63fqnid.onion/,,,,
+webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,Hackliberty,Hackliberty Gitea,http://vkp7367tcjpqdwwckigrdrvmwvispvbpg5rlsr2chjxvppfg7hipagyd.onion,,,,
 uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0
 uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0
 uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,Infos and Links,Tor Taxi,http://tortaxi2dev6xjwbaydqzla77rrnth7yn2oqzjfmiuwn5h6vsk2a4syd.onion/,,List of links to go to popular darknet places,✔️,100.0
diff --git a/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/webring-participants.csv b/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/webring-participants.csv
index 4c00ada..72905aa 100644
--- a/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/webring-participants.csv
+++ b/www/participants/uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/webring-participants.csv
@@ -1,4 +1,5 @@
 Name,URL,Description,Trusted,Status,Score
+,webring.nowhevi57f4lxxd6db43miewcsgtovakbh6v5f52ci7csc2yjzy5rnid.onion,,,,
 ,uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,,,,
 Nowhere,uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,,,,
 Nowhere,uptime.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion,,,,
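
Reviewer note (illustration, not part of the patch): per participant, the synchronization now runs in three stages: probe all five csv endpoints over the Tor SOCKS proxy, mirror the files (plus banner.png) into the local participant directory, and only then merge the remote rows that survive the sanity checks. Below is a condensed sketch of the probe-and-mirror half, assuming the same `proxies` layout the script uses elsewhere (socks5h requires requests[socks]); the helper names `probe` and `mirror` are illustrative and do not exist in this codebase:

    import os
    import requests

    # assumption: same SOCKS5 proxy layout as the script's own `proxies` dict
    PROXIES = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050',
    }
    CSVFILES = ['verified.csv', 'unverified.csv', 'blacklist.csv',
                'sensitive.csv', 'webring-participants.csv']

    def probe(basewurl):
        # reachable only if every csv endpoint answers with a non-502 status
        for name in CSVFILES:
            try:
                status = requests.get(basewurl + name, proxies=PROXIES, timeout=5).status_code
            except (requests.ConnectionError, requests.exceptions.ReadTimeout):
                return False
            if status == 502:
                return False
        return True

    def mirror(basewurl, participantdir):
        # overwrite the local copies with the participant's upstream versions
        os.makedirs(participantdir, exist_ok=True)
        for name in CSVFILES:
            text = requests.get(basewurl + name, proxies=PROXIES).text
            with open(os.path.join(participantdir, name), 'w') as f:
                f.write(text)

    # hypothetical usage; example.onion is a placeholder address
    if probe('http://example.onion/participants/example.onion/'):
        mirror('http://example.onion/participants/example.onion/', 'www/participants/example.onion/')

Probing every file before mirroring any of them is what keeps a half-reachable instance from leaving a partially overwritten local directory, matching the all-or-nothing behaviour the patch introduces.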