"""Refresh the status symbols next to git issue links in opsec/index.html.

The script scans the HTML for anchors that link to an issue of the
blog-contributions repository and whose text starts with a status symbol.
For each one it fetches the issue page, reads the labels, and rewrites the
symbol: a check mark when no label is active, a cross otherwise.
The result is written to opsec/index.autogen.html; the input is not modified.
"""
import re

import requests
from bs4 import BeautifulSoup

GIT_BASE = 'http://git.nowherejezfoltodf4jiyl6r56jnzintap5vyjlia7fkirfsnfizflqd.onion/nihilist/blog-contributions'
# socks5h makes the proxy resolve hostnames, which .onion addresses require.
# NOTE(review): 127.0.0.1:9050 is presumably a local Tor daemon - confirm.
PROXIES = {'http': 'socks5h://127.0.0.1:9050'}
# Seconds to wait for the git server before giving up on one issue.
REQUEST_TIMEOUT = 120

INPUT_PATH = 'opsec/index.html'
OUTPUT_PATH = 'opsec/index.autogen.html'

# Matches an anchor that links to an issue and whose text starts with a status
# symbol. GIT_BASE is escaped so its dots only match literal dots.
# NOTE(review): the leading '<a[^>]*' was reconstructed from a damaged
# pattern - confirm it against the original script.
PATTERN = re.compile(
    r'<a[^>]*href="' + re.escape(GIT_BASE)
    + r'/issues/(?P<issue_id>\d+)"[^>]*>(?P<symbol>[✅❌🚧])',
    re.DOTALL,
)


def check_issue_labels(iid: int) -> dict[str, bool]:
    """Fetch the git page of issue *iid* and return its labels.

    Returns:
        A dict mapping each label's text to True when the label is active,
        or an empty dict when the page has no labels list element.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status. An error page carries no labels and would
            otherwise be indistinguishable from a finished issue.
    """
    r = requests.get(
        GIT_BASE + f'/issues/{iid}',
        proxies=PROXIES,
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # find the labels list element in html
    labels_span = soup.find('span', class_='labels-list ugc-labels')
    if not labels_span:
        return {}

    result_dict = {}
    for anchor in labels_span.find_all("a", class_="item"):
        label_text = anchor.find('div', class_='ui label').get_text(strip=True)
        # if label has 'tw-hidden' class, it means it's not present
        result_dict[label_text] = "tw-hidden" not in anchor['class']
    return result_dict


def is_done_by_labels(labels: dict[str, bool]) -> bool:
    """Return True when no label is active: an issue without labels is done."""
    return not any(labels.values())


def main() -> None:
    """Rewrite every issue status symbol and save the result to OUTPUT_PATH."""
    # The page contains emoji, so read it with the same encoding it is
    # written with instead of the platform default.
    with open(INPUT_PATH, encoding='utf-8') as f:
        html_content = f.read()

    # str is immutable; a list of characters lets symbols be swapped in place
    # at the offsets reported by the regex.
    scratch_hc = list(html_content)

    # iterate over found git issue references
    for match in PATTERN.finditer(html_content):
        issue_id = int(match.group('issue_id'))
        symbol_offset = match.start('symbol')
        print(f"Issue ID: {issue_id}\tSymbol Offset: {symbol_offset}\tCurrent: {html_content[symbol_offset]}\t", end='')

        try:
            issue_labels = check_issue_labels(issue_id)
        except requests.RequestException as exc:
            # Unknown state: keep whatever symbol the page already shows.
            print(f"Skipped, keeping current symbol ({exc})")
            continue

        symbol = '✅' if is_done_by_labels(issue_labels) else '❌'
        print(f"Determined: {symbol}")
        scratch_hc[symbol_offset] = symbol

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as fo:
        fo.write(''.join(scratch_hc))


if __name__ == '__main__':
    main()