From 212e44c3a560922d60c9b8aa091ddcbad7ded0a4 Mon Sep 17 00:00:00 2001
From: root
Date: Sat, 3 May 2025 22:54:04 +0200
Subject: [PATCH] finish option 12: listing crawled links

---
 .../regex_simplexlinks.cpython-311.pyc |  Bin 0 -> 3664 bytes
 scripts/lantern.py                     |  130 +++++++++++++++++-
 submissions/README.md                  |    0
 3 files changed, 127 insertions(+), 3 deletions(-)
 create mode 100644 SimpleX/__pycache__/regex_simplexlinks.cpython-311.pyc
 mode change 100644 => 100755 submissions/README.md

diff --git a/SimpleX/__pycache__/regex_simplexlinks.cpython-311.pyc b/SimpleX/__pycache__/regex_simplexlinks.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1fe6ae3617299a83a0623e5f009a8858fe1b7e87
GIT binary patch
literal 3664
[base85-encoded binary data omitted]

diff --git a/scripts/lantern.py b/scripts/lantern.py
index 19c9cfb..a31acc2 100644
--- a/scripts/lantern.py
+++ b/scripts/lantern.py
@@ -130,6 +130,7 @@ def main():
     secsvfile=instancepath+'/sensitive.csv'
     webpcsvfile=instancepath+'/webring-participants.csv'
     submission_file_abs_path = os.path.abspath('submissions/submission.csv')
+    crawled_file_abs_path = os.path.abspath('crawler/onion_crawler.csv')
 
     if not os.path.exists(instancepath):
         print_colors(f"{rootpath}",is_error=True, bold=True)
@@ -181,11 +182,12 @@ Managing Wordlists:
 Maintenance:
     9) Remove the duplicate URLs for your own instance
     10) Perform sanity checks on all csv files for all instances (to mark them as sensitive / or remove the ones that are blacklisted)
-    11) Review submissions (Add to verified.csv/ add to unverified.csv/ delete /blacklist)
+    11) Review submissions (Add to verified.csv / add to unverified.csv / delete / blacklist)
+    12) Review crawled websites (Add to verified.csv / add to unverified.csv / delete / blacklist)
 
     0) Exit
 """)
-        option = input("Select an option? (0-11): ").strip()
+        option = input("Select an option? (0-12): ").strip()
         try:
             option = int(option)
         except ValueError:
@@ -1202,6 +1204,7 @@ Maintenance:
                 break
 
             case 11:
+                # review the submitted websites
                 try:
                     submission_df = pd.read_csv(submission_file_abs_path)
                     verified_csv_df = pd.read_csv(verifiedcsvfile)
@@ -1215,6 +1218,7 @@ Maintenance:
                         print_colors("\n1) Move entry to verified.csv \n2) Move entry from submission.csv to unverified.csv \n3) Delete from submission.csv file \n4) Add to blacklist.csv \n-1) exit")
                         if link in blacklisted_words:
                             print_colors("Black listed entry found", bold=True)
+                            #TODO delete the entry as it's already blacklisted
                             continue
                         else:
                             name = row['name']
@@ -1277,7 +1281,127 @@ Maintenance:
 
                     break
                 finally:
-                    print_colors("End of file")
+                    print_colors("No more submissions to review, exiting.")
+
+
+            case 12:
+                # review the crawled websites
+                try:
+                    print(crawled_file_abs_path)
+                    crawled_df = pd.read_csv(crawled_file_abs_path)
+                    verified_csv_df = pd.read_csv(verifiedcsvfile)
+                    unverified_csv_df = pd.read_csv(unverifiedcsvfile)
+                    blacklist_df = pd.read_csv(blcsvfile)
+                    blacklisted_words = [word for word in blacklist_df['blacklisted-words']]
+                    for i, row in crawled_df.iterrows():
+                        link = row['URL']
+                        print('\n', row[['URL','Category','Name']])
+                        print('\nLink to verify: ', link)
+                        print_colors("\n1) Move entry to verified.csv \n2) Move entry to unverified.csv \n3) Delete from the crawled csv file \n4) Add to blacklist.csv \n-1) exit")
+                        if link in blacklisted_words:
+                            print_colors("Blacklisted entry found", bold=True)
+                            # the entry is already blacklisted, so drop it from the crawled list
+                            crawled_df.drop(index=i, inplace=True)
+                            crawled_df.to_csv(crawled_file_abs_path, index=False)
+                            continue
+                        else:
+                            name = row['Name']
+                            category = row['Category']
+                            #desc = row['esc']
+                            desc = ''
+                            #sensi = "YES" if row['sensitive'] == 'y' else "NO"
+                            sensi = ''
+                            number = int(input("Enter an option: "))
+                            if number == 1:
+                                # Add to verified.csv
+                                # ask the name if invalid
+                                while(IsNameValid(name) is not True):
+                                    name = input("What is the name of the website? ")
+
+                                # ask the category
+                                while((IsCategoryValid(category) != True) or (category == 'Tor Hidden Service')):
+                                    category = input("What is the website Category? (ex: Indexes) ")
+                                # ask the sensitivity
+                                choice = input("Is the website sensitive? (ex: related to drugs) (y/n) ")
+                                if choice == "n":
+                                    sensi = 'NO'
+                                else:
+                                    sensi = 'YES'
+
+                                # ask if it's sensitive or not
+                                # ask the user to write a description
+                                newrow=[instance,category,name,link,sensi,desc,'YES','100']
+                                verified_csv_df.loc[-1] = newrow  # adding a row
+                                verified_csv_df.index = verified_csv_df.index + 1  # shifting index
+                                verified_csv_df = verified_csv_df.sort_index()  # sorting by index
+                                verified_csv_df = verified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
+                                print_colors("[+] New row added! now writing the csv file")
+                                verified_csv_df.to_csv(verifiedcsvfile, index=False)
+                                crawled_df.drop(index=i, inplace=True)
+                                crawled_df.to_csv(crawled_file_abs_path, index=False)
+                            elif number == 2:
+                                # Add to unverified.csv
+                                # consider it as sensitive by default and category must just be 'crawled'
+                                # ask the name if invalid
+                                while(IsNameValid(name) is not True):
+                                    name = input("What is the name of the website? ")
+                                # ask the category
+                                print('CATEGORY = ', category)
+                                while((IsCategoryValid(category) != True) or (category == 'Tor Hidden Service')):
+                                    category = input("What is the website Category? (ex: Indexes) ")
+                                choice = input("Is the website sensitive? (ex: related to drugs) (y/n) ")
+                                if choice == "n":
+                                    sensi = 'NO'
+                                else:
+                                    sensi = 'YES'
+                                # ask for the category, if empty then the category is 'crawled'
+                                # add new row
+                                newrow=[instance,category,name,link,sensi,desc,'YES','100']
+
+                                unverified_csv_df.loc[-1] = newrow  # adding a row
+                                unverified_csv_df.index = unverified_csv_df.index + 1  # shifting index
+                                unverified_csv_df = unverified_csv_df.sort_index()  # sorting by index
+                                unverified_csv_df = unverified_csv_df.sort_values(by=["Category","Score"], ascending=[True,False])  # sorting categories
+                                print_colors("[+] New row added! now writing the csv file")
+                                unverified_csv_df.to_csv(unverifiedcsvfile, index=False)
+                                crawled_df.drop(index=i, inplace=True)
+                                crawled_df.to_csv(crawled_file_abs_path, index=False)
+
+                            elif number == 3:
+                                # Delete from onion_crawler.csv
+                                crawled_df.drop(index=i, inplace=True)
+                                crawled_df.to_csv(crawled_file_abs_path, index=False)
+
+                            elif number == 4:
+                                # Add to blacklist.csv
+                                newrow=[link]
+
+                                blacklist_df.loc[-1] = newrow  # adding a row
+                                blacklist_df.index = blacklist_df.index + 1  # shifting index
+                                blacklist_df = blacklist_df.sort_index()  # sorting by index
+                                print_colors("[+] New row added! now writing the csv file")
+                                blacklist_df.to_csv(blcsvfile, index=False)
+                                crawled_df.drop(index=i, inplace=True)
+                                crawled_df.to_csv(crawled_file_abs_path, index=False)
+
+                            elif number == -1:
+                                break
+
+                            else:
+                                print_colors("Invalid number", is_error=True)
+                                continue
+
+
+                except Exception as e:
+                    print_colors(f'Try again {e}', is_error=True)
+                    break
+
+                finally:
+                    print_colors("No more crawled websites to review, exiting.")
                     break
 
             case 0:
diff --git a/submissions/README.md b/submissions/README.md
old mode 100644
new mode 100755