Compare commits (5 commits)
@@ -635,6 +635,7 @@ Maintenance:
print("[+] Removing the participant's duplicate entries... ")
# REMOVE DUPLICATES: do not accept any duplicate URL or Name from remote participants
csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
csvdf = csvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
csvdf.to_csv(csvfilepath, index=False)
csvdf = pd.read_csv(csvfilepath, on_bad_lines='skip')
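# A minimal sketch of the two-pass dedup above (toy data, illustrative only):
# the first occurrence wins per URL, then per Name, so a row must be unique
# on both columns to survive.
import pandas as pd
toy = pd.DataFrame({'Name': ['SiteA', 'SiteA', 'SiteB'],
                    'URL': ['http://a.onion', 'http://a2.onion', 'http://a.onion']})
toy = toy.drop_duplicates(subset=['URL'], keep="first")   # drops the SiteB row (URL already seen)
toy = toy.drop_duplicates(subset=['Name'], keep="first")  # drops the second SiteA row (Name already seen)
print(toy)  # only the first SiteA row survives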
@@ -649,6 +650,7 @@ Maintenance:
################################ SANITY CHECKS ####################################
### SANITY CHECK 0: make sure that ✔️ and x are replaced with YES/NO, as it changed since v1.0.1 ###
if csvdf.at[i, 'Status'] == "✔️" or csvdf.at[i, 'Status'] == "YES":
@@ -689,13 +691,16 @@ Maintenance:
filterterm = csvdf.at[i, 'URL']
#print('1)',filterterm)
filter_vdf = vdf[vdf.URL.str.contains(filterterm, na=False)]
nameterm = csvdf.at[i, 'Name']
filter_vdf2 = vdf[vdf.Name.str.contains(nameterm, na=False)] # do not accept the new link if the Name already exists in verified.csv
#print('2)',filter_vdf)
#print('3)',uvdf[uvdf.URL.str.contains(filterterm,na=False)] )
uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
# delete all URL and Name duplicates from uvdf before checking against it
uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False)
uvdf = uvdf.drop_duplicates(subset=['Name'], keep="first", inplace=False)
filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)]
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0:
filter_uvdf2 = uvdf[uvdf.Name.str.contains(nameterm, na=False)] # do not accept the new link if the Name already exists in unverified.csv
if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0 and len(filter_uvdf2.index) == 0 and len(filter_vdf2.index) == 0:
    newrow = row
    uvdf.loc[-1] = newrow # adding a row
    uvdf.index = uvdf.index + 1 # shifting index
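# Sketch of the prepend idiom above (toy frame, illustrative only): writing to
# label -1 adds a row, and shifting every index by 1 turns that label into 0,
# so a later sort_index() would place it as the logical first row.
import pandas as pd
toy = pd.DataFrame({'Name': ['B'], 'URL': ['http://b.onion']})
toy.loc[-1] = ['A', 'http://a.onion']  # new row stored under label -1
toy.index = toy.index + 1              # labels become 1 (B) and 0 (A)
toy = toy.sort_index()                 # row A now sorts first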
@@ -723,12 +728,66 @@ Maintenance:
#print_colors(f'[-] Rows to delete: {rows2delete}', is_error=True)
# only delete rows after you've gone through all the unverified.csv OR verified.csv rows
# check for Name duplicates and mark them for deletion:
# remove Name duplicates that are in unverified.csv yet already exist in verified.csv (verified.csv takes priority)
if w == 'unverified.csv':
    try:
        # check if the given row Name already exists in verified.csv
        filterterm = csvdf.at[i, 'Name']
        filter_vdf = vdf[vdf.Name.str.contains(filterterm, na=False)]
        print('[+] CHECKING FOR DUPLICATES: ', filterterm)
        if len(filter_vdf.index) != 0:
            # drop the unverified.csv row if its Name already exists in verified.csv
            print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ', row)
            rows2delete.append(i) # mark the row for deletion
    except Exception:
        pass
for i in rows2delete:
    row = csvdf.loc[i,:].values.tolist()
    print_colors(f'[+] REMOVING ROW: {i}{row}')
    csvdf.drop(i, inplace=True)
csvdf.to_csv(csvfilepath, index=False)
rows2delete = [] # reset the list of rows marked for deletion
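# Why rows are only dropped after the scan (toy sketch, illustrative only):
# dropping mid-iteration risks looking up labels that no longer exist, so the
# labels are collected into rows2delete first and dropped in one pass.
import pandas as pd
toy = pd.DataFrame({'Name': ['A', 'A', 'B']})
to_drop = [i for i, r in toy.iterrows() if r['Name'] == 'A' and i != 0]
toy = toy.drop(to_drop)  # the first 'A' and 'B' remain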
# fill in descriptions missing from our unverified.csv that other participants' verified.csv files have filled
if w == 'verified.csv':
    uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip')
    # merge the participant's verified.csv onto our unverified.csv on URL
    merged_df = uvdf.merge(csvdf[['URL', 'Description']],
                           on='URL',
                           how='left',
                           suffixes=('', '_participant'))
    # select rows whose Description is empty but whose participant Description is filled
    no_descr_filter = ((merged_df['Description'].isna()) | (merged_df['Description'].str.strip() == '')) & \
        (~merged_df['Description_participant'].isna()) & (merged_df['Description_participant'].str.strip() != '')
    no_descr_filter_count = no_descr_filter.sum()
    # update our empty descriptions wherever the participant has one filled in
    if no_descr_filter_count > 0:
        merged_df.loc[no_descr_filter, 'Description'] = merged_df.loc[no_descr_filter, 'Description_participant']
        # keep only the original columns
        uvdf_updated = merged_df[uvdf.columns]
        uvdf_updated.to_csv(unverifiedcsvfile, index=False)
        uvdf = uvdf_updated # keep working on the updated frame so the rewrite below doesn't clobber these updates
        print(f'[+] Updated {no_descr_filter_count} empty description(s) in your unverified.csv found in participant\'s {w}')
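    # A toy walk-through of the merge-and-fill above (illustrative only):
    #   ours   = pd.DataFrame({'URL': ['u1', 'u2'], 'Description': ['', 'kept']})
    #   theirs = pd.DataFrame({'URL': ['u1'], 'Description': ['from participant']})
    #   m = ours.merge(theirs, on='URL', how='left', suffixes=('', '_participant'))
    # yields one row per URL with both descriptions side by side; the mask picks
    # only 'u1' (ours empty, theirs filled), so after the .loc assignment 'u1'
    # reads 'from participant' while 'u2' keeps its own text.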
    # remove all Name duplicates from your own unverified.csv file:
    for i, j in uvdf.iterrows():
        row = uvdf.loc[i,:].values.tolist()
        # check if the given row Name already exists in verified.csv
        filterterm = uvdf.at[i, 'Name']
        filter_vdf = vdf[vdf.Name.str.contains(filterterm, na=False)]
        print('[+] CHECKING FOR DUPLICATES: ', filterterm)
        if len(filter_vdf.index) != 0:
            # drop the unverified.csv row if its Name already exists in verified.csv
            print('[+] DUPLICATE FOUND, MARKING ROW FOR DELETION: ', row)
            rows2delete.append(i) # mark the row for deletion
    for i in rows2delete:
        row = uvdf.loc[i,:].values.tolist()
        print_colors(f'[+] REMOVING ROW: {i}{row}')
        uvdf.drop(i, inplace=True)
    uvdf.to_csv(unverifiedcsvfile, index=False)
    rows2delete = [] # reset the list of rows marked for deletion
break
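# One caveat with the str.contains() checks above (illustrative sketch): they do
# substring/regex matching, so a short Name matches any longer Name containing
# it, and regex metacharacters in a Name can break the pattern. An exact
# comparison avoids both, at the cost of missing near-duplicates:
import pandas as pd
toy = pd.DataFrame({'Name': ['Ahmia Search']})
print(toy.Name.str.contains('Ahmia', na=False).any())  # True  (substring hit)
print((toy.Name == 'Ahmia').any())                     # False (exact match only)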