diff --git a/scripts/lantern.py b/scripts/lantern.py index a4d973d..ed68d32 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -729,6 +729,26 @@ Maintenance: csvdf.drop(i, inplace= True) csvdf.to_csv(csvfilepath, index=False) rows2delete= [] # it is an empty list at first + + # fill missing description in our unverified.csv that other participants verified.csv have filled + if w == 'verified.csv': + uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip') + # merge participant's verified.csv on our unverified.csv on URL + merged_df = uvdf.merge(csvdf[['URL', 'Description']], + on='URL', + how='left', + suffixes=('', '_participant')) + # filter empty description that has participant's description + no_descr_filter = ((merged_df['Description'].isna()) | (merged_df['Description'].str.strip() == '')) & \ + (~merged_df['Description_participant'].isna()) & (merged_df['Description_participant'].str.strip() != '') + no_descr_filter_count = no_descr_filter.sum() + # update our empty description if the participant has any filled description + if no_descr_filter_count > 0: + merged_df.loc[no_descr_filter, 'Description'] = merged_df.loc[no_descr_filter, 'Description_participant'] + # keep only original columns + uvdf_updated = merged_df[uvdf.columns] + uvdf_updated.to_csv(unverifiedcsvfile, index=False) + print(f'[+] Updated {no_descr_filter_count} empty description(s) in your unverified.csv found on partipant\'s {w}') break