From 5204164f9400e2bce6eb694ffbc08b6a6aaf07fe Mon Sep 17 00:00:00 2001 From: root Date: Fri, 9 May 2025 14:42:57 +0200 Subject: [PATCH] fix lantern --- scripts/lantern.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/lantern.py b/scripts/lantern.py index 43e5c8c..746a1da 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -674,6 +674,7 @@ Maintenance: uvdf = pd.read_csv(unverifiedcsvfile, on_bad_lines='skip') # TODO DELETE ALL DUPLICATES OF UVDF ! uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False) + uvdf.to_csv(unverifiedcsvfile, index=False) filter_uvdf= uvdf[uvdf.URL.str.contains(filterterm,na=False)] if len(filter_uvdf.index) == 0 and len(filter_vdf.index) == 0: newrow=row @@ -709,6 +710,13 @@ Maintenance: csvdf.drop(i, inplace= True) csvdf.to_csv(csvfilepath, index=False) rows2delete= [] # it is an empty list at first + # TODO DELETE ALL DUPLICATES OF UVDF ! + uvdf = uvdf.sort_index() # sorting by index + uvdf = uvdf.sort_values(by=["Category","Score"], ascending=[True,False]) # sorting categories + + uvdf = uvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False) + uvdf.to_csv(unverifiedcsvfile, index=False) + break