diff --git a/scripts/lantern.py b/scripts/lantern.py index 65f1cde..81f8975 100644 --- a/scripts/lantern.py +++ b/scripts/lantern.py @@ -1122,7 +1122,9 @@ Maintenance: try: csvdf = pd.read_csv(csvfilepath) print_colors(f"Removing duplicates in {csvfilepath}") - csvdf = csvdf.drop_duplicates(subset=['URL']) + #print_colors(f"{csvdf[['URL']]}") + csvdf = csvdf.drop_duplicates(subset=['URL'], keep="first", inplace=False) + #print_colors(f"{csvdf[['URL']]}") csvdf.to_csv(csvfilepath, index=False) print_colors(f"Cleaned data:\n{csvdf[['URL']]}") except FileNotFoundError: