Skip to content

Instantly share code, notes, and snippets.

@marirs
Created August 3, 2020 12:18
Show Gist options
  • Save marirs/9dbd962716337817bba99edb45afb47f to your computer and use it in GitHub Desktop.
Save marirs/9dbd962716337817bba99edb45afb47f to your computer and use it in GitHub Desktop.
Given a list of date objects - remove duplicates from that list
#!/usr/bin/env python
import datetime
import time
import calendar
from typing import List
def dedupe_dates(lst: list, precision: str="day", tz_aware: bool=False) -> List:
    """Remove duplicates from a list of datetime objects.

    Two entries are duplicates when they share the same calendar day
    (``precision="day"``) or the same second (``precision="time"``).
    Microseconds are never compared.

    Every distinct key keeps the position of its first occurrence in
    ``lst`` but the value of its last occurrence.

    :param lst: list of datetime objects (plain ``date`` objects also work
        with ``precision="day"`` when ``tz_aware`` is false)
    :param precision: "day" to dedupe up to the day (y/m/d) or "time" for
        y/m/d h:m:s
    :param tz_aware: if false, the wall-clock fields are compared as they
        are and any tzinfo is ignored. If true, aware datetimes are
        converted to UTC before comparison, so the same instant expressed
        in two zones is a duplicate; naive datetimes are taken to be UTC
    :return: a new list holding one entry per distinct key
    :raises ValueError: if ``precision`` is neither "day" nor "time"
    """
    if precision not in ("day", "time"):
        # Fail loudly: an unknown precision used to reuse a stale key and
        # silently merge unrelated entries.
        raise ValueError(
            'precision must be "day" or "time", got {!r}'.format(precision)
        )

    utc = datetime.timezone.utc
    unique = {}
    for dt in lst:
        moment = dt
        # A datetime is aware only when utcoffset() yields a value; a tzinfo
        # that returns None still counts as naive.
        if tz_aware and dt.utcoffset() is not None:
            moment = dt.astimezone(utc)

        # Plain field tuples keep the key independent of the machine's local
        # timezone/DST rules and of platform limits on timestamp().
        if precision == "day":
            key = (moment.year, moment.month, moment.day)
        else:
            key = (
                moment.year, moment.month, moment.day,
                moment.hour, moment.minute, moment.second,
            )
        unique[key] = dt
    return list(unique.values())
# Demo run: a mix of naive and timezone-aware datetimes containing exact
# repeats, same-day entries with different times, and same wall-clock
# entries with and without a UTC offset.
res = dedupe_dates([
    datetime.datetime(2006, 4, 13),
    datetime.datetime(2006, 4, 13),
    datetime.datetime(2021, 5, 1),
    datetime.datetime(2021, 5, 1),
    datetime.datetime(2020, 4, 23),
    datetime.datetime(1997, 9, 15, 4),
    # UTC-07:00
    datetime.datetime(1997, 9, 15, tzinfo=datetime.timezone(datetime.timedelta(hours=-7))),
    datetime.datetime(2020, 4, 23),
    datetime.datetime(2005, 5, 30),
    datetime.datetime(2021, 5, 31),
    # UTC+09:00
    datetime.datetime(2020, 6, 1, 1, 5, 9, tzinfo=datetime.timezone(datetime.timedelta(hours=9))),
    datetime.datetime(2020, 6, 1, 0, 0),
    datetime.datetime(2011, 4, 26, 17, 57, 27),
    datetime.datetime(2010, 2, 25, 1, 4, 59),
    datetime.datetime(2021, 2, 24, 23, 59, 59),
    datetime.datetime(2020, 1, 28, 10, 35, 17),
    datetime.datetime(2020, 11, 9),
    # UTC+03:00, listed twice on purpose
    datetime.datetime(2019, 10, 7, 17, 50, 9, tzinfo=datetime.timezone(datetime.timedelta(hours=3))),
    datetime.datetime(2019, 10, 7, 17, 50, 9, tzinfo=datetime.timezone(datetime.timedelta(hours=3))),
])

# Report how many entries survived, then each survivor on its own line.
print(len(res))
for r in res:
    print(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment