Why does using a set() snapshot for deduplication still allow duplicate records in my Django/Outlook integration?
I am building a synchronization script to pull emails from a specific Outlook folder into a Django database using pywin32. To optimize performance and avoid redundant database queries, I use a set() to store a snapshot of existing records for lookups. However, I am still seeing duplicate records in my database.
My Environment:
OS: Windows 10
Outlook: 2016
Tech Stack: Python 3.13, Django 5.1.4,
pywin32Database: SQLite
The Logic (Minimal Reproducible Example):
time zone
TIME_ZONE = 'UTC' # I also tried 'Asia/Ho_Chi_Minh'
USE_TZ = True
model
class FloorRequest(models.Model):
request_type = models.CharField(max_length=10) # 'BADGE' or 'FLOOR'
email = models.EmailField()
request_time = models.DateTimeField() # This is where I compare timestamps
mail_entry_id = models.CharField(max_length=255, unique=True)
python
def backfill_all() -> None:
items = t1_folder.Items
item_list = [i for i in items]
existing_records = set(
FloorRequest.objects.values_list('email', 'request_time', 'request_type')
)
count = 0
for item in item_list:
try:
sender = str(getattr(item, "SenderEmailAddress", "")).lower().strip()
received_time = item.ReceivedTime.replace(tzinfo=None) # Naive datetime
current_type = 'BADGE' if "badge" in item.Subject.lower() else 'FLOOR'
if (sender, received_time, current_type) not in existing_records:
FloorRequest.objects.create(
email=sender,
request_time=received_time,
request_type=current_type,
)
existing_records.add((sender, received_time, current_type))
count += 1
except Exception as e:
continue