diff --git a/inbox/mailsync/backends/imap/common.py b/inbox/mailsync/backends/imap/common.py index 27ca3a419..5258fb920 100644 --- a/inbox/mailsync/backends/imap/common.py +++ b/inbox/mailsync/backends/imap/common.py @@ -16,7 +16,7 @@ from typing import List, Set from sqlalchemy import bindparam, desc -from sqlalchemy.orm import Query, Session +from sqlalchemy.orm import Session from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.sql.expression import func @@ -73,33 +73,22 @@ def lastseenuid(account_id, session, folder_id): return res or 0 -IMAPUID_PER_MESSAGE_SANITY_LIMIT = 100 - - def update_message_metadata( session: Session, account: Account, message: Message, is_draft: bool ) -> None: """Update the message's metadata""" - # Sort imapuids in a way that the ones that were added later come first. - # There are non-conforming IMAP servers that can list the same message thousands of times - # in the same folder. This is a workaround to limit the memory pressure caused by such - # servers. The metadata is meaningless for such messages anyway. - latest_imapuids = ( - imapuids_for_message_query( - account_id=account.id, - message_id=message.id, - only_latest=IMAPUID_PER_MESSAGE_SANITY_LIMIT, - ) - .with_session(session) - .all() + # Sort imapuids in a way that the ones that were added later come last + now = datetime.utcnow() + sorted_imapuids: List[ImapUid] = sorted( + message.imapuids, key=lambda imapuid: imapuid.updated_at or now ) - message.is_read = any(imapuid.is_seen for imapuid in latest_imapuids) - message.is_starred = any(imapuid.is_flagged for imapuid in latest_imapuids) + message.is_read = any(imapuid.is_seen for imapuid in sorted_imapuids) + message.is_starred = any(imapuid.is_flagged for imapuid in sorted_imapuids) message.is_draft = is_draft - latest_categories: List[Category] = [ - category for imapuid in latest_imapuids for category in imapuid.categories + sorted_categories: List[Category] = [ + category for imapuid in sorted_imapuids for category in imapuid.categories ] categories: Set[Category] @@ -112,9 +101,9 @@ def update_message_metadata( # (and in turn one category) depending on the order they were returned # from the database. This makes it deterministic and more-correct because a message # is likely in a folder (and category) it was added to last. - categories = {latest_categories[0]} if latest_categories else set() + categories = {sorted_categories[-1]} if sorted_categories else set() elif account.category_type == "label": - categories = set(latest_categories) + categories = set(sorted_categories) else: raise AssertionError("Unreachable") @@ -209,18 +198,6 @@ def update_metadata(account_id, folder_id, folder_role, new_flags, session): log.info("Updated UID metadata", changed=change_count, out_of=len(new_flags)) -def imapuids_for_message_query( - *, account_id: int, message_id: int, only_latest: int | None = None -) -> Query: - query = Query([ImapUid]).filter( - ImapUid.account_id == account_id, ImapUid.message_id == message_id - ) - if only_latest is not None: - query = query.order_by(ImapUid.updated_at.desc()).limit(only_latest) - - return query - - def remove_deleted_uids(account_id, folder_id, uids): """ Make sure you're holding a db write lock on the account. (We don't try @@ -261,13 +238,7 @@ def remove_deleted_uids(account_id, folder_id, uids): db_session.delete(imapuid) if message is not None: - message_imapuids_exist = db_session.query( - imapuids_for_message_query( - account_id=account_id, message_id=message.id - ).exists() - ).scalar() - - if not message_imapuids_exist and message.is_draft: + if not message.imapuids and message.is_draft: # Synchronously delete drafts. thread = message.thread if thread is not None: @@ -286,7 +257,7 @@ def remove_deleted_uids(account_id, folder_id, uids): update_message_metadata( db_session, account, message, message.is_draft ) - if not message_imapuids_exist: + if not message.imapuids: # But don't outright delete messages. Just mark them as # 'deleted' and wait for the asynchronous # dangling-message-collector to delete them. diff --git a/inbox/mailsync/backends/imap/monitor.py b/inbox/mailsync/backends/imap/monitor.py index 2d08fe483..dc00a127d 100644 --- a/inbox/mailsync/backends/imap/monitor.py +++ b/inbox/mailsync/backends/imap/monitor.py @@ -165,6 +165,7 @@ def start_delete_handler(self): account_id=self.account_id, namespace_id=self.namespace_id, provider_name=self.provider_name, + uid_accessor=lambda m: m.imapuids, ) self.delete_handler.start() diff --git a/inbox/mailsync/gc.py b/inbox/mailsync/gc.py index c7dd6be42..a738e8dfd 100644 --- a/inbox/mailsync/gc.py +++ b/inbox/mailsync/gc.py @@ -44,6 +44,10 @@ class DeleteHandler(InterruptibleThread): ---------- account_id, namespace_id: int IDs for the namespace to check. + uid_accessor: function + Function that takes a message and returns a list of associated uid + objects. For IMAP sync, this would just be + `uid_accessor=lambda m: m.imapuids` message_ttl: int Number of seconds to wait after a message is marked for deletion before deleting it for good. @@ -55,6 +59,7 @@ def __init__( account_id, namespace_id, provider_name, + uid_accessor, message_ttl=DEFAULT_MESSAGE_TTL, thread_ttl=DEFAULT_THREAD_TTL, ): @@ -62,6 +67,7 @@ def __init__( self.account_id = account_id self.namespace_id = namespace_id self.provider_name = provider_name + self.uids_for_message = uid_accessor self.log = log.new(account_id=account_id) self.message_ttl = datetime.timedelta(seconds=message_ttl) self.thread_ttl = datetime.timedelta(seconds=thread_ttl) @@ -100,18 +106,14 @@ def check(self, current_time): # If the message isn't *actually* dangling (i.e., it has # imapuids associated with it), undelete it. try: - message_imapuids_exist = db_session.query( - common.imapuids_for_message_query( - account_id=self.account_id, message_id=message.id - ).exists() - ).scalar() + uids_for_message = self.uids_for_message(message) except ObjectDeletedError: # It looks like we are expiring the session potentially when one message is deleted, # and then when accessing the IMAP uids, there is a lazy load trying to get the data. # If that object has also been deleted (how?) it raises this exception. continue - if message_imapuids_exist: + if uids_for_message: message.deleted_at = None continue diff --git a/tests/imap/test_delete_handling.py b/tests/imap/test_delete_handling.py index 9c2610ecd..7b8d4cbf1 100644 --- a/tests/imap/test_delete_handling.py +++ b/tests/imap/test_delete_handling.py @@ -89,6 +89,7 @@ def test_deletion_with_short_ttl( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=0, thread_ttl=0, ) @@ -109,6 +110,7 @@ def test_thread_deletion_with_short_ttl( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=0, thread_ttl=120, ) @@ -146,6 +148,7 @@ def test_non_orphaned_messages_get_unmarked( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=0, ) handler.check(marked_deleted_message.deleted_at + timedelta(seconds=1)) @@ -162,6 +165,7 @@ def test_threads_only_deleted_when_no_messages_left( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=0, ) # Add another message onto the thread @@ -183,6 +187,7 @@ def test_deletion_deferred_with_longer_ttl( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=5, ) db.session.commit() @@ -202,6 +207,7 @@ def test_deletion_creates_revision( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=0, ) handler.check(marked_deleted_message.deleted_at + timedelta(seconds=1)) @@ -264,6 +270,7 @@ def test_deleted_labels_get_gced( account_id=default_account.id, namespace_id=default_namespace.id, provider_name=default_account.provider, + uid_accessor=lambda m: m.imapuids, message_ttl=0, ) handler.gc_deleted_categories()