Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ESORT/ESEARCH support. #382

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 66 additions & 1 deletion imapclient/imapclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@
from . import tls
from .datetime_util import datetime_to_INTERNALDATE, format_criteria_date
from .imap_utf7 import encode as encode_utf7, decode as decode_utf7
from .response_parser import parse_response, parse_message_list, parse_fetch_response
from .response_parser import (
parse_esearch_response,
parse_fetch_response,
parse_message_list,
parse_response,
)
from .util import to_bytes, to_unicode, assert_imap_protocol, chunk
xrange = moves.xrange

Expand Down Expand Up @@ -886,6 +891,49 @@ def unsubscribe_folder(self, folder):
"""
return self._command_and_check('unsubscribe', self._normalise_folder(folder))

@require_capability('ESEARCH')
def esearch(self, criteria='ALL', returns=None, charset=None):
    """Run an extended search (ESEARCH, :rfc:`4731`) on the server.

    *criteria* takes the same form as for the :py:meth:`.search`
    method. *returns* is a string naming the result items the server
    should send back; it is wrapped in parentheses and sent as the
    ``RETURN`` option. When *charset* is given it is sent as the
    ``CHARSET`` argument ahead of the search criteria.

    Result items currently supported:

        ALL
        PARTIAL first:last
        MIN
        MAX
        COUNT

    Items can be combined, e.g. ``'PARTIAL 1:50 COUNT'``, but ALL and
    PARTIAL cannot both be requested.

    The server reply is parsed into a dictionary with one entry per
    returned item. ALL and PARTIAL are lists of ints; MIN, MAX and
    COUNT are ints.

    ALL and PARTIAL are each accompanied by an ``ALL_RAW`` /
    ``PARTIAL_RAW`` entry holding the message set exactly as the
    server sent it. That form may be more compact and can be passed
    straight to :py:meth:`.fetch` without re-serialising the ids.
    The ``_RAW`` entry is None when no messages matched.

    ESEARCH is an extension to IMAP4, so not every server supports
    it.
    """
    command_args = []
    if returns:
        command_args += [b'RETURN', to_bytes('(' + returns + ')')]
    if charset:
        command_args += [b'CHARSET', to_bytes(charset)]
    command_args += _normalise_search_criteria(criteria, charset)

    response = self._raw_command_untagged(
        b'SEARCH', command_args, response_name='ESEARCH')
    return parse_esearch_response(response)

def search(self, criteria='ALL', charset=None):
"""Return a list of messages ids from the currently selected
folder matching *criteria*.
Expand Down Expand Up @@ -994,6 +1042,23 @@ def _search(self, criteria, charset):

return parse_message_list(data)

@require_capability('ESORT')
def esort(self, sort_criteria, criteria='ALL', returns=None,
          charset='UTF-8'):
    """Search and sort on the server using ESORT (:rfc:`5267`).

    *sort_criteria* and *criteria* take the same form as for the
    :py:meth:`.sort` method. *returns* is a string of result items,
    as described for :py:meth:`.esearch`; when given it is wrapped
    in parentheses and sent as the ``RETURN`` option.

    The command is sent as ``SORT [RETURN (...)] <sort criteria>
    <charset> <search criteria>`` and the untagged ``ESEARCH`` reply
    is parsed into a dictionary, in the same way as for
    :py:meth:`.esearch`.
    """
    args = []
    if returns:
        args.extend([b'RETURN', to_bytes('(' + returns + ')')])
    # Unlike SEARCH, the charset is a bare positional argument here
    # and is always sent.
    args.append(_normalise_sort_criteria(sort_criteria))
    args.append(to_bytes(charset))
    args.extend(_normalise_search_criteria(criteria, charset))
    data = self._raw_command_untagged(
        b'SORT', args, response_name='ESEARCH')
    return parse_esearch_response(data)

@require_capability('SORT')
def sort(self, sort_criteria, criteria='ALL', charset='UTF-8'):
"""Return a list of message ids from the currently selected
Expand Down
63 changes: 63 additions & 0 deletions imapclient/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,69 @@ def parse_fetch_response(text, normalise_times=True, uid_is_key=True):
return parsed_response


def parse_esearch_response(data):
"""Parses the IMAP ESEARCH responses as returned by imaplib.

These are generated by ESORT and ESEARCH queries. This function will return
a dictionary, with the keys matching the *returns* value.

See :py:meth:`ImapClient.esearch` for more info.
"""
retval = {}
it = iter(parse_response(data))
try:
while 1:
bite = six.next(it)
if isinstance(bite, tuple) and len(bite) == 2 and bite[0] == b'TAG':
# FIXME: should verify we only consume messages matching our tag
continue
elif bite == b'UID': # this is just a marker that we are using UIDs, ignore it.
continue
elif bite == b'ALL':
message_bite = six.next(it)
retval[bite + b'_RAW'] = _raw_as_bytes(message_bite)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the raw value being returned along with the parsed value? Is the parsed version not more useful?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be in the docstring - the raw version allows for a small optimisation, as it can be passed on to a subsequent fetch as well (removing the need to serialise again, and the raw version might contain ranges, making for a smaller size).
I'm not sure if it has any real life benefits, as the serialisation and transport costs will probably be dwarfed by the time it takes the server to construct and send the response, so if you feel it makes the interface too complicated, I can remove it as well. Real life data point: we skipped using the raw version in the end.

Copy link
Owner

@mjs mjs Jul 14, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about making each of these values a (to be implemented) "SequenceSet" object which implements __iter__ to yield the expanded message ids and __str__ to provide the original raw set string?

This makes the raw and parsed versions easily available without requiring separate dictionary values for the raw and parsed variants. Yielding message ids instead of generating a list helps to avoid unnecessary memory consumption in the case of large message ranges.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like a better way to do this, let me find some time to implement that. Besides __str__ I'd prefer __bytes__ as well, except that is a python3 thing, but I can do both. I'll also just fix the imapclient.util.to_bytes to check if __bytes__ is available and use it.

retval[bite] = _parse_compact_message_list(message_bite)
elif bite == b'PARTIAL':
message_bite = six.next(it)[1]
retval[bite + b'_RAW'] = _raw_as_bytes(message_bite)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and again

retval[bite] = _parse_compact_message_list(message_bite)
else:
retval[bite] = six.next(it)
except StopIteration:
pass

return retval


def _raw_as_bytes(raw):
    """Return *raw* in the form stored under the ``*_RAW`` keys.

    An int is rendered as its decimal digits encoded to ASCII bytes.
    Any other value, including None, is returned unchanged.
    """
    if isinstance(raw, int):
        return str(raw).encode('ascii')
    return raw


def _parse_compact_message_list(message_bite):
    """Expand a compact message set into a flat list of ints.

    *message_bite* may be None (giving an empty list), a single int
    (giving a one-element list), or bytes made of comma-separated
    atoms, each either ``id`` or ``first:last``. Ranges are inclusive
    and are expanded in ascending order whichever way round the
    endpoints were given. Atoms are emitted in the order they appear.

    Each id is converted with ``_int_or_error(..., 'invalid ID')``.
    """
    if message_bite is None:
        return []
    if isinstance(message_bite, int):
        return [message_bite]

    expanded = []
    for atom in message_bite.split(b','):
        low_text, colon, high_text = atom.partition(b':')
        low = _int_or_error(low_text, 'invalid ID')
        if not colon:
            # Plain id, not a range.
            expanded.append(low)
            continue
        high = _int_or_error(high_text, 'invalid ID')
        if high < low:
            # "12:10" denotes the same range as "10:12".
            low, high = high, low
        expanded.extend(range(low, high + 1))
    return expanded



def _int_or_error(value, error_text):
try:
return int(value)
Expand Down
26 changes: 26 additions & 0 deletions tests/test_response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from imapclient.datetime_util import datetime_to_native
from imapclient.fixed_offset import FixedOffset
from imapclient.response_parser import (
parse_esearch_response,
parse_response,
parse_message_list,
parse_fetch_response,
Expand Down Expand Up @@ -191,6 +192,31 @@ def test_modseq_interleaved(self):
self.assertEqual(out.modseq, 9)


class TestParseEsearchRespons(unittest.TestCase):
    """Checks ``parse_esearch_response`` against sample ESEARCH replies."""

    def test_esort(self):
        # The ids are not in ascending order; the parsed list is
        # expected to keep the order the server sent them in.
        response = [b'(TAG "KFOO6") UID PARTIAL (1:5 68669,69520,68831,68835,66540) COUNT 2216']
        expected = {
            b'COUNT': 2216,
            b'PARTIAL': [68669, 69520, 68831, 68835, 66540],
            b'PARTIAL_RAW': b'68669,69520,68831,68835,66540',
        }
        self.assertEqual(parse_esearch_response(response), expected)

    def test_esearch(self):
        # The trailing "69607:69608" range is expanded in the parsed
        # list but kept verbatim in the raw value.
        response = [b'(TAG "GJHF5") UID PARTIAL (1:5 69574,69590,69605,69607:69608) COUNT 2216']
        expected = {
            b'COUNT': 2216,
            b'PARTIAL': [69574, 69590, 69605, 69607, 69608],
            b'PARTIAL_RAW': b'69574,69590,69605,69607:69608',
        }
        self.assertEqual(parse_esearch_response(response), expected)

    def test_partial_no_result(self):
        # NIL in place of the message set: empty list, raw value None.
        response = [b'(TAG "no-exist") UID PARTIAL (1:5 NIL) COUNT 0']
        expected = {
            b'COUNT': 0,
            b'PARTIAL': [],
            b'PARTIAL_RAW': None,
        }
        self.assertEqual(parse_esearch_response(response), expected)

    def test_partial_single_result(self):
        # A single id: one-element list, raw value as bytes.
        response = [b'(TAG "one-result") UID PARTIAL (1:5 69573) COUNT 1']
        expected = {
            b'COUNT': 1,
            b'PARTIAL': [69573],
            b'PARTIAL_RAW': b'69573',
        }
        self.assertEqual(parse_esearch_response(response), expected)

class TestParseFetchResponse(unittest.TestCase):

def test_basic(self):
Expand Down