From f0d7c3d085b5659f0f951fd6c3fd6369e112bb78 Mon Sep 17 00:00:00 2001 From: nick evans Date: Thu, 7 Nov 2024 14:24:20 -0500 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=93=9A=20Update=20`#search`=20documen?= =?UTF-8?q?tation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Expanded criteria docs: * explain sequence-set coercion * explain encoding of string, integer, and date args * add warning for raw string data * Expanded charset docs * Link to IANA charset registry * Indicate defaults are "US-ASCII" or "UTF-8" * Demonstrate sending charset embedded inside criteria * Move `#search` example above search criteria. The list of search keys is long. And to be more complete, it will need to get even longer. It's nice to have some examples near the top. * Update `#search` example (`NEW` is deprecated) `NEW` and `RECENT` have been removed from `IMAP4rev2`, so I'm updating the example with a search key that isn't deprecated. --- lib/net/imap.rb | 59 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/lib/net/imap.rb b/lib/net/imap.rb index b0fd5e53..494b258d 100644 --- a/lib/net/imap.rb +++ b/lib/net/imap.rb @@ -1930,17 +1930,53 @@ def uid_expunge(uid_set) end # Sends a {SEARCH command [IMAP4rev1 §6.4.4]}[https://www.rfc-editor.org/rfc/rfc3501#section-6.4.4] - # to search the mailbox for messages that match the given searching - # criteria, and returns message sequence numbers. +keys+ can either be a - # string holding the entire search string, or a single-dimension array of - # search keywords and arguments. - # - # Returns a SearchResult object. SearchResult inherits from Array (for + # to search the mailbox for messages that match the given search +criteria+, + # and returns a SearchResult. SearchResult inherits from Array (for # backward compatibility) but adds SearchResult#modseq when the +CONDSTORE+ # capability has been enabled. 
# + # +criteria+ is one or more search keys and their arguments, which may be + # provided as an array or a string. + # See {"Search criteria"}[rdoc-ref:#search@Search+criteria], below. + # + # * When +criteria+ is an array, each member is a +SEARCH+ command argument: + # * Any SequenceSet sends SequenceSet#valid_string. + # +Range+, -1, and nested +Array+ elements are converted to + # SequenceSet. + # * Any +String+ is sent verbatim when it is a valid \IMAP atom, + # and encoded as an \IMAP quoted or literal string otherwise. + # * Any other +Integer+ (besides -1) will be sent as +#to_s+. + # * +Date+ objects will be encoded as an \IMAP date (see ::encode_date). + # + # * When +criteria+ is a string, it will be sent directly to the server + # without any validation or encoding. *WARNING:* This is + # vulnerable to injection attacks when external inputs are used. + # + # +charset+ is the name of the {registered character + # set}[https://www.iana.org/assignments/character-sets/character-sets.xhtml] + # used by strings in the search +criteria+. When +charset+ isn't specified, + # either "US-ASCII" or "UTF-8" is assumed, depending on + # the server's capabilities. +charset+ may be sent inside +criteria+ + # instead of as a separate argument. 
+ # # Related: #uid_search # + # ===== For example: + # + # p imap.search(["SUBJECT", "hello", "NOT", "SEEN"]) + # #=> [1, 6, 7, 8] + # + # The following searches send the exact same command to the server: + # + # # criteria array, charset arg + # imap.search(%w[OR UNSEEN FLAGGED SUBJECT foo], "UTF-8") + # # criteria string, charset arg + # imap.search("OR UNSEEN FLAGGED SUBJECT foo", "UTF-8") + # # criteria array contains charset arg + # imap.search(%w[CHARSET UTF-8 OR UNSEEN FLAGGED SUBJECT foo]) + # # criteria string contains charset arg + # imap.search("CHARSET UTF-8 OR UNSEEN FLAGGED SUBJECT foo") + # # ===== Search criteria # # For a full list of search criteria, @@ -1982,18 +2018,13 @@ def uid_expunge(uid_set) # # TO :: messages with in their TO field. # - # ===== For example: - # - # p imap.search(["SUBJECT", "hello", "NOT", "NEW"]) - # #=> [1, 6, 7, 8] - # # ===== Capabilities # - # If [CONDSTORE[https://www.rfc-editor.org/rfc/rfc7162.html]] is supported + # If CONDSTORE[https://www.rfc-editor.org/rfc/rfc7162.html] is supported # and enabled for the selected mailbox, a non-empty SearchResult will # include a +MODSEQ+ value. # imap.select("mbox", condstore: true) - # result = imap.search(["SUBJECT", "hi there", "not", "new") + # result = imap.search(["SUBJECT", "hi there", "not", "new"]) # #=> Net::IMAP::SearchResult[1, 6, 7, 8, modseq: 5594] # result.modseq # => 5594 def search(keys, charset = nil) @@ -2008,7 +2039,7 @@ def search(keys, charset = nil) # backward compatibility) but adds SearchResult#modseq when the +CONDSTORE+ # capability has been enabled. # - # See #search for documentation of search criteria. + # See #search for documentation of parameters. 
def uid_search(keys, charset = nil) return search_internal("UID SEARCH", keys, charset) end From a366a0d1deeaafacd434436489b2a08abb233fe4 Mon Sep 17 00:00:00 2001 From: nick evans Date: Thu, 7 Nov 2024 10:37:20 -0500 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9A=20Document=20all=20known=20sta?= =?UTF-8?q?ndard=20search=20keys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than only list a subset of search keys, this documents them all. --- lib/net/imap.rb | 193 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 170 insertions(+), 23 deletions(-) diff --git a/lib/net/imap.rb b/lib/net/imap.rb index 494b258d..c52b2498 100644 --- a/lib/net/imap.rb +++ b/lib/net/imap.rb @@ -1977,46 +1977,193 @@ def uid_expunge(uid_set) # # criteria string contains charset arg # imap.search("CHARSET UTF-8 OR UNSEEN FLAGGED SUBJECT foo") # - # ===== Search criteria + # ===== Search keys # - # For a full list of search criteria, + # For full definitions of the standard search +criteria+, # see [{IMAP4rev1 §6.4.4}[https://www.rfc-editor.org/rfc/rfc3501.html#section-6.4.4]], # or [{IMAP4rev2 §6.4.4}[https://www.rfc-editor.org/rfc/rfc9051.html#section-6.4.4]], # in addition to documentation for - # any [CAPABILITIES[https://www.iana.org/assignments/imap-capabilities/imap-capabilities.xhtml]] - # reported by #capabilities which may define additional search filters, e.g: + # any #capabilities which may define additional search filters, such as # +CONDSTORE+, +WITHIN+, +FILTERS+, SEARCH=FUZZY, +OBJECTID+, or - # +SAVEDATE+. The following are some common search criteria: + # +SAVEDATE+. + # + # With the exception of sequence-set and parenthesized + # list, all search keys are composed of a prefix label with zero or more + # arguments. The number and type of arguments is specific to each search + # key. + # + # +ALL+:: + # Matches every message in the mailbox.
+ # + # (_search-key_ _search-key_...):: + # Combines one or more _search-key_ arguments to match + # messages which match all contained search keys. Useful for +OR+, +NOT+, + # and other search keys with _search-key_ arguments. + # + # _Note:_ this search key has no label. + # + # +OR+ _search-key_ _search-key_:: + # Matches messages which match either _search-key_ argument. + # + # +NOT+ _search-key_:: + # Matches messages which do not match _search-key_. + # + # _sequence-set_:: + # Matches messages with message sequence numbers in _sequence-set_. + # + # _Note:_ this search key has no label. + # + # +UIDONLY+ must *not* be enabled. + # {[RFC9586]}[https://www.rfc-editor.org/rfc/rfc9586.html] + # + # +UID+ _sequence-set_:: + # Matches messages with a UID in _sequence-set_. + # + # +ANSWERED+:: + # +UNANSWERED+:: + # Matches messages with or without the \\Answered flag. + # +DELETED+:: + # +UNDELETED+:: + # Matches messages with or without the \\Deleted flag. + # +DRAFT+:: + # +UNDRAFT+:: + # Matches messages with or without the \\Draft flag. + # +FLAGGED+:: + # +UNFLAGGED+:: + # Matches messages with or without the \\Flagged flag. + # +SEEN+:: + # +UNSEEN+:: + # Matches messages with or without the \\Seen flag. + # + # +KEYWORD+ _keyword_:: + # +UNKEYWORD+ _keyword_:: + # Matches messages with or without the specified _keyword_. + # + # +BCC+ _substring_:: + # Matches when _substring_ is in the envelope's BCC field. + # +CC+ _substring_:: + # Matches when _substring_ is in the envelope's CC field. + # +FROM+ _substring_:: + # Matches when _substring_ is in the envelope's FROM field. + # +SUBJECT+ _substring_:: + # Matches when _substring_ is in the envelope's SUBJECT field. + # +TO+ _substring_:: + # Matches when _substring_ is in the envelope's TO field. + # + # +HEADER+ _field_ _substring_:: + # Matches when _substring_ is in the specified header _field_. + # + # +BODY+ _string_:: + # Matches when _string_ is in the body of the message. 
+ # Does not match on header fields. + # + # The server _may_ use flexible matching, rather than simple substring + # matches. For example, this may use stemming or match only full words. + # + # +TEXT+ _string_:: + # Matches when _string_ is in the header or body of the message. + # + # The server _may_ use flexible matching, rather than simple substring + # matches. For example, this may use stemming or match only full words. + # + # +BEFORE+ _date_:: + # +ON+ _date_:: + # +SINCE+ _date_:: + # Matches when the +INTERNALDATE+ is earlier than, on, or later than + # _date_. + # + # +SENTBEFORE+ _date_:: + # +SENTON+ _date_:: + # +SENTSINCE+ _date_:: + # Matches when the +Date+ header is earlier than, on, or later than _date_. + # + # +SMALLER+ _bytes_:: + # +LARGER+ _bytes_:: + # Matches when +RFC822.SIZE+ is smaller/larger than _bytes_. + # + # ====== Removed from +IMAP4rev2+ + # + # The \\Recent flag has been removed from +IMAP4rev2+. So these + # search keys require the +IMAP4rev1+ capability. # - # <message set>:: a set of message sequence numbers. "," indicates - # an interval, "+:+" indicates a range. For instance, - # "2,10:12,15" means "2,10,11,12,15". + # +RECENT+:: + # +UNRECENT+:: + # Matches messages with or without the \\Recent flag. + # + # +NEW+:: + # Equivalent to (RECENT UNSEEN). + # + # ====== Extension search keys + # + # The search keys described below are defined by standard \IMAP extensions. + # + # +OLDER+ _interval_:: + # +YOUNGER+ _interval_:: + # Matches when +INTERNALDATE+ is more/less than _interval_ seconds ago. + # + # Requires the +WITHIN+ capability. + # {[RFC5032]}[https://www.rfc-editor.org/rfc/rfc5032.html] + # + # +ANNOTATION+ _entry_ _attr_ _value_:: + # Matches messages that have annotations with entries matching _entry_, + # attributes matching _attr_, and _value_ in the attribute's values. + # + # Requires the +ANNOTATE-EXPERIMENT-1+ capability. + # {[RFC5257]}[https://www.rfc-editor.org/rfc/rfc5257.html].
+ # + # +FILTER+ _filter_:: + # References a _filter_ that is stored on the server and matches all + # messages which would be matched by that filter's search criteria. + # + # Requires the +FILTERS+ capability. + # {[RFC5466]}[https://www.rfc-editor.org/rfc/rfc5466.html#section-3.1] + # + # +FUZZY+ _search-key_:: + # Uses fuzzy matching for the specified search key. + # + # Requires the SEARCH=FUZZY capability. + # {[RFC6203]}[https://www.rfc-editor.org/rfc/rfc6203.html#section-6]. # - # BEFORE <date>:: messages with an internal date strictly before - # <date>. The date argument has a format similar - # to 8-Aug-2002, and can be formatted using - # Net::IMAP.format_date. + # +MODSEQ+ _modseq_:: + # Matches when +MODSEQ+ is greater than or equal to _modseq_. # - # BODY <string>:: messages that contain <string> within their body. + # Requires the +CONDSTORE+ capability. + # {[RFC7162]}[https://www.rfc-editor.org/rfc/rfc7162.html#section-3.1.5]. # - # CC <string>:: messages containing <string> in their CC field. + # +MODSEQ+ _entry_ _entry-type_ _modseq_:: + # Matches when a specific metadata _entry_ has been updated since + # _modseq_. # - # FROM <string>:: messages that contain <string> in their FROM field. + # For flags, the corresponding _entry_ name is + # "/flags/#{flag_name}", where _flag_name_ includes the + # \\ prefix. _entry-type_ can be one of "shared", + # "priv" (private), or "all". # - # NEW:: messages with the \Recent, but not the \Seen, flag set. + # Requires the +CONDSTORE+ capability. + # {[RFC7162]}[https://www.rfc-editor.org/rfc/rfc7162.html#section-3.1.5]. # - # NOT <search-key>:: negate the following search key. + # +EMAILID+ _objectid_:: + # +THREADID+ _objectid_:: + # Matches when +EMAILID+/+THREADID+ is equal to _objectid_ + # (substring matches are not supported). # - # OR <search-key> <search-key>:: "or" two search keys together. + # Requires the +OBJECTID+ capability.
+ # {[RFC8474]}[https://www.rfc-editor.org/rfc/rfc8474.html#section-6] # - # ON <date>:: messages with an internal date exactly equal to <date>, - # which has a format similar to 8-Aug-2002. + # +SAVEDATESUPPORTED+:: + # Matches every message in the mailbox when the mailbox supports the save + # date attribute. Otherwise, it matches no messages. # - # SINCE <date>:: messages with an internal date on or after <date>. + # Requires the +SAVEDATE+ capability. + # {[RFC8514]}[https://www.rfc-editor.org/rfc/rfc8514.html#section-4.3] # - # SUBJECT <string>:: messages with <string> in their subject. + # +SAVEDBEFORE+ _date_:: + # +SAVEDON+ _date_:: + # +SAVEDSINCE+ _date_:: + # Matches when the save date is earlier than, on, or later than _date_. # - # TO <string>:: messages with <string> in their TO field. + # Requires the +SAVEDATE+ capability. + # {[RFC8514]}[https://www.rfc-editor.org/rfc/rfc8514.html#section-4.3] # # ===== Capabilities #