From 9241d371887d72dc735c44c7d2bf2a01912e058b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 31 Jul 2019 17:31:09 +0100 Subject: [PATCH 01/25] Add servlets, finish /hash_details, start /lookup --- sydent/http/httpserver.py | 6 ++ sydent/http/servlets/__init__.py | 2 +- sydent/http/servlets/hashdetailsservlet.py | 70 ++++++++++++++++ sydent/http/servlets/lookupv2servlet.py | 97 ++++++++++++++++++++++ sydent/http/servlets/v2_servlet.py | 39 +++++++++ sydent/sydent.py | 28 +++++++ 6 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 sydent/http/servlets/hashdetailsservlet.py create mode 100644 sydent/http/servlets/lookupv2servlet.py create mode 100644 sydent/http/servlets/v2_servlet.py diff --git a/sydent/http/httpserver.py b/sydent/http/httpserver.py index 0317f824..754bff3c 100644 --- a/sydent/http/httpserver.py +++ b/sydent/http/httpserver.py @@ -38,6 +38,7 @@ def __init__(self, sydent): identity = Resource() api = Resource() v1 = self.sydent.servlets.v1 + v2 = self.sydent.servlets.v2 validate = Resource() email = Resource() @@ -51,6 +52,8 @@ def __init__(self, sydent): lookup = self.sydent.servlets.lookup bulk_lookup = self.sydent.servlets.bulk_lookup + lookup_v2 = self.sydent.servlets.lookup_v2 + threepid = Resource() bind = self.sydent.servlets.threepidBind unbind = self.sydent.servlets.threepidUnbind @@ -63,6 +66,7 @@ def __init__(self, sydent): root.putChild('_matrix', matrix) matrix.putChild('identity', identity) identity.putChild('api', api) + identity.putChild('v2', v2) api.putChild('v1', v1) v1.putChild('validate', validate) @@ -93,6 +97,8 @@ def __init__(self, sydent): v1.putChild('sign-ed25519', self.sydent.servlets.blindlySignStuffServlet) + v2.putChild('lookup', lookup_v2) + self.factory = Site(root) self.factory.displayTracebacks = False diff --git a/sydent/http/servlets/__init__.py b/sydent/http/servlets/__init__.py index 9e2eabb4..68007d9d 100644 --- a/sydent/http/servlets/__init__.py +++ b/sydent/http/servlets/__init__.py 
@@ -20,7 +20,7 @@ def get_args(request, required_args): """ - Helper function to get arguments for an HTTP request + Helper function to get arguments for an HTTP request. Currently takes args from the top level keys of a json object or www-form-urlencoded for backwards compatability. Returns a tuple (error, args) where if error is non-null, diff --git a/sydent/http/servlets/hashdetailsservlet.py b/sydent/http/servlets/hashdetailsservlet.py new file mode 100644 index 00000000..d2ad5357 --- /dev/null +++ b/sydent/http/servlets/hashdetailsservlet.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.resource import Resource +from sydent.db.threepid_associations import GlobalAssociationStore +from sydent.util.tokenutils import generateAlphanumericTokenOfLength + +import logging +import json +import signedjson.sign + +from sydent.http.servlets import get_args, jsonwrap, send_cors + + +logger = logging.getLogger(__name__) + + +class HashDetailsServlet(Resource): + isLeaf = True + + def __init__(self, syd): + self.sydent = syd + self.known_algorithms = ["sha256", "none"] + + def render_GET(self, request): + """ + Return the hashing algorithms and pepper that this IS supports. + Whether the response includes the "none" algorithm is determined by a + config option. The pepper included in the response is also set by the + config, and generated if one is not set. 
+ + Returns: An object containing an array of hashing algorithms the + server supports, and a `lookup_pepper` field, which is a + server-defined value that the client should include in the 3PID + information before hashing. + """ + send_cors(request) + + # Determine what hashing algorithms have been enabled + algorithms = self.sydent.config.get("hashing", "algorithms") + if not algorithms: + # Default response + algorithms = ["sha256"] + + # A lookup_pepper is defined in the config, otherwise it is generated + lookup_pepper = self.sydent.config.get("hashing", "lookup_pepper") + + return { + "algorithms": algorithms, + "lookup_pepper": lookup_pepper, + } + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py new file mode 100644 index 00000000..133fea66 --- /dev/null +++ b/sydent/http/servlets/lookupv2servlet.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from twisted.web.resource import Resource +from sydent.db.threepid_associations import GlobalAssociationStore +from sydent.http.servlets.hashdetailsservlet import HashDetailsServlet + +import logging +import json +import signedjson.sign + +from sydent.http.servlets import get_args, jsonwrap, send_cors + + +logger = logging.getLogger(__name__) + + +class LookupV2Servlet(Resource): + isLeaf = True + + def __init__(self, syd): + self.sydent = syd + + def render_POST(self, request): + """ + Perform lookups with potentially hashed 3PID details. + + Depending on our response to /hash_details, the client will chosoe a + hash algorithm and pepper, hash the 3PIDs it wants to lookup, and + send them to us, along with the algorithm and pepper it used. + + We first check this algorithm/pepper combo matches what we expect, + then compare the 3PID details to what we have in the database. + + Params: A JSON object containing the following keys: + * 'addresses': List of hashed/plaintext (depending on the + algorithm) 3PID addresses and mediums. + * 'algorithm': The algorithm the client has used to process + the 3PIDs. + * 'pepper': The pepper the client has attached to the 3PIDs. + Returns: Object with key 'mappings', which is a dictionary of results + where each result is a key/value pair of what the client sent, and + the matching Matrix User ID that claims to own that 3PID. + + User IDs for which no mapping is found are omitted. 
+ """ + send_cors(request) + err, args = get_args(request, ('addresses', 'algorithm', 'pepper')) + if err: + return json.dumps(err) + + addresses = args['addresses'] + if not isinstance(addresses, list): + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'addresses must be a list'}, None + + algorithm = args['algorithm'] + if not isinstance(algorithm, str): + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm must be a string'}, None + if algorithm not in self.sydent.config.get("hashing", "algorithms"): + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}, None + + pepper = args['pepper'] + if not isinstance(pepper, str): + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PARAM', 'error': 'pepper must be a string'}, None + if pepper != self.sydent.config.get("hashing", "lookup_pepper"): + request.setResponseCode(400) + return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"}, None + + logger.info("Lookup of %d threepids with algorithm", len(addresses), algorithm) + + # TODO: Matching. Make sure to generate hashes in the DB. Use migrations to generate hashes for existing 3PIDs? + + return json.dumps({ 'mappings': results }) + + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} diff --git a/sydent/http/servlets/v2_servlet.py b/sydent/http/servlets/v2_servlet.py new file mode 100644 index 00000000..0e6b630d --- /dev/null +++ b/sydent/http/servlets/v2_servlet.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.web.resource import Resource + +from sydent.http.servlets import jsonwrap, send_cors + + +class V2Servlet(Resource): + isLeaf = False + + def __init__(self, syd): + Resource.__init__(self) + self.sydent = syd + + @jsonwrap + def render_GET(self, request): + send_cors(request) + request.setResponseCode(200) + return {} + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} diff --git a/sydent/sydent.py b/sydent/sydent.py index e5fe6458..42cda037 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -44,6 +44,8 @@ from http.servlets.msisdnservlet import MsisdnRequestCodeServlet, MsisdnValidateCodeServlet from http.servlets.lookupservlet import LookupServlet from http.servlets.bulklookupservlet import BulkLookupServlet +from http.servlets.lookupv2servlet import LookupV2Servlet +from http.servlets.hashdetailsservlet import HashDetailsServlet from http.servlets.pubkeyservlets import Ed25519Servlet from http.servlets.threepidbindservlet import ThreePidBindServlet from http.servlets.threepidunbindservlet import ThreePidUnbindServlet @@ -51,6 +53,7 @@ from http.servlets.getvalidated3pidservlet import GetValidated3pidServlet from http.servlets.store_invite_servlet import StoreInviteServlet from http.servlets.v1_servlet import V1Servlet +from http.servlets.v2_servlet import V2Servlet from db.valsession import ThreePidValSessionStore @@ -108,6 +111,9 @@ 'crypto': { 'ed25519.signingkey': '', }, + 'hashing': { + 'algorithms': ['sha256'], + } } @@ -174,6 +180,26 @@ def sighup(signum, stack): 
addr=self.cfg.get("general", "prometheus_addr"), ) + if self.cfg.has_option("hashing", "algorithms"): + algorithms = self.cfg.get("hashing", "algorithms") + if not isinstance(algorithms, list): + logger.fatal("Config file option hashing.algorithms is not an array") + + # Ensure provided hash algorithms are known + for algorithm in algorithms: + if algorithm not in HashDetailsServlet.known_algorithms: + logger.fatal( + "Config file option hashing.algorithms contains unknown algorithm '%s'.", + algorithm, + ) + + # Determine whether a lookup_pepper value has been defined + lookup_pepper = self.cfg.get("hashing", "lookup_pepper") + if not lookup_pepper: + # If lookup_pepper hasn't been defined, or is an empty string, + # generate one + self.cfg.set("hashing", "lookup_pepper", generateAlphanumericTokenOfLength(5)) + self.validators = Validators() self.validators.email = EmailValidator(self) self.validators.msisdn = MsisdnValidator(self) @@ -186,12 +212,14 @@ def sighup(signum, stack): self.servlets = Servlets() self.servlets.v1 = V1Servlet(self) + self.servlets.v2 = V2Servlet(self) self.servlets.emailRequestCode = EmailRequestCodeServlet(self) self.servlets.emailValidate = EmailValidateCodeServlet(self) self.servlets.msisdnRequestCode = MsisdnRequestCodeServlet(self) self.servlets.msisdnValidate = MsisdnValidateCodeServlet(self) self.servlets.lookup = LookupServlet(self) self.servlets.bulk_lookup = BulkLookupServlet(self) + self.servlets.lookup_v2 = LookupV2Servlet(self) self.servlets.pubkey_ed25519 = Ed25519Servlet(self) self.servlets.pubkeyIsValid = PubkeyIsValidServlet(self) self.servlets.ephemeralPubkeyIsValid = EphemeralPubkeyIsValidServlet(self) From 4fa953b25b4b57eeefceba99ed8b83353155fe2d Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 1 Aug 2019 18:22:12 +0100 Subject: [PATCH 02/25] Add db portions and lookup_pepper db interaction --- sydent/db/threepid_associations.py | 59 ++++++++++++++++++++++++- sydent/db/threepid_associations.sql | 33 
+++++++++++++- sydent/http/servlets/lookupv2servlet.py | 2 +- sydent/sydent.py | 16 +++++-- 4 files changed, 103 insertions(+), 7 deletions(-) diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index f6f96cb2..c6d71e10 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -180,7 +180,7 @@ def getMxids(self, threepid_tuples): def addAssociation(self, assoc, rawSgAssoc, originServer, originId, commit=True): """ :param assoc: (sydent.threepid.GlobalThreepidAssociation) The association to add as a high level object - :param sgAssoc The original raw bytes of the signed association + :param sgAssoc: The original raw bytes of the signed association :return: """ cur = self.sydent.db.cursor() @@ -215,3 +215,60 @@ def removeAssociation(self, medium, address): cur.rowcount, medium, address, ) self.sydent.db.commit() + +class HashingMetadataStore: + def __init__(self, sydent): + self.sydent = sydent + + def retrieve_value(self, name) + """Return a value from the hashing_metadata table + + :param name: The name of the db column to return the value for + :type name: str + + :returns a value corresponding to the specified name, or None if a + value does not exist + """ + cur = self.sydent.db.cursor() + res = cur.execute("select %s from hashing_metadata" % name) + row = res.fetchone() + + if not row: + return None + + return row[0] + + def is_new(self, name, value): + """ + Returns whether a provided value does NOT match a value stored in the + database under the specified db column name + + :param name: The name of the db column to check + :type name: str + + :param value: The value to check against + + :returns a boolean that is true if the the provided value and the + value of the item under the named db column is different + :rtype: bool + """ + db_value = self.retrieve_value(name) + if not value: + return False + return value != db_value + + def store_values(self, names_and_values): + """Stores values in 
the hashing_metadata table under the named columns + + :param names_and_values: Column names and associated values to store + in the database + :type names_and_values: Dict + """ + cur = self.sydent.db.cursor() + + columns = ', '.join(names_and_values.keys()) + values = ', '.join('?' * len(names_and_values)) + sql = 'INSERT INTO hashing_metadata ({}) VALUES ({})'.format(columns, values) + + cur.execute(sql) + self.sydent.db.commit() diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index 2e17777e..bcc10ea2 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -14,10 +14,39 @@ See the License for the specific language governing permissions and limitations under the License. */ -CREATE TABLE IF NOT EXISTS local_threepid_associations (id integer primary key, medium varchar(16) not null, address varchar(256) not null, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, notAfter bigint not null); +CREATE TABLE IF NOT EXISTS local_threepid_associations ( + id integer primary key, + medium varchar(16) not null, + address varchar(256) not null, + hash varchar(256) not null, + mxid varchar(256) not null, + ts integer not null, + notBefore bigint not null, + notAfter bigint not null +); CREATE UNIQUE INDEX IF NOT EXISTS medium_address on local_threepid_associations(medium, address); -CREATE TABLE IF NOT EXISTS global_threepid_associations (id integer primary key, medium varchar(16) not null, address varchar(256) not null, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, notAfter integer not null, originServer varchar(255) not null, originId integer not null, sgAssoc text not null); +CREATE TABLE IF NOT EXISTS global_threepid_associations ( + id integer primary key, + medium varchar(16) not null, + address varchar(256) not null, + hash varchar(256) not null, + mxid varchar(256) not null, + ts integer not null, + notBefore bigint not null, + notAfter 
integer not null, + originServer varchar(255) not null, + originId integer not null, + sgAssoc text not null +); CREATE INDEX IF NOT EXISTS medium_address on global_threepid_associations (medium, address); CREATE INDEX IF NOT EXISTS medium_lower_address on global_threepid_associations (medium, lower(address)); CREATE UNIQUE INDEX IF NOT EXISTS originServer_originId on global_threepid_associations (originServer, originId); + +/* + * hashing_metadata contains information needed for the identity server to carry + * out tasks related to hashing. Salts and peppers etc. should go here. + */ +CREATE TABLE IF NOT EXISTS hashing_metadata ( + lookup_pepper varchar(256) not null, +); diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 133fea66..64773270 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -85,7 +85,7 @@ def render_POST(self, request): logger.info("Lookup of %d threepids with algorithm", len(addresses), algorithm) - # TODO: Matching. Make sure to generate hashes in the DB. Use migrations to generate hashes for existing 3PIDs? + return json.dumps({ 'mappings': results }) diff --git a/sydent/sydent.py b/sydent/sydent.py index 42cda037..0fcbb462 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -56,6 +56,7 @@ from http.servlets.v2_servlet import V2Servlet from db.valsession import ThreePidValSessionStore +from db.threepid_associations import HashingMetadataStore from threepid.bind import ThreepidBinder @@ -195,10 +196,19 @@ def sighup(signum, stack): # Determine whether a lookup_pepper value has been defined lookup_pepper = self.cfg.get("hashing", "lookup_pepper") + # If lookup_pepper hasn't been defined, or is an empty string... if not lookup_pepper: - # If lookup_pepper hasn't been defined, or is an empty string, - # generate one - self.cfg.set("hashing", "lookup_pepper", generateAlphanumericTokenOfLength(5)) + # ...and it is not defined in the database... 
+ if HashingMetadataStore.is_new("lookup_pepper", lookup_pepper): + new_pepper = generateAlphanumericTokenOfLength(5) + # ...then generate one + self.cfg.set( + "hashing", "lookup_pepper", + new_pepper, + ) + + # Store it in the DB + HashingMetadataStore.store_values({"lookup_pepper": new_pepper}) self.validators = Validators() self.validators.email = EmailValidator(self) From 965d7ae029e1791928e5ce17a16ada02b45d3712 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 14:17:33 +0100 Subject: [PATCH 03/25] Rehash 3pids on pepper change. DB schema migrations --- sydent/db/sqlitedb.py | 17 ++++++++++++++ sydent/db/threepid_associations.py | 36 +++++++++++++++++++++++++++++ sydent/db/threepid_associations.sql | 2 ++ sydent/sydent.py | 33 ++++++++++++++++++++++---- 4 files changed, 84 insertions(+), 4 deletions(-) diff --git a/sydent/db/sqlitedb.py b/sydent/db/sqlitedb.py index 427f83d2..50819f52 100644 --- a/sydent/db/sqlitedb.py +++ b/sydent/db/sqlitedb.py @@ -132,6 +132,7 @@ def _upgradeSchema(self): self.db.commit() logger.info("v0 -> v1 schema migration complete") self._setSchemaVersion(1) + if curVer < 2: cur = self.db.cursor() cur.execute("CREATE INDEX threepid_validation_sessions_mtime ON threepid_validation_sessions(mtime)") @@ -139,6 +140,22 @@ def _upgradeSchema(self): logger.info("v1 -> v2 schema migration complete") self._setSchemaVersion(2) + if curVer < 3: + cur = self.db.cursor() + cur.execute("ALTER local_threepid_associations ADD COLUMN hash VARCHAR(256) NOT NULL") + cur.execute("CREATE INDEX IF NOT EXISTS hash_medium on local_threepid_associations (hash, medium))") + + cur.execute("ALTER global_threepid_associations ADD COLUMN hash VARCHAR(256) NOT NULL") + cur.execute("CREATE INDEX IF NOT EXISTS hash_medium on global_threepid_associations (hash, medium)") + + cur.execute( + "CREATE TABLE IF NOT EXISTS hashing_metadata " + "(lookup_pepper varchar(256) not null)" + ) + self.db.commit() + logger.info("v2 -> v3 schema migration complete") 
+ self._setSchemaVersion(3) + def _getSchemaVersion(self): cur = self.db.cursor() res = cur.execute("PRAGMA user_version"); diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index c6d71e10..4bc4d136 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -272,3 +272,39 @@ def store_values(self, names_and_values): cur.execute(sql) self.sydent.db.commit() + + def rehash_threepids(self, hashing_function, pepper): + """Rehash all 3PIDs using a given hashing_function and pepper + + :param hashing_function: A function with single input and output strings + :type hashing_function func(str) -> str + + :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing. + :type pepper: str + """ + # Pull items from the database + cur = self.sydent.db.cursor() + + # Medium/address combos are marked as UNIQUE in the database + sql = "SELECT medium, address FROM local_threepid_associations" + res = cur.execute(sql) + rows = res.fetchall() + + for medium, address in rows: + # Combine the medium, address and pepper together in the following form: + # "address medium pepper" + # According to MSC2134: https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md + combo = "%s %s %s" % (address, medium, pepper) + + # Hash the resulting string + result = hashing_function(combo) + + # Save the result to the DB + sql = ( + "UPDATE local_threepid_associations SET hash = '%s' " + "WHERE medium = %s AND address = %s" + % (result, medium, address) + ) + cur.execute(sql) + + self.sydent.db.commit() diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index bcc10ea2..68cd5835 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -24,6 +24,7 @@ CREATE TABLE IF NOT EXISTS local_threepid_associations ( notBefore bigint not null, notAfter bigint not null ); +CREATE INDEX IF NOT EXISTS 
hash_medium on local_threepid_associations (hash, medium)); CREATE UNIQUE INDEX IF NOT EXISTS medium_address on local_threepid_associations(medium, address); CREATE TABLE IF NOT EXISTS global_threepid_associations ( @@ -39,6 +40,7 @@ CREATE TABLE IF NOT EXISTS global_threepid_associations ( originId integer not null, sgAssoc text not null ); +CREATE INDEX IF NOT EXISTS hash_medium on global_threepid_associations (hash, medium); CREATE INDEX IF NOT EXISTS medium_address on global_threepid_associations (medium, address); CREATE INDEX IF NOT EXISTS medium_lower_address on global_threepid_associations (medium, lower(address)); CREATE UNIQUE INDEX IF NOT EXISTS originServer_originId on global_threepid_associations (originServer, originId); diff --git a/sydent/sydent.py b/sydent/sydent.py index 0fcbb462..c7dc01ec 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -38,6 +38,8 @@ from validators.msisdnvalidator import MsisdnValidator from hs_federation.verifier import Verifier +from util.hash import sha256_and_url_safe_base64 + from sign.ed25519 import SydentEd25519 from http.servlets.emailservlet import EmailRequestCodeServlet, EmailValidateCodeServlet @@ -198,18 +200,41 @@ def sighup(signum, stack): lookup_pepper = self.cfg.get("hashing", "lookup_pepper") # If lookup_pepper hasn't been defined, or is an empty string... if not lookup_pepper: - # ...and it is not defined in the database... 
- if HashingMetadataStore.is_new("lookup_pepper", lookup_pepper): + # See if it exists in the database + lookup_pepper_db = HashingMetadataStore.retrieve_value("lookup_pepper") + if lookup_pepper_db: + # A pepper already exists, use it + self.cfg.set("hashing", "lookup_pepper", lookup_pepper_db) + else: + # No pepper defined and there isn't one in the database + # Generate one new_pepper = generateAlphanumericTokenOfLength(5) - # ...then generate one + + # Cache it self.cfg.set( "hashing", "lookup_pepper", new_pepper, ) - # Store it in the DB + # Store it in the database HashingMetadataStore.store_values({"lookup_pepper": new_pepper}) + # Re-hash all 3pids + HashingMetadataStore.rehash_threepids( + sha256_and_url_safe_base64, new_pepper, + ) + else: + # If it has been defined, check if it's different from what we have + # in the database + if HashingMetadataStore.is_new("lookup_pepper", lookup_pepper): + # Store the new pepper in the database + HashingMetadataStore.store_values({"lookup_pepper": lookup_pepper}) + + # Re-hash all 3pids + HashingMetadataStore.rehash_threepids( + sha256_and_url_safe_base64, lookup_pepper, + ) + self.validators = Validators() self.validators.email = EmailValidator(self) self.validators.msisdn = MsisdnValidator(self) From 30e679bb9f03c24f9bb69c3997d078e377815940 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 14:40:42 +0100 Subject: [PATCH 04/25] Move HashingMetadataStore to its own file --- sydent/db/hashing_metadata.py | 123 +++++++++++++++++++++++++++++ sydent/db/threepid_associations.py | 93 ---------------------- sydent/sydent.py | 2 +- sydent/util/hash.py | 24 ++++++ 4 files changed, 148 insertions(+), 94 deletions(-) create mode 100644 sydent/db/hashing_metadata.py create mode 100644 sydent/util/hash.py diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py new file mode 100644 index 00000000..9a03f39e --- /dev/null +++ b/sydent/db/hashing_metadata.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 
-*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class HashingMetadataStore: + def __init__(self, sydent): + self.sydent = sydent + + def retrieve_value(self, name) + """Return a value from the hashing_metadata table + + :param name: The name of the db column to return the value for + :type name: str + + :returns a value corresponding to the specified name, or None if a + value does not exist + """ + cur = self.sydent.db.cursor() + res = cur.execute("select %s from hashing_metadata" % name) + row = res.fetchone() + + if not row: + return None + + return row[0] + + def is_new(self, name, value): + """ + Returns whether a provided value does NOT match a value stored in the + database under the specified db column name + + :param name: The name of the db column to check + :type name: str + + :param value: The value to check against + + :returns a boolean that is true if the the provided value and the + value of the item under the named db column is different + :rtype: bool + """ + db_value = self.retrieve_value(name) + if not value: + return False + return value != db_value + + def store_values(self, names_and_values): + """Stores values in the hashing_metadata table under the named columns + + :param names_and_values: Column names and associated values to store + in the database + :type names_and_values: Dict + """ + cur = self.sydent.db.cursor() + + columns = ', '.join(names_and_values.keys()) + values = ', 
'.join('?' * len(names_and_values)) + sql = 'INSERT INTO hashing_metadata ({}) VALUES ({})'.format(columns, values) + + cur.execute(sql) + self.sydent.db.commit() + + def rehash_threepids(self, hashing_function, pepper): + """Rehash all 3PIDs using a given hashing_function and pepper + + :param hashing_function: A function with single input and output strings + :type hashing_function func(str) -> str + + :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing. + :type pepper: str + """ + self._rehash_threepids(hashing_function, pepper, "local_threepid_associations") + self._rehash_threepids(hashing_function, pepper, "global_threepid_associations") + + def _rehash_threepids(self, hashing_function, pepper, table): + """Rehash 3PIDs of a given table using a given hashing_function and pepper + + :param hashing_function: A function with single input and output strings + :type hashing_function func(str) -> str + + :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing. 
+ :type pepper: str + + :param table: The database table to perform the rehashing on + :type table: str + """ + # Pull items from the database + cur = self.sydent.db.cursor() + + # Medium/address combos are marked as UNIQUE in the database + sql = "SELECT medium, address FROM %s" % table + res = cur.execute(sql) + rows = res.fetchall() + + for medium, address in rows: + # Combine the medium, address and pepper together in the following form: + # "address medium pepper" + # According to MSC2134: https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md + combo = "%s %s %s" % (address, medium, pepper) + + # Hash the resulting string + result = hashing_function(combo) + + # Save the result to the DB + sql = ( + "UPDATE %s SET hash = '%s' " + "WHERE medium = %s AND address = %s" + % (table, result, medium, address) + ) + cur.execute(sql) + + self.sydent.db.commit() diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 4bc4d136..4948352f 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -215,96 +215,3 @@ def removeAssociation(self, medium, address): cur.rowcount, medium, address, ) self.sydent.db.commit() - -class HashingMetadataStore: - def __init__(self, sydent): - self.sydent = sydent - - def retrieve_value(self, name) - """Return a value from the hashing_metadata table - - :param name: The name of the db column to return the value for - :type name: str - - :returns a value corresponding to the specified name, or None if a - value does not exist - """ - cur = self.sydent.db.cursor() - res = cur.execute("select %s from hashing_metadata" % name) - row = res.fetchone() - - if not row: - return None - - return row[0] - - def is_new(self, name, value): - """ - Returns whether a provided value does NOT match a value stored in the - database under the specified db column name - - :param name: The name of the db column to check - :type name: str - - 
:param value: The value to check against - - :returns a boolean that is true if the the provided value and the - value of the item under the named db column is different - :rtype: bool - """ - db_value = self.retrieve_value(name) - if not value: - return False - return value != db_value - - def store_values(self, names_and_values): - """Stores values in the hashing_metadata table under the named columns - - :param names_and_values: Column names and associated values to store - in the database - :type names_and_values: Dict - """ - cur = self.sydent.db.cursor() - - columns = ', '.join(names_and_values.keys()) - values = ', '.join('?' * len(names_and_values)) - sql = 'INSERT INTO hashing_metadata ({}) VALUES ({})'.format(columns, values) - - cur.execute(sql) - self.sydent.db.commit() - - def rehash_threepids(self, hashing_function, pepper): - """Rehash all 3PIDs using a given hashing_function and pepper - - :param hashing_function: A function with single input and output strings - :type hashing_function func(str) -> str - - :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing. 
- :type pepper: str - """ - # Pull items from the database - cur = self.sydent.db.cursor() - - # Medium/address combos are marked as UNIQUE in the database - sql = "SELECT medium, address FROM local_threepid_associations" - res = cur.execute(sql) - rows = res.fetchall() - - for medium, address in rows: - # Combine the medium, address and pepper together in the following form: - # "address medium pepper" - # According to MSC2134: https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md - combo = "%s %s %s" % (address, medium, pepper) - - # Hash the resulting string - result = hashing_function(combo) - - # Save the result to the DB - sql = ( - "UPDATE local_threepid_associations SET hash = '%s' " - "WHERE medium = %s AND address = %s" - % (result, medium, address) - ) - cur.execute(sql) - - self.sydent.db.commit() diff --git a/sydent/sydent.py b/sydent/sydent.py index c7dc01ec..3ab89892 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -58,7 +58,7 @@ from http.servlets.v2_servlet import V2Servlet from db.valsession import ThreePidValSessionStore -from db.threepid_associations import HashingMetadataStore +from db.hashing_metadata import HashingMetadataStore from threepid.bind import ThreepidBinder diff --git a/sydent/util/hash.py b/sydent/util/hash.py new file mode 100644 index 00000000..edf9b211 --- /dev/null +++ b/sydent/util/hash.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import unpaddedbase64 + + +def sha256_and_url_safe_base64(input_text): + """SHA256 hash an input string, encode it as url-safe base64, and return it""" + digest = hashlib.sha256(input_text.encode()).digest() + return unpaddedbase64.encode_base64(digest, urlsafe=True) From 7527c450c688c4408f4d29c1a6997d9af414cfa6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 16:04:18 +0100 Subject: [PATCH 05/25] Complete lookup --- sydent/db/hashing_metadata.py | 1 + sydent/db/threepid_associations.py | 28 +++++++++++++++++++++++- sydent/http/servlets/lookupservlet.py | 1 + sydent/http/servlets/lookupv2servlet.py | 29 +++++++++++++++++++++---- sydent/http/srvresolver.py | 2 +- 5 files changed, 55 insertions(+), 6 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 9a03f39e..4897cf68 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -103,6 +103,7 @@ def _rehash_threepids(self, hashing_function, pepper, table): res = cur.execute(sql) rows = res.fetchall() + # TODO: Do this in batches for medium, address in rows: # Combine the medium, address and pepper together in the following form: # "address medium pepper" diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 4948352f..8e57d97e 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -138,6 +138,16 @@ def getMxid(self, medium, address): return row[0] def getMxids(self, threepid_tuples): + """Given a list of threepid_tuples, the same list but with MXIDs + appended to each tuple for which a match was found in the database + for. 
Output is ordered by medium, address, timestamp DESC + + :param threepid_tuples: List containing (medium, address) tuples + :type threepid_tuples: [(str, str)] + + :returns a list of (medium, address, mxid) tuples + :rtype [(str, str, str)] + """ cur = self.sydent.db.cursor() cur.execute("CREATE TEMPORARY TABLE tmp_getmxids (medium VARCHAR(16), address VARCHAR(256))"); @@ -181,7 +191,6 @@ def addAssociation(self, assoc, rawSgAssoc, originServer, originId, commit=True) """ :param assoc: (sydent.threepid.GlobalThreepidAssociation) The association to add as a high level object :param sgAssoc: The original raw bytes of the signed association - :return: """ cur = self.sydent.db.cursor() res = cur.execute("insert or ignore into global_threepid_associations " @@ -215,3 +224,20 @@ def removeAssociation(self, medium, address): cur.rowcount, medium, address, ) self.sydent.db.commit() + + def retrieveMxidFromHash(self, input_hash): + """Returns an mxid from a given hash value + + :param input_hash: The hash string to lookup in the database + :type input_hash: str + + :returns the MXID relating to the hash if one is found, otherwise None + :rtype: str|None + """ + cur = self.sydent.db.cursor() + + res = cur.execute("SELECT mxid WHERE hash = ?", (input_hash,)) + row = res.fetchone() + if not row: + return None + return row[0] diff --git a/sydent/http/servlets/lookupservlet.py b/sydent/http/servlets/lookupservlet.py index 4ff92af7..6927a72a 100644 --- a/sydent/http/servlets/lookupservlet.py +++ b/sydent/http/servlets/lookupservlet.py @@ -36,6 +36,7 @@ def __init__(self, syd): def render_GET(self, request): """ Look up an individual threepid. + ** DEPRECATED ** Params: 'medium': the medium of the threepid 'address': the address of the threepid Returns: A signed association if the threepid has a corresponding mxid, otherwise the empty object. 
diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 64773270..b8cb3148 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -15,15 +15,13 @@ # limitations under the License. from twisted.web.resource import Resource -from sydent.db.threepid_associations import GlobalAssociationStore -from sydent.http.servlets.hashdetailsservlet import HashDetailsServlet import logging import json import signedjson.sign from sydent.http.servlets import get_args, jsonwrap, send_cors - +from sydent.db.threepid_associations import GlobalAssociationStore logger = logging.getLogger(__name__) @@ -33,6 +31,7 @@ class LookupV2Servlet(Resource): def __init__(self, syd): self.sydent = syd + self.globalAssociationStore = GlobalAssociationStore(self.sydent) def render_POST(self, request): """ @@ -84,11 +83,33 @@ def render_POST(self, request): return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"}, None logger.info("Lookup of %d threepids with algorithm", len(addresses), algorithm) + if algorithm == "none": + # Lookup without hashing + medium_address_tuples = [] + for medium_and_address in addresses: + # Parse medium, address components + # Being careful to account for 3PIDs one day having spaces in the address + split_input = medium_and_address.split() + (address, medium) = (' '.join(split_input[:-1]), split_input[-1]) + + medium_address_tuples.append((medium, address)) + + # Lookup the mxids + medium_address_mxid_tuples = GlobalAssociationStore.getMxids(medium_address_tuples) + return json.dumps({'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples}}) + elif algorithm == "sha256": + # Lookup using SHA256 with URL-safe base64 encoding + mappings = {} + for h in addresses: + mxid = self.globalAssociationStore.retrieveMxidFromHash(h) + if mxid: + mappings[h] = mxid - return json.dumps({ 'mappings': results }) + return json.dumps({'mappings': mappings}) + return 
{'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}, None @jsonwrap def render_OPTIONS(self, request): diff --git a/sydent/http/srvresolver.py b/sydent/http/srvresolver.py index 4664c99a..01410df4 100644 --- a/sydent/http/srvresolver.py +++ b/sydent/http/srvresolver.py @@ -108,7 +108,7 @@ def resolve_service(self, service_name): :type service_name: bytes :returns a list of the SRV records, or an empty list if none found. - :rtype: Deferred[list[Server]] + :rtype: Deferred[[Server]] """ now = int(self._get_time()) From 3ae1d09ef0a48fe10e2f64b09f8907d601c22003 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 16:06:53 +0100 Subject: [PATCH 06/25] Batch updating --- sydent/db/hashing_metadata.py | 39 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 4897cf68..2c9cb58f 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -103,22 +103,25 @@ def _rehash_threepids(self, hashing_function, pepper, table): res = cur.execute(sql) rows = res.fetchall() - # TODO: Do this in batches - for medium, address in rows: - # Combine the medium, address and pepper together in the following form: - # "address medium pepper" - # According to MSC2134: https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md - combo = "%s %s %s" % (address, medium, pepper) - - # Hash the resulting string - result = hashing_function(combo) - - # Save the result to the DB - sql = ( - "UPDATE %s SET hash = '%s' " - "WHERE medium = %s AND address = %s" - % (table, result, medium, address) - ) - cur.execute(sql) + batch_size = 500 + count = 0 + while count < len(rows): + for medium, address in rows[count:count+batch_size]: + # Combine the medium, address and pepper together in the following form: + # "address medium pepper" + # According to MSC2134: 
https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md + combo = "%s %s %s" % (address, medium, pepper) + + # Hash the resulting string + result = hashing_function(combo) + + # Save the result to the DB + sql = ( + "UPDATE %s SET hash = '%s' " + "WHERE medium = %s AND address = %s" + % (table, result, medium, address) + ) + cur.execute(sql) + + self.sydent.db.commit() - self.sydent.db.commit() From 76a651ac21e96fdfff8ef6b3362514527d87df56 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 17:31:33 +0100 Subject: [PATCH 07/25] Hash assocs as they come in --- sydent/db/hashing_metadata.py | 1 - sydent/db/threepid_associations.py | 12 ++++++------ sydent/http/servlets/lookupv2servlet.py | 6 ++---- sydent/replication/peer.py | 8 ++++++++ sydent/threepid/__init__.py | 14 +++++++++++-- sydent/threepid/bind.py | 15 +++++++++++++- sydent/util/hash.py | 26 +++++++++++++++++++++++++ 7 files changed, 68 insertions(+), 14 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 2c9cb58f..782bb604 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -124,4 +124,3 @@ def _rehash_threepids(self, hashing_function, pepper, table): cur.execute(sql) self.sydent.db.commit() - diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 8e57d97e..98a1f691 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -34,9 +34,9 @@ def addOrUpdateAssociation(self, assoc): # sqlite's support for upserts is atrocious cur.execute("insert or replace into local_threepid_associations " - "('medium', 'address', 'mxid', 'ts', 'notBefore', 'notAfter')" - " values (?, ?, ?, ?, ?, ?)", - (assoc.medium, assoc.address, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) + "('medium', 'address', 'hash', mxid', 'ts', 'notBefore', 'notAfter')" + " values (?, ?, ?, ?, ?, ?, ?)", + (assoc.medium, assoc.address, 
assoc.hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) self.sydent.db.commit() def getAssociationsAfterId(self, afterId, limit): @@ -194,9 +194,9 @@ def addAssociation(self, assoc, rawSgAssoc, originServer, originId, commit=True) """ cur = self.sydent.db.cursor() res = cur.execute("insert or ignore into global_threepid_associations " - "(medium, address, mxid, ts, notBefore, notAfter, originServer, originId, sgAssoc) values " - "(?, ?, ?, ?, ?, ?, ?, ?, ?)", - (assoc.medium, assoc.address, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after, + "(medium, address, hash, mxid, ts, notBefore, notAfter, originServer, originId, sgAssoc) values " + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + (assoc.medium, assoc.address, assoc.hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after, originServer, originId, rawSgAssoc)) if commit: self.sydent.db.commit() diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index b8cb3148..f994a354 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -22,6 +22,7 @@ from sydent.http.servlets import get_args, jsonwrap, send_cors from sydent.db.threepid_associations import GlobalAssociationStore +from sydent.util.hash import parse_space_separated_str logger = logging.getLogger(__name__) @@ -88,10 +89,7 @@ def render_POST(self, request): medium_address_tuples = [] for medium_and_address in addresses: # Parse medium, address components - # Being careful to account for 3PIDs one day having spaces in the address - split_input = medium_and_address.split() - (address, medium) = (' '.join(split_input[:-1]), split_input[-1]) - + medium, address = parse_space_separated_str(medium_and_address) medium_address_tuples.append((medium, address)) # Lookup the mxids diff --git a/sydent/replication/peer.py b/sydent/replication/peer.py index 1371f9de..46cf6fc4 100644 --- a/sydent/replication/peer.py +++ b/sydent/replication/peer.py @@ -19,6 +19,7 @@ from 
sydent.db.threepid_associations import GlobalAssociationStore from sydent.threepid import threePidAssocFromDict from sydent.config import ConfigError +from sydent.util.hash import sha256_and_url_safe_base64 from unpaddedbase64 import decode_base64 import signedjson.sign @@ -68,7 +69,14 @@ def pushUpdates(self, sgAssocs): for localId in sgAssocs: if localId > self.lastId: assocObj = threePidAssocFromDict(sgAssocs[localId]) + if assocObj.mxid is not None: + # Assign a hash to this association for the purposes of lookup + hash_str = ' '.join( + [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], + ) + assocObj.hash = sha256_and_url_safe_base64(hash_str) + # We can probably skip verification for the local peer (although it could be good as a sanity check) globalAssocStore.addAssociation(assocObj, json.dumps(sgAssocs[localId]), self.sydent.server_name, localId) diff --git a/sydent/threepid/__init__.py b/sydent/threepid/__init__.py index 7d225445..1f1d0ec1 100644 --- a/sydent/threepid/__init__.py +++ b/sydent/threepid/__init__.py @@ -15,14 +15,23 @@ # limitations under the License. def threePidAssocFromDict(d): - assoc = ThreepidAssociation(d['medium'], d['address'], d['mxid'], d['ts'], d['not_before'], d['not_after']) + assoc = ThreepidAssociation( + d['medium'], + d['address'], + None, # empty hash digest by default + d['mxid'], + d['ts'], + d['not_before'], + d['not_after'], + ) return assoc class ThreepidAssociation: - def __init__(self, medium, address, mxid, ts, not_before, not_after): + def __init__(self, medium, address, hash_digest, mxid, ts, not_before, not_after): """ :param medium: The medium of the 3pid (eg. email) :param address: The identifier (eg. 
email address) + :param hash_digest: A hash digest of the 3pid :param mxid: The matrix ID the 3pid is associated with :param ts: The creation timestamp of this association, ms :param not_before: The timestamp, in ms, at which this association becomes valid @@ -30,6 +39,7 @@ def __init__(self, medium, address, mxid, ts, not_before, not_after): """ self.medium = medium self.address = address + self.hash = hash_digest self.mxid = mxid self.ts = ts self.not_before = not_before diff --git a/sydent/threepid/bind.py b/sydent/threepid/bind.py index 455fee7c..35adb666 100644 --- a/sydent/threepid/bind.py +++ b/sydent/threepid/bind.py @@ -25,6 +25,8 @@ from sydent.db.threepid_associations import LocalAssociationStore from sydent.util import time_msec +from sydent.util.hash import sha256_and_url_safe_base64 +from sydent.db.hashing_metadata import HashingMetadataStore from sydent.threepid.signer import Signer from sydent.http.httpclient import FederationHttpClient @@ -62,9 +64,20 @@ def addBinding(self, medium, address, mxid): """ localAssocStore = LocalAssociationStore(self.sydent) + # Fill out the association details createdAt = time_msec() expires = createdAt + ThreepidBinder.THREEPID_ASSOCIATION_LIFETIME_MS - assoc = ThreepidAssociation(medium, address, mxid, createdAt, createdAt, expires) + + # Hash the medium + address and store that hash for the purposes of + # later lookups + str_to_hash = ' '.join( + [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], + ) + hash_digest = sha256_and_url_safe_base64(str_to_hash) + + assoc = ThreepidAssociation( + medium, address, hash_digest, mxid, createdAt, createdAt, expires, + ) localAssocStore.addOrUpdateAssociation(assoc) diff --git a/sydent/util/hash.py b/sydent/util/hash.py index edf9b211..26c4ff60 100644 --- a/sydent/util/hash.py +++ b/sydent/util/hash.py @@ -22,3 +22,29 @@ def sha256_and_url_safe_base64(input_text): """SHA256 hash an input string, encode it as url-safe base64, and return it""" digest = 
hashlib.sha256(input_text.encode()).digest() return unpaddedbase64.encode_base64(digest, urlsafe=True) + +def parse_space_separated_str(self, input_str): + """Parses a string containing values seperated by a space. Joins the leading chunks if there are more than two. + + Used for parsing medium, address values. + + e.g. If given input_str="someaddress somemedium", + this function will return ("someaddress", "somemedium"). + + If given input_str="some address somemedium", + this function will return ("some address", "somemedium"). + + This is due to the future possibility of address values containing + spaces. + + :param input_str: The space-separated str to split + :type input_str: str + + :returns a list with 2 strings in it + :rtype [str, str] + """ + # Split the string by spaces + split_input = input_str.split() + + # Return the last item separated from the rest + return (' '.join(split_input[:-1]), split_input[-1]) From c122d9bfe703cc2978302abf98c423a35edc0bda Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 17:41:24 +0100 Subject: [PATCH 08/25] hash->lookup_hash in prep for hashing 3pids at rest --- sydent/db/hashing_metadata.py | 6 +++--- sydent/db/sqlitedb.py | 10 +++++----- sydent/db/threepid_associations.py | 19 ++++++++++--------- sydent/db/threepid_associations.sql | 14 +++++++------- sydent/http/servlets/lookupv2servlet.py | 2 +- sydent/replication/peer.py | 6 +++--- sydent/threepid/__init__.py | 8 ++++---- sydent/threepid/bind.py | 4 ++-- sydent/util/hash.py | 2 +- 9 files changed, 36 insertions(+), 35 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 782bb604..88a6881b 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -107,8 +107,8 @@ def _rehash_threepids(self, hashing_function, pepper, table): count = 0 while count < len(rows): for medium, address in rows[count:count+batch_size]: - # Combine the medium, address and pepper together in the following form: - # 
"address medium pepper" + # Combine the medium, address and pepper together in the + # following form: "address medium pepper" # According to MSC2134: https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md combo = "%s %s %s" % (address, medium, pepper) @@ -117,7 +117,7 @@ def _rehash_threepids(self, hashing_function, pepper, table): # Save the result to the DB sql = ( - "UPDATE %s SET hash = '%s' " + "UPDATE %s SET lookup_hash = '%s' " "WHERE medium = %s AND address = %s" % (table, result, medium, address) ) diff --git a/sydent/db/sqlitedb.py b/sydent/db/sqlitedb.py index 50819f52..a4404e73 100644 --- a/sydent/db/sqlitedb.py +++ b/sydent/db/sqlitedb.py @@ -142,14 +142,14 @@ def _upgradeSchema(self): if curVer < 3: cur = self.db.cursor() - cur.execute("ALTER local_threepid_associations ADD COLUMN hash VARCHAR(256) NOT NULL") - cur.execute("CREATE INDEX IF NOT EXISTS hash_medium on local_threepid_associations (hash, medium))") + cur.execute("ALTER local_threepid_associations ADD COLUMN lookup_hash VARCHAR(256) NOT NULL") + cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium))") - cur.execute("ALTER global_threepid_associations ADD COLUMN hash VARCHAR(256) NOT NULL") - cur.execute("CREATE INDEX IF NOT EXISTS hash_medium on global_threepid_associations (hash, medium)") + cur.execute("ALTER global_threepid_associations ADD COLUMN lookup_hash VARCHAR(256) NOT NULL") + cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on global_threepid_associations (lookup_hash, medium)") cur.execute( - "CREATE TABLE IF NOT EXISTS hashing_metadata " + "CREATE TABLE IF NOT EXISTS lookup_hashing_metadata " "(lookup_pepper varchar(256) not null)" ) self.db.commit() diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 98a1f691..678bde2d 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -34,9 +34,9 @@ 
def addOrUpdateAssociation(self, assoc): # sqlite's support for upserts is atrocious cur.execute("insert or replace into local_threepid_associations " - "('medium', 'address', 'hash', mxid', 'ts', 'notBefore', 'notAfter')" + "('medium', 'address', 'lookup_hash', mxid', 'ts', 'notBefore', 'notAfter')" " values (?, ?, ?, ?, ?, ?, ?)", - (assoc.medium, assoc.address, assoc.hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) + (assoc.medium, assoc.address, assoc.lookup_hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) self.sydent.db.commit() def getAssociationsAfterId(self, afterId, limit): @@ -194,9 +194,9 @@ def addAssociation(self, assoc, rawSgAssoc, originServer, originId, commit=True) """ cur = self.sydent.db.cursor() res = cur.execute("insert or ignore into global_threepid_associations " - "(medium, address, hash, mxid, ts, notBefore, notAfter, originServer, originId, sgAssoc) values " + "(medium, address, lookup_hash, mxid, ts, notBefore, notAfter, originServer, originId, sgAssoc) values " "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", - (assoc.medium, assoc.address, assoc.hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after, + (assoc.medium, assoc.address, assoc.lookup_hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after, originServer, originId, rawSgAssoc)) if commit: self.sydent.db.commit() @@ -225,18 +225,19 @@ def removeAssociation(self, medium, address): ) self.sydent.db.commit() - def retrieveMxidFromHash(self, input_hash): - """Returns an mxid from a given hash value + def retrieveMxidFromHash(self, lookup_hash): + """Returns an mxid from a given lookup_hash value - :param input_hash: The hash string to lookup in the database + :param input_hash: The lookup_hash value to lookup in the database :type input_hash: str - :returns the MXID relating to the hash if one is found, otherwise None + :returns the MXID relating to the lookup_hash value if found, + otherwise None :rtype: str|None """ cur = self.sydent.db.cursor() - res = 
cur.execute("SELECT mxid WHERE hash = ?", (input_hash,)) + res = cur.execute("SELECT mxid WHERE lookup_hash = ?", (lookup_hash,)) row = res.fetchone() if not row: return None diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index 68cd5835..d4c6855f 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -18,20 +18,20 @@ CREATE TABLE IF NOT EXISTS local_threepid_associations ( id integer primary key, medium varchar(16) not null, address varchar(256) not null, - hash varchar(256) not null, + lookup_hash varchar(256) not null, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, notAfter bigint not null ); -CREATE INDEX IF NOT EXISTS hash_medium on local_threepid_associations (hash, medium)); +CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium)); CREATE UNIQUE INDEX IF NOT EXISTS medium_address on local_threepid_associations(medium, address); CREATE TABLE IF NOT EXISTS global_threepid_associations ( id integer primary key, medium varchar(16) not null, address varchar(256) not null, - hash varchar(256) not null, + lookup_hash varchar(256) not null, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, @@ -40,15 +40,15 @@ CREATE TABLE IF NOT EXISTS global_threepid_associations ( originId integer not null, sgAssoc text not null ); -CREATE INDEX IF NOT EXISTS hash_medium on global_threepid_associations (hash, medium); +CREATE INDEX IF NOT EXISTS lookup_hash_medium on global_threepid_associations (lookup_hash, medium); CREATE INDEX IF NOT EXISTS medium_address on global_threepid_associations (medium, address); CREATE INDEX IF NOT EXISTS medium_lower_address on global_threepid_associations (medium, lower(address)); CREATE UNIQUE INDEX IF NOT EXISTS originServer_originId on global_threepid_associations (originServer, originId); /* - * hashing_metadata contains information needed for the identity server to 
carry - * out tasks related to hashing. Salts and peppers etc. should go here. + * lookup_hashing_metadata contains information needed for the identity server to carry + * out tasks related to lookup_hashing. Salts and peppers etc. should go here. */ -CREATE TABLE IF NOT EXISTS hashing_metadata ( +CREATE TABLE IF NOT EXISTS lookup_hashing_metadata ( lookup_pepper varchar(256) not null, ); diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index f994a354..3acdbddd 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -38,7 +38,7 @@ def render_POST(self, request): """ Perform lookups with potentially hashed 3PID details. - Depending on our response to /hash_details, the client will chosoe a + Depending on our response to /hash_details, the client will choose a hash algorithm and pepper, hash the 3PIDs it wants to lookup, and send them to us, along with the algorithm and pepper it used. diff --git a/sydent/replication/peer.py b/sydent/replication/peer.py index 46cf6fc4..dcadf909 100644 --- a/sydent/replication/peer.py +++ b/sydent/replication/peer.py @@ -71,11 +71,11 @@ def pushUpdates(self, sgAssocs): assocObj = threePidAssocFromDict(sgAssocs[localId]) if assocObj.mxid is not None: - # Assign a hash to this association for the purposes of lookup - hash_str = ' '.join( + # Assign a lookup_hash to this association for the purposes of lookup + str_to_hash = ' '.join( [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], ) - assocObj.hash = sha256_and_url_safe_base64(hash_str) + assocObj.lookup_hash = sha256_and_url_safe_base64(hash_str) # We can probably skip verification for the local peer (although it could be good as a sanity check) globalAssocStore.addAssociation(assocObj, json.dumps(sgAssocs[localId]), diff --git a/sydent/threepid/__init__.py b/sydent/threepid/__init__.py index 1f1d0ec1..2d6d82ee 100644 --- a/sydent/threepid/__init__.py +++ 
b/sydent/threepid/__init__.py @@ -18,7 +18,7 @@ def threePidAssocFromDict(d): assoc = ThreepidAssociation( d['medium'], d['address'], - None, # empty hash digest by default + None, # empty lookup_hash digest by default d['mxid'], d['ts'], d['not_before'], @@ -27,11 +27,11 @@ def threePidAssocFromDict(d): return assoc class ThreepidAssociation: - def __init__(self, medium, address, hash_digest, mxid, ts, not_before, not_after): + def __init__(self, medium, address, lookup_hash, mxid, ts, not_before, not_after): """ :param medium: The medium of the 3pid (eg. email) :param address: The identifier (eg. email address) - :param hash_digest: A hash digest of the 3pid + :param lookup_hash: A hash digest of the 3pid. Can be a str or None :param mxid: The matrix ID the 3pid is associated with :param ts: The creation timestamp of this association, ms :param not_before: The timestamp, in ms, at which this association becomes valid @@ -39,7 +39,7 @@ def __init__(self, medium, address, hash_digest, mxid, ts, not_before, not_after """ self.medium = medium self.address = address - self.hash = hash_digest + self.lookup_hash = lookup_hash self.mxid = mxid self.ts = ts self.not_before = not_before diff --git a/sydent/threepid/bind.py b/sydent/threepid/bind.py index 35adb666..8ad5ad2c 100644 --- a/sydent/threepid/bind.py +++ b/sydent/threepid/bind.py @@ -73,10 +73,10 @@ def addBinding(self, medium, address, mxid): str_to_hash = ' '.join( [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], ) - hash_digest = sha256_and_url_safe_base64(str_to_hash) + lookup_hash = sha256_and_url_safe_base64(str_to_hash) assoc = ThreepidAssociation( - medium, address, hash_digest, mxid, createdAt, createdAt, expires, + medium, address, lookup_hash, mxid, createdAt, createdAt, expires, ) localAssocStore.addOrUpdateAssociation(assoc) diff --git a/sydent/util/hash.py b/sydent/util/hash.py index 26c4ff60..3f266468 100644 --- a/sydent/util/hash.py +++ b/sydent/util/hash.py @@ -19,7 +19,7 @@ def 
sha256_and_url_safe_base64(input_text): - """SHA256 hash an input string, encode it as url-safe base64, and return it""" + """SHA256 hash an input string, encode the digest as url-safe base64, and return""" digest = hashlib.sha256(input_text.encode()).digest() return unpaddedbase64.encode_base64(digest, urlsafe=True) From 1e830f9a1ba3031a820ab66f94df816c7863c181 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 5 Aug 2019 18:17:43 +0100 Subject: [PATCH 09/25] Recompute lookup_hashes if a hashing algo is added/removed --- sydent/db/hashing_metadata.py | 7 ++- sydent/db/threepid_associations.sql | 7 +-- sydent/http/servlets/hashdetailsservlet.py | 4 +- sydent/sydent.py | 54 ++++++++++++++-------- sydent/util/hash.py | 16 +++++++ 5 files changed, 59 insertions(+), 29 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 88a6881b..3692b21e 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -33,7 +33,6 @@ def retrieve_value(self, name) if not row: return None - return row[0] def is_new(self, name, value): @@ -77,7 +76,7 @@ def rehash_threepids(self, hashing_function, pepper): :param hashing_function: A function with single input and output strings :type hashing_function func(str) -> str - :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing. + :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing :type pepper: str """ self._rehash_threepids(hashing_function, pepper, "local_threepid_associations") @@ -89,15 +88,15 @@ def _rehash_threepids(self, hashing_function, pepper, table): :param hashing_function: A function with single input and output strings :type hashing_function func(str) -> str - :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing. 
+ :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing :type pepper: str :param table: The database table to perform the rehashing on :type table: str """ - # Pull items from the database cur = self.sydent.db.cursor() + # Pull items from the database # Medium/address combos are marked as UNIQUE in the database sql = "SELECT medium, address FROM %s" % table res = cur.execute(sql) diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index d4c6855f..5b460481 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -46,9 +46,10 @@ CREATE INDEX IF NOT EXISTS medium_lower_address on global_threepid_associations CREATE UNIQUE INDEX IF NOT EXISTS originServer_originId on global_threepid_associations (originServer, originId); /* - * lookup_hashing_metadata contains information needed for the identity server to carry - * out tasks related to lookup_hashing. Salts and peppers etc. should go here. + * hashing_metadata contains information needed for the identity server to carry + * out tasks related to hashing. Algorithms, salts and peppers etc. should go here. 
*/ -CREATE TABLE IF NOT EXISTS lookup_hashing_metadata ( +CREATE TABLE IF NOT EXISTS hashing_metadata ( lookup_pepper varchar(256) not null, + lookup_hash varchar(256) not null, ); diff --git a/sydent/http/servlets/hashdetailsservlet.py b/sydent/http/servlets/hashdetailsservlet.py index d2ad5357..4f6440da 100644 --- a/sydent/http/servlets/hashdetailsservlet.py +++ b/sydent/http/servlets/hashdetailsservlet.py @@ -50,10 +50,8 @@ def render_GET(self, request): send_cors(request) # Determine what hashing algorithms have been enabled + # A default list value is defined in the config algorithms = self.sydent.config.get("hashing", "algorithms") - if not algorithms: - # Default response - algorithms = ["sha256"] # A lookup_pepper is defined in the config, otherwise it is generated lookup_pepper = self.sydent.config.get("hashing", "lookup_pepper") diff --git a/sydent/sydent.py b/sydent/sydent.py index 3ab89892..2e7b75f8 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -19,6 +19,7 @@ import logging import logging.handlers import os +import pickle import twisted.internet.reactor from twisted.internet import task @@ -38,7 +39,7 @@ from validators.msisdnvalidator import MsisdnValidator from hs_federation.verifier import Verifier -from util.hash import sha256_and_url_safe_base64 +from util.hash import sha256_and_url_safe_base64, diff_lists from sign.ed25519 import SydentEd25519 @@ -183,18 +184,8 @@ def sighup(signum, stack): addr=self.cfg.get("general", "prometheus_addr"), ) - if self.cfg.has_option("hashing", "algorithms"): - algorithms = self.cfg.get("hashing", "algorithms") - if not isinstance(algorithms, list): - logger.fatal("Config file option hashing.algorithms is not an array") - - # Ensure provided hash algorithms are known - for algorithm in algorithms: - if algorithm not in HashDetailsServlet.known_algorithms: - logger.fatal( - "Config file option hashing.algorithms contains unknown algorithm '%s'.", - algorithm, - ) + # Whether to compute a lookup_hash for 
each 3pid in the database + compute_lookup_hashes = False # Determine whether a lookup_pepper value has been defined lookup_pepper = self.cfg.get("hashing", "lookup_pepper") @@ -210,7 +201,7 @@ def sighup(signum, stack): # Generate one new_pepper = generateAlphanumericTokenOfLength(5) - # Cache it + # Save it for later use self.cfg.set( "hashing", "lookup_pepper", new_pepper, @@ -220,9 +211,7 @@ def sighup(signum, stack): HashingMetadataStore.store_values({"lookup_pepper": new_pepper}) # Re-hash all 3pids - HashingMetadataStore.rehash_threepids( - sha256_and_url_safe_base64, new_pepper, - ) + compute_lookup_hashes = True else: # If it has been defined, check if it's different from what we have # in the database @@ -231,10 +220,37 @@ def sighup(signum, stack): HashingMetadataStore.store_values({"lookup_pepper": lookup_pepper}) # Re-hash all 3pids - HashingMetadataStore.rehash_threepids( - sha256_and_url_safe_base64, lookup_pepper, + compute_lookup_hashes = True + + algorithms = self.cfg.get("hashing", "algorithms") + if not isinstance(algorithms, list): + logger.fatal("Config file option hashing.algorithms is not a list") + + # Ensure provided hash algorithms are known + for algorithm in algorithms: + if algorithm not in HashDetailsServlet.known_algorithms: + logger.fatal( + "Config file option hashing.algorithms contains unknown algorithm '%s'.", + algorithm, ) + # Check if list of algorithms have changed since the last run + db_algorithms = HashingMetadataStore.retrieve_value("algorithms") + if db_algorithms: + db_algorithms = pickle.loads(db_algorithms) + + # If the items differ by just a "none" value (or not at all), then + # there's no need to rehash + diff = diff_lists(db_algorithms, algorithms) + if diff and diff != ["none"]: + # Lookup hashing algorithm changed. 
Re-hash all 3pids + compute_lookup_hashes = True + + if compute_lookup_hashes: + HashingMetadataStore.rehash_threepids( + sha256_and_url_safe_base64, self.cfg.get("hashing", "lookup_pepper"), + ) + self.validators = Validators() self.validators.email = EmailValidator(self) self.validators.msisdn = MsisdnValidator(self) diff --git a/sydent/util/hash.py b/sydent/util/hash.py index 3f266468..9ed43621 100644 --- a/sydent/util/hash.py +++ b/sydent/util/hash.py @@ -48,3 +48,19 @@ def parse_space_separated_str(self, input_str): # Return the last item separated from the rest return (' '.join(split_input[:-1]), split_input[-1]) + +def diff_lists(first, second): + """Returns any differences between two lists + + :param first: A list of items + :type first: List + + :param second: Another list of items + :type second: List + + :returns a list containing items not found in both lists + :rtype: List + """ + a_minus_b = [x for x in first if x not in second] + b_minus_a = [x for x in second if x not in first] + return a_minus_b + b_minus_a From d34f0cb45b703ef7f18ddc2421ebd2697e03bc02 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 6 Aug 2019 11:26:00 +0100 Subject: [PATCH 10/25] Add info about config option, disable lookup if algs list is empty --- sydent/http/servlets/lookupv2servlet.py | 6 ++++++ sydent/sydent.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 3acdbddd..73193342 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -58,6 +58,12 @@ def render_POST(self, request): User IDs for which no mapping is found are omitted. 
""" send_cors(request) + + supported_algorithms = self.sydent.config.get("hashing", "algorithms") + if len(supported_algorithms) == 0: + request.setResponseCode(400) + return {'errcode': 'M_UNKNOWN', 'error': 'v2 lookup is disabled on this server'}, None + err, args = get_args(request, ('addresses', 'algorithm', 'pepper')) if err: return json.dumps(err) diff --git a/sydent/sydent.py b/sydent/sydent.py index 2e7b75f8..5d0d8551 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -116,6 +116,10 @@ 'ed25519.signingkey': '', }, 'hashing': { + # algorithms is a list with possible items "sha256" and "none" + # "sha256" - support lookup with sha256-hashed contact details + # "none" - support lookup with plaintext contact details + # Supplying an empty list will disable lookup 'algorithms': ['sha256'], } } From 50722ad1b8699ea81c81d5afcc1c144eeaaebf80 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 6 Aug 2019 11:54:38 +0100 Subject: [PATCH 11/25] Save hash algorithms to db. fixes --- sydent/db/hashing_metadata.py | 7 +++---- sydent/db/hashing_metadata.sql | 24 ++++++++++++++++++++++++ sydent/db/threepid_associations.py | 8 ++++---- sydent/db/threepid_associations.sql | 9 --------- sydent/sydent.py | 11 ++++++++++- 5 files changed, 41 insertions(+), 18 deletions(-) create mode 100644 sydent/db/hashing_metadata.sql diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 3692b21e..8d0aef62 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -38,7 +38,8 @@ def retrieve_value(self, name) def is_new(self, name, value): """ Returns whether a provided value does NOT match a value stored in the - database under the specified db column name + database under the specified db column name. The provided value will + be compared against None if it is not in the database. 
:param name: The name of the db column to check :type name: str @@ -46,12 +47,10 @@ def is_new(self, name, value): :param value: The value to check against :returns a boolean that is true if the the provided value and the - value of the item under the named db column is different + value of the item under the named db column is different. :rtype: bool """ db_value = self.retrieve_value(name) - if not value: - return False return value != db_value def store_values(self, names_and_values): diff --git a/sydent/db/hashing_metadata.sql b/sydent/db/hashing_metadata.sql new file mode 100644 index 00000000..a97505f6 --- /dev/null +++ b/sydent/db/hashing_metadata.sql @@ -0,0 +1,24 @@ +/* +Copyright 2019 The Matrix.org Foundation C.I.C. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* + * hashing_metadata contains information needed for the identity server to carry + * out tasks related to hashing. Algorithms, salts and peppers etc. should go here. 
+ */ +CREATE TABLE IF NOT EXISTS hashing_metadata ( + lookup_pepper varchar(256), + lookup_algorithms varchar(256), +); diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 678bde2d..87c3f8ec 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -138,9 +138,9 @@ def getMxid(self, medium, address): return row[0] def getMxids(self, threepid_tuples): - """Given a list of threepid_tuples, the same list but with MXIDs - appended to each tuple for which a match was found in the database - for. Output is ordered by medium, address, timestamp DESC + """Given a list of threepid_tuples, return the same list but with + MXIDs appended to each tuple for which a match was found in the + database for. Output is ordered by medium, address, timestamp DESC :param threepid_tuples: List containing (medium, address) tuples :type threepid_tuples: [(str, str)] @@ -231,7 +231,7 @@ def retrieveMxidFromHash(self, lookup_hash): :param input_hash: The lookup_hash value to lookup in the database :type input_hash: str - :returns the MXID relating to the lookup_hash value if found, + :returns the mxid relating to the lookup_hash value if found, otherwise None :rtype: str|None """ diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index 5b460481..075340d6 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -44,12 +44,3 @@ CREATE INDEX IF NOT EXISTS lookup_hash_medium on global_threepid_associations (l CREATE INDEX IF NOT EXISTS medium_address on global_threepid_associations (medium, address); CREATE INDEX IF NOT EXISTS medium_lower_address on global_threepid_associations (medium, lower(address)); CREATE UNIQUE INDEX IF NOT EXISTS originServer_originId on global_threepid_associations (originServer, originId); - -/* - * hashing_metadata contains information needed for the identity server to carry - * out tasks related to hashing. 
Algorithms, salts and peppers etc. should go here. - */ -CREATE TABLE IF NOT EXISTS hashing_metadata ( - lookup_pepper varchar(256) not null, - lookup_hash varchar(256) not null, -); diff --git a/sydent/sydent.py b/sydent/sydent.py index 5d0d8551..78be0e83 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -239,7 +239,7 @@ def sighup(signum, stack): ) # Check if list of algorithms have changed since the last run - db_algorithms = HashingMetadataStore.retrieve_value("algorithms") + db_algorithms = HashingMetadataStore.retrieve_value("lookup_algorithms") if db_algorithms: db_algorithms = pickle.loads(db_algorithms) @@ -249,6 +249,15 @@ def sighup(signum, stack): if diff and diff != ["none"]: # Lookup hashing algorithm changed. Re-hash all 3pids compute_lookup_hashes = True + else: + # The db didn't contain any info on hashing algorithms. + if "sha256" in algorithms: + # Rehash if "sha256" is specified in the config + compute_lookup_hashes = True + + # Save algorithm data to db + pickled_algorithms = pickle.dumps(algorithms) + HashingMetadataStore.store_values({"lookup_algorithms": pickled_algorithms}) if compute_lookup_hashes: HashingMetadataStore.rehash_threepids( From 03eca16f9c9f7807d577c2b69253abf08d531ba2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 6 Aug 2019 13:26:14 +0100 Subject: [PATCH 12/25] Remove algorithm configuration --- sydent/db/hashing_metadata.py | 6 ++-- sydent/db/hashing_metadata.sql | 1 - sydent/db/threepid_associations.py | 2 +- sydent/http/servlets/hashdetailsservlet.py | 16 ++++----- sydent/http/servlets/lookupv2servlet.py | 8 ++--- sydent/replication/peer.py | 2 +- sydent/sydent.py | 40 ---------------------- sydent/util/hash.py | 31 +++++++---------- 8 files changed, 26 insertions(+), 80 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 8d0aef62..5a43c2bd 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -18,7 +18,7 @@ class HashingMetadataStore: 
def __init__(self, sydent): self.sydent = sydent - def retrieve_value(self, name) + def retrieve_value(self, name): """Return a value from the hashing_metadata table :param name: The name of the db column to return the value for @@ -101,13 +101,15 @@ def _rehash_threepids(self, hashing_function, pepper, table): res = cur.execute(sql) rows = res.fetchall() + # Iterate through each medium, address combo, hash it, + # and store in the db batch_size = 500 count = 0 while count < len(rows): for medium, address in rows[count:count+batch_size]: # Combine the medium, address and pepper together in the # following form: "address medium pepper" - # According to MSC2134: https://github.com/matrix-org/matrix-doc/blob/hs/hash-identity/proposals/2134-identity-hash-lookup.md + # According to MSC2134: https://github.com/matrix-org/matrix-doc/pull/2134 combo = "%s %s %s" % (address, medium, pepper) # Hash the resulting string diff --git a/sydent/db/hashing_metadata.sql b/sydent/db/hashing_metadata.sql index a97505f6..b7475dad 100644 --- a/sydent/db/hashing_metadata.sql +++ b/sydent/db/hashing_metadata.sql @@ -20,5 +20,4 @@ limitations under the License. */ CREATE TABLE IF NOT EXISTS hashing_metadata ( lookup_pepper varchar(256), - lookup_algorithms varchar(256), ); diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 87c3f8ec..a9cdacd8 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -139,7 +139,7 @@ def getMxid(self, medium, address): def getMxids(self, threepid_tuples): """Given a list of threepid_tuples, return the same list but with - MXIDs appended to each tuple for which a match was found in the + mxids appended to each tuple for which a match was found in the database for. 
Output is ordered by medium, address, timestamp DESC :param threepid_tuples: List containing (medium, address) tuples diff --git a/sydent/http/servlets/hashdetailsservlet.py b/sydent/http/servlets/hashdetailsservlet.py index 4f6440da..805c51dd 100644 --- a/sydent/http/servlets/hashdetailsservlet.py +++ b/sydent/http/servlets/hashdetailsservlet.py @@ -30,17 +30,16 @@ class HashDetailsServlet(Resource): isLeaf = True + known_algorithms = ["sha256", "none"] def __init__(self, syd): self.sydent = syd - self.known_algorithms = ["sha256", "none"] def render_GET(self, request): """ - Return the hashing algorithms and pepper that this IS supports. - Whether the response includes the "none" algorithm is determined by a - config option. The pepper included in the response is also set by the - config, and generated if one is not set. + Return the hashing algorithms and pepper that this IS supports. The + pepper included in the response is set by the config, and generated + if one is not set. Returns: An object containing an array of hashing algorithms the server supports, and a `lookup_pepper` field, which is a @@ -49,15 +48,12 @@ def render_GET(self, request): """ send_cors(request) - # Determine what hashing algorithms have been enabled - # A default list value is defined in the config - algorithms = self.sydent.config.get("hashing", "algorithms") - # A lookup_pepper is defined in the config, otherwise it is generated lookup_pepper = self.sydent.config.get("hashing", "lookup_pepper") + request.setResponseCode(200) return { - "algorithms": algorithms, + "algorithms": known_algorithms, "lookup_pepper": lookup_pepper, } diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 73193342..3a4f1b82 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -22,6 +22,7 @@ from sydent.http.servlets import get_args, jsonwrap, send_cors from sydent.db.threepid_associations import 
GlobalAssociationStore +from sydent.http.servlets.hashdetailsservlet import HashDetailsServlet from sydent.util.hash import parse_space_separated_str logger = logging.getLogger(__name__) @@ -59,11 +60,6 @@ def render_POST(self, request): """ send_cors(request) - supported_algorithms = self.sydent.config.get("hashing", "algorithms") - if len(supported_algorithms) == 0: - request.setResponseCode(400) - return {'errcode': 'M_UNKNOWN', 'error': 'v2 lookup is disabled on this server'}, None - err, args = get_args(request, ('addresses', 'algorithm', 'pepper')) if err: return json.dumps(err) @@ -77,7 +73,7 @@ def render_POST(self, request): if not isinstance(algorithm, str): request.setResponseCode(400) return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm must be a string'}, None - if algorithm not in self.sydent.config.get("hashing", "algorithms"): + if algorithm not in HashDetailsServlet.known_algorithms: request.setResponseCode(400) return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}, None diff --git a/sydent/replication/peer.py b/sydent/replication/peer.py index dcadf909..068ec17f 100644 --- a/sydent/replication/peer.py +++ b/sydent/replication/peer.py @@ -71,7 +71,7 @@ def pushUpdates(self, sgAssocs): assocObj = threePidAssocFromDict(sgAssocs[localId]) if assocObj.mxid is not None: - # Assign a lookup_hash to this association for the purposes of lookup + # Assign a lookup_hash to this association str_to_hash = ' '.join( [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], ) diff --git a/sydent/sydent.py b/sydent/sydent.py index 78be0e83..de44da4c 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -115,13 +115,6 @@ 'crypto': { 'ed25519.signingkey': '', }, - 'hashing': { - # algorithms is a list with possible items "sha256" and "none" - # "sha256" - support lookup with sha256-hashed contact details - # "none" - support lookup with plaintext contact details - # Supplying an empty list will disable lookup - 'algorithms': 
['sha256'], - } } @@ -226,39 +219,6 @@ def sighup(signum, stack): # Re-hash all 3pids compute_lookup_hashes = True - algorithms = self.cfg.get("hashing", "algorithms") - if not isinstance(algorithms, list): - logger.fatal("Config file option hashing.algorithms is not a list") - - # Ensure provided hash algorithms are known - for algorithm in algorithms: - if algorithm not in HashDetailsServlet.known_algorithms: - logger.fatal( - "Config file option hashing.algorithms contains unknown algorithm '%s'.", - algorithm, - ) - - # Check if list of algorithms have changed since the last run - db_algorithms = HashingMetadataStore.retrieve_value("lookup_algorithms") - if db_algorithms: - db_algorithms = pickle.loads(db_algorithms) - - # If the items differ by just a "none" value (or not at all), then - # there's no need to rehash - diff = diff_lists(db_algorithms, algorithms) - if diff and diff != ["none"]: - # Lookup hashing algorithm changed. Re-hash all 3pids - compute_lookup_hashes = True - else: - # The db didn't contain any info on hashing algorithms. 
- if "sha256" in algorithms: - # Rehash if "sha256" is specified in the config - compute_lookup_hashes = True - - # Save algorithm data to db - pickled_algorithms = pickle.dumps(algorithms) - HashingMetadataStore.store_values({"lookup_algorithms": pickled_algorithms}) - if compute_lookup_hashes: HashingMetadataStore.rehash_threepids( sha256_and_url_safe_base64, self.cfg.get("hashing", "lookup_pepper"), diff --git a/sydent/util/hash.py b/sydent/util/hash.py index 9ed43621..b82794a0 100644 --- a/sydent/util/hash.py +++ b/sydent/util/hash.py @@ -19,12 +19,21 @@ def sha256_and_url_safe_base64(input_text): - """SHA256 hash an input string, encode the digest as url-safe base64, and return""" + """SHA256 hash an input string, encode the digest as url-safe base64, and + return + + :param input_text: string to hash + :type input_text: str + + :returns a sha256 hashed and url-safe base64 encoded digest + :rtype: str + """ digest = hashlib.sha256(input_text.encode()).digest() return unpaddedbase64.encode_base64(digest, urlsafe=True) -def parse_space_separated_str(self, input_str): - """Parses a string containing values seperated by a space. Joins the leading chunks if there are more than two. +def parse_space_separated_str(input_str): + """Parses a string containing values seperated by a space. Joins the + leading chunks if there are more than two. Used for parsing medium, address values. 
@@ -48,19 +57,3 @@ def parse_space_separated_str(self, input_str): # Return the last item separated from the rest return (' '.join(split_input[:-1]), split_input[-1]) - -def diff_lists(first, second): - """Returns any differences between two lists - - :param first: A list of items - :type first: List - - :param second: Another list of items - :type second: List - - :returns a list containing items not found in both lists - :rtype: List - """ - a_minus_b = [x for x in first if x not in second] - b_minus_a = [x for x in second if x not in first] - return a_minus_b + b_minus_a From 761b85273d5a3d8112fc0fee89bb55cf92e23b71 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 6 Aug 2019 14:44:32 +0100 Subject: [PATCH 13/25] fix up and successful tests --- sydent/db/hashing_metadata.py | 38 ++++---------- sydent/db/hashing_metadata.sql | 4 +- sydent/db/sqlitedb.py | 10 ++-- sydent/db/threepid_associations.py | 8 +-- sydent/db/threepid_associations.sql | 6 +-- sydent/http/httpserver.py | 2 + sydent/http/servlets/__init__.py | 2 +- sydent/http/servlets/hashdetailsservlet.py | 20 ++++---- sydent/http/servlets/lookupv2servlet.py | 38 +++++++------- sydent/sydent.py | 58 ++++++---------------- sydent/util/hash.py | 2 +- 11 files changed, 74 insertions(+), 114 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 5a43c2bd..28f18cba 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -35,24 +35,6 @@ def retrieve_value(self, name): return None return row[0] - def is_new(self, name, value): - """ - Returns whether a provided value does NOT match a value stored in the - database under the specified db column name. The provided value will - be compared against None if it is not in the database. 
- - :param name: The name of the db column to check - :type name: str - - :param value: The value to check against - - :returns a boolean that is true if the the provided value and the - value of the item under the named db column is different. - :rtype: bool - """ - db_value = self.retrieve_value(name) - return value != db_value - def store_values(self, names_and_values): """Stores values in the hashing_metadata table under the named columns @@ -65,8 +47,8 @@ def store_values(self, names_and_values): columns = ', '.join(names_and_values.keys()) values = ', '.join('?' * len(names_and_values)) sql = 'INSERT INTO hashing_metadata ({}) VALUES ({})'.format(columns, values) - - cur.execute(sql) + values = names_and_values.values() + cur.execute(sql, values) self.sydent.db.commit() def rehash_threepids(self, hashing_function, pepper): @@ -104,9 +86,8 @@ def _rehash_threepids(self, hashing_function, pepper, table): # Iterate through each medium, address combo, hash it, # and store in the db batch_size = 500 - count = 0 - while count < len(rows): - for medium, address in rows[count:count+batch_size]: + while rows: + for medium, address in rows[:batch_size]: # Combine the medium, address and pepper together in the # following form: "address medium pepper" # According to MSC2134: https://github.com/matrix-org/matrix-doc/pull/2134 @@ -117,10 +98,13 @@ def _rehash_threepids(self, hashing_function, pepper, table): # Save the result to the DB sql = ( - "UPDATE %s SET lookup_hash = '%s' " - "WHERE medium = %s AND address = %s" - % (table, result, medium, address) + "UPDATE %s SET lookup_hash = ? " + "WHERE medium = ? AND address = ?" 
+ % (table) ) - cur.execute(sql) + cur.execute(sql, (result, medium, address)) + + # Remove processed items from the list + rows = rows[batch_size:] self.sydent.db.commit() diff --git a/sydent/db/hashing_metadata.sql b/sydent/db/hashing_metadata.sql index b7475dad..c93136cb 100644 --- a/sydent/db/hashing_metadata.sql +++ b/sydent/db/hashing_metadata.sql @@ -16,8 +16,8 @@ limitations under the License. /* * hashing_metadata contains information needed for the identity server to carry - * out tasks related to hashing. Algorithms, salts and peppers etc. should go here. + * out tasks related to hashing. Salts and peppers etc. should go here. */ CREATE TABLE IF NOT EXISTS hashing_metadata ( - lookup_pepper varchar(256), + lookup_pepper varchar(256) ); diff --git a/sydent/db/sqlitedb.py b/sydent/db/sqlitedb.py index a4404e73..01e41b5e 100644 --- a/sydent/db/sqlitedb.py +++ b/sydent/db/sqlitedb.py @@ -142,15 +142,15 @@ def _upgradeSchema(self): if curVer < 3: cur = self.db.cursor() - cur.execute("ALTER local_threepid_associations ADD COLUMN lookup_hash VARCHAR(256) NOT NULL") - cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium))") + cur.execute("ALTER TABLE local_threepid_associations ADD COLUMN lookup_hash VARCHAR(256)") + cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium)") - cur.execute("ALTER global_threepid_associations ADD COLUMN lookup_hash VARCHAR(256) NOT NULL") + cur.execute("ALTER TABLE global_threepid_associations ADD COLUMN lookup_hash VARCHAR(256)") cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on global_threepid_associations (lookup_hash, medium)") cur.execute( - "CREATE TABLE IF NOT EXISTS lookup_hashing_metadata " - "(lookup_pepper varchar(256) not null)" + "CREATE TABLE IF NOT EXISTS hashing_metadata " + "(lookup_pepper varchar(256))" ) self.db.commit() logger.info("v2 -> v3 schema migration complete") diff --git 
a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index a9cdacd8..552317fa 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -150,8 +150,8 @@ def getMxids(self, threepid_tuples): """ cur = self.sydent.db.cursor() - cur.execute("CREATE TEMPORARY TABLE tmp_getmxids (medium VARCHAR(16), address VARCHAR(256))"); - cur.execute("CREATE INDEX tmp_getmxids_medium_lower_address ON tmp_getmxids (medium, lower(address))"); + cur.execute("CREATE TEMPORARY TABLE tmp_getmxids (medium VARCHAR(16), address VARCHAR(256))") + cur.execute("CREATE INDEX tmp_getmxids_medium_lower_address ON tmp_getmxids (medium, lower(address))") try: inserted_cap = 0 @@ -237,7 +237,9 @@ def retrieveMxidFromHash(self, lookup_hash): """ cur = self.sydent.db.cursor() - res = cur.execute("SELECT mxid WHERE lookup_hash = ?", (lookup_hash,)) + res = cur.execute( + "SELECT mxid FROM global_threepid_associations WHERE lookup_hash = ?", (lookup_hash,) + ) row = res.fetchone() if not row: return None diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index 075340d6..c925144b 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -18,20 +18,20 @@ CREATE TABLE IF NOT EXISTS local_threepid_associations ( id integer primary key, medium varchar(16) not null, address varchar(256) not null, - lookup_hash varchar(256) not null, + lookup_hash varchar(256), mxid varchar(256) not null, ts integer not null, notBefore bigint not null, notAfter bigint not null ); -CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium)); +CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium); CREATE UNIQUE INDEX IF NOT EXISTS medium_address on local_threepid_associations(medium, address); CREATE TABLE IF NOT EXISTS global_threepid_associations ( id integer primary key, medium varchar(16) not null, address 
varchar(256) not null, - lookup_hash varchar(256) not null, + lookup_hash varchar(256), mxid varchar(256) not null, ts integer not null, notBefore bigint not null, diff --git a/sydent/http/httpserver.py b/sydent/http/httpserver.py index 754bff3c..78796550 100644 --- a/sydent/http/httpserver.py +++ b/sydent/http/httpserver.py @@ -52,6 +52,7 @@ def __init__(self, sydent): lookup = self.sydent.servlets.lookup bulk_lookup = self.sydent.servlets.bulk_lookup + hash_details = self.sydent.servlets.hash_details lookup_v2 = self.sydent.servlets.lookup_v2 threepid = Resource() @@ -98,6 +99,7 @@ def __init__(self, sydent): v1.putChild('sign-ed25519', self.sydent.servlets.blindlySignStuffServlet) v2.putChild('lookup', lookup_v2) + v2.putChild('hash_details', hash_details) self.factory = Site(root) self.factory.displayTracebacks = False diff --git a/sydent/http/servlets/__init__.py b/sydent/http/servlets/__init__.py index 68007d9d..7b737125 100644 --- a/sydent/http/servlets/__init__.py +++ b/sydent/http/servlets/__init__.py @@ -24,7 +24,7 @@ def get_args(request, required_args): Currently takes args from the top level keys of a json object or www-form-urlencoded for backwards compatability. Returns a tuple (error, args) where if error is non-null, - the requesat is malformed. Otherwise, args contains the + the request is malformed. Otherwise, args contains the parameters passed. 
""" args = None diff --git a/sydent/http/servlets/hashdetailsservlet.py b/sydent/http/servlets/hashdetailsservlet.py index 805c51dd..3b741cdd 100644 --- a/sydent/http/servlets/hashdetailsservlet.py +++ b/sydent/http/servlets/hashdetailsservlet.py @@ -16,7 +16,7 @@ from twisted.web.resource import Resource from sydent.db.threepid_associations import GlobalAssociationStore -from sydent.util.tokenutils import generateAlphanumericTokenOfLength +from sydent.db.hashing_metadata import HashingMetadataStore import logging import json @@ -32,14 +32,15 @@ class HashDetailsServlet(Resource): isLeaf = True known_algorithms = ["sha256", "none"] - def __init__(self, syd): + def __init__(self, syd, lookup_pepper): self.sydent = syd + self.lookup_pepper = lookup_pepper def render_GET(self, request): """ Return the hashing algorithms and pepper that this IS supports. The - pepper included in the response is set by the config, and generated - if one is not set. + pepper included in the response is stored in the database, or + otherwise generated. Returns: An object containing an array of hashing algorithms the server supports, and a `lookup_pepper` field, which is a @@ -47,15 +48,12 @@ def render_GET(self, request): information before hashing. 
""" send_cors(request) - - # A lookup_pepper is defined in the config, otherwise it is generated - lookup_pepper = self.sydent.config.get("hashing", "lookup_pepper") request.setResponseCode(200) - return { - "algorithms": known_algorithms, - "lookup_pepper": lookup_pepper, - } + return json.dumps({ + "algorithms": self.known_algorithms, + "lookup_pepper": self.lookup_pepper, + }) @jsonwrap def render_OPTIONS(self, request): diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 3a4f1b82..9ef34748 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -31,9 +31,10 @@ class LookupV2Servlet(Resource): isLeaf = True - def __init__(self, syd): + def __init__(self, syd, lookup_pepper): self.sydent = syd self.globalAssociationStore = GlobalAssociationStore(self.sydent) + self.lookup_pepper = lookup_pepper def render_POST(self, request): """ @@ -52,6 +53,7 @@ def render_POST(self, request): * 'algorithm': The algorithm the client has used to process the 3PIDs. * 'pepper': The pepper the client has attached to the 3PIDs. + Returns: Object with key 'mappings', which is a dictionary of results where each result is a key/value pair of what the client sent, and the matching Matrix User ID that claims to own that 3PID. 
@@ -67,36 +69,34 @@ def render_POST(self, request): addresses = args['addresses'] if not isinstance(addresses, list): request.setResponseCode(400) - return {'errcode': 'M_INVALID_PARAM', 'error': 'addresses must be a list'}, None + return json.dumps({'errcode': 'M_INVALID_PARAM', 'error': 'addresses must be a list'}) - algorithm = args['algorithm'] - if not isinstance(algorithm, str): - request.setResponseCode(400) - return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm must be a string'}, None + algorithm = str(args['algorithm']) if algorithm not in HashDetailsServlet.known_algorithms: request.setResponseCode(400) - return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}, None + return json.dumps({'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}) - pepper = args['pepper'] - if not isinstance(pepper, str): - request.setResponseCode(400) - return {'errcode': 'M_INVALID_PARAM', 'error': 'pepper must be a string'}, None - if pepper != self.sydent.config.get("hashing", "lookup_pepper"): + pepper = str(args['pepper']) + if pepper != self.lookup_pepper: request.setResponseCode(400) - return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"}, None + return json.dumps({'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"}) - logger.info("Lookup of %d threepids with algorithm", len(addresses), algorithm) + logger.info("Lookup of %d threepid(s) with algorithm %s", len(addresses), algorithm) if algorithm == "none": # Lookup without hashing medium_address_tuples = [] - for medium_and_address in addresses: + for address_and_medium in addresses: # Parse medium, address components - medium, address = parse_space_separated_str(medium_and_address) - medium_address_tuples.append((medium, address)) + # The address and medium are flipped from what getMxids() is + # expecting, so switch them around + address, medium = parse_space_separated_str(address_and_medium) + 
medium_address_tuples.append((str(medium), str(address))) # Lookup the mxids - medium_address_mxid_tuples = GlobalAssociationStore.getMxids(medium_address_tuples) + print "giving: " + str(medium_address_tuples) + medium_address_mxid_tuples = self.globalAssociationStore.getMxids(medium_address_tuples) + # Return a dictionary of lookup_string: mxid values return json.dumps({'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples}}) elif algorithm == "sha256": @@ -109,7 +109,7 @@ def render_POST(self, request): return json.dumps({'mappings': mappings}) - return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}, None + return json.dumps({'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}) @jsonwrap def render_OPTIONS(self, request): diff --git a/sydent/sydent.py b/sydent/sydent.py index de44da4c..0ef67a02 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -39,7 +39,8 @@ from validators.msisdnvalidator import MsisdnValidator from hs_federation.verifier import Verifier -from util.hash import sha256_and_url_safe_base64, diff_lists +from util.hash import sha256_and_url_safe_base64 +from util.tokenutils import generateAlphanumericTokenOfLength from sign.ed25519 import SydentEd25519 @@ -181,47 +182,19 @@ def sighup(signum, stack): addr=self.cfg.get("general", "prometheus_addr"), ) - # Whether to compute a lookup_hash for each 3pid in the database - compute_lookup_hashes = False - - # Determine whether a lookup_pepper value has been defined - lookup_pepper = self.cfg.get("hashing", "lookup_pepper") - # If lookup_pepper hasn't been defined, or is an empty string... 
+ # See if a pepper already exists in the database + hashing_metadata_store = HashingMetadataStore(self) + lookup_pepper = hashing_metadata_store.retrieve_value("lookup_pepper") if not lookup_pepper: - # See if it exists in the database - lookup_pepper_db = HashingMetadataStore.retrieve_value("lookup_pepper") - if lookup_pepper_db: - # A pepper already exists, use it - self.cfg.set("hashing", "lookup_pepper", lookup_pepper_db) - else: - # No pepper defined and there isn't one in the database - # Generate one - new_pepper = generateAlphanumericTokenOfLength(5) - - # Save it for later use - self.cfg.set( - "hashing", "lookup_pepper", - new_pepper, - ) - - # Store it in the database - HashingMetadataStore.store_values({"lookup_pepper": new_pepper}) - - # Re-hash all 3pids - compute_lookup_hashes = True - else: - # If it has been defined, check if it's different from what we have - # in the database - if HashingMetadataStore.is_new("lookup_pepper", lookup_pepper): - # Store the new pepper in the database - HashingMetadataStore.store_values({"lookup_pepper": lookup_pepper}) - - # Re-hash all 3pids - compute_lookup_hashes = True - - if compute_lookup_hashes: - HashingMetadataStore.rehash_threepids( - sha256_and_url_safe_base64, self.cfg.get("hashing", "lookup_pepper"), + # No pepper defined in the database, generate one + lookup_pepper = generateAlphanumericTokenOfLength(5) + + # Store it in the database + hashing_metadata_store.store_values({"lookup_pepper": lookup_pepper}) + + # Re-hash all 3pids + hashing_metadata_store.rehash_threepids( + sha256_and_url_safe_base64, lookup_pepper, ) self.validators = Validators() @@ -243,7 +216,8 @@ def sighup(signum, stack): self.servlets.msisdnValidate = MsisdnValidateCodeServlet(self) self.servlets.lookup = LookupServlet(self) self.servlets.bulk_lookup = BulkLookupServlet(self) - self.servlets.lookup_v2 = LookupV2Servlet(self) + self.servlets.hash_details = HashDetailsServlet(self, lookup_pepper) + self.servlets.lookup_v2 = 
LookupV2Servlet(self, lookup_pepper) self.servlets.pubkey_ed25519 = Ed25519Servlet(self) self.servlets.pubkeyIsValid = PubkeyIsValidServlet(self) self.servlets.ephemeralPubkeyIsValid = EphemeralPubkeyIsValidServlet(self) diff --git a/sydent/util/hash.py b/sydent/util/hash.py index b82794a0..0d27e274 100644 --- a/sydent/util/hash.py +++ b/sydent/util/hash.py @@ -49,7 +49,7 @@ def parse_space_separated_str(input_str): :param input_str: The space-separated str to split :type input_str: str - :returns a list with 2 strings in it + :returns a list with 2 strings in it, (address, medium) :rtype [str, str] """ # Split the string by spaces From 989f4561c80eeaeb987b9f7dd1fc63fb85a1a793 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 13 Aug 2019 15:48:26 +0100 Subject: [PATCH 14/25] Update sydent/http/servlets/lookupservlet.py Co-Authored-By: Erik Johnston --- sydent/http/servlets/lookupservlet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sydent/http/servlets/lookupservlet.py b/sydent/http/servlets/lookupservlet.py index 6927a72a..3a146db9 100644 --- a/sydent/http/servlets/lookupservlet.py +++ b/sydent/http/servlets/lookupservlet.py @@ -36,7 +36,9 @@ def __init__(self, syd): def render_GET(self, request): """ Look up an individual threepid. + ** DEPRECATED ** + Params: 'medium': the medium of the threepid 'address': the address of the threepid Returns: A signed association if the threepid has a corresponding mxid, otherwise the empty object. 
From 821742b83e2460da53bb44c8b3617fd96d07f8f3 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 13 Aug 2019 15:58:40 +0100 Subject: [PATCH 15/25] update from review comments --- sydent/db/hashing_metadata.py | 58 ++++++++++++++----------- sydent/db/hashing_metadata.sql | 3 +- sydent/db/threepid_associations.sql | 4 +- sydent/http/servlets/lookupv2servlet.py | 15 ++++--- sydent/http/servlets/v2_servlet.py | 12 ----- sydent/http/srvresolver.py | 2 +- sydent/sydent.py | 4 +- 7 files changed, 48 insertions(+), 50 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 28f18cba..b4474209 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -18,37 +18,34 @@ class HashingMetadataStore: def __init__(self, sydent): self.sydent = sydent - def retrieve_value(self, name): - """Return a value from the hashing_metadata table + def get_lookup_pepper(self): + """Return the value of the current lookup pepper from the db - :param name: The name of the db column to return the value for - :type name: str - - :returns a value corresponding to the specified name, or None if a - value does not exist + :returns a pepper if it exists in the database, or None if one does + not exist """ cur = self.sydent.db.cursor() - res = cur.execute("select %s from hashing_metadata" % name) + res = cur.execute("select lookup_pepper from hashing_metadata") row = res.fetchone() if not row: return None return row[0] - def store_values(self, names_and_values): - """Stores values in the hashing_metadata table under the named columns + def store_lookup_pepper(self, lookup_pepper): + """Stores a new lookup pepper in the hashing_metadata db table - :param names_and_values: Column names and associated values to store - in the database - :type names_and_values: Dict + :param lookup_pepper: The pepper to store in the database + :type lookup_pepper: str """ cur = self.sydent.db.cursor() - columns = ', '.join(names_and_values.keys()) - values = 
', '.join('?' * len(names_and_values)) - sql = 'INSERT INTO hashing_metadata ({}) VALUES ({})'.format(columns, values) - values = names_and_values.values() - cur.execute(sql, values) + # Create or update lookup_pepper + sql = ( + 'INSERT OR REPLACE INTO hashing_metadata (id, lookup_pepper) ' + 'VALUES (0, ?)' + ) + cur.execute(sql, lookup_pepper) self.sydent.db.commit() def rehash_threepids(self, hashing_function, pepper): @@ -77,17 +74,29 @@ def _rehash_threepids(self, hashing_function, pepper, table): """ cur = self.sydent.db.cursor() - # Pull items from the database + # Get count of all 3PID records # Medium/address combos are marked as UNIQUE in the database - sql = "SELECT medium, address FROM %s" % table + sql = "SELECT COUNT(*) FROM %s" % table res = cur.execute(sql) - rows = res.fetchall() + row_count = res.fetchone() + row_count = row_count[0] # Iterate through each medium, address combo, hash it, # and store in the db batch_size = 500 - while rows: - for medium, address in rows[:batch_size]: + while count < row_count: + sql = ( + "SELECT medium, address FROM %s LIMIT %s OFFSET %s ORDER BY id" % + (table, batch_size, count) + ) + res = cur.execute(sql) + rows = res.fetchall() + + for medium, address in rows: + # Skip broken db entry + if not medium or not address: + continue + # Combine the medium, address and pepper together in the # following form: "address medium pepper" # According to MSC2134: https://github.com/matrix-org/matrix-doc/pull/2134 @@ -104,7 +113,6 @@ def _rehash_threepids(self, hashing_function, pepper, table): ) cur.execute(sql, (result, medium, address)) - # Remove processed items from the list - rows = rows[batch_size:] + count += len(rows) self.sydent.db.commit() diff --git a/sydent/db/hashing_metadata.sql b/sydent/db/hashing_metadata.sql index c93136cb..d7219a45 100644 --- a/sydent/db/hashing_metadata.sql +++ b/sydent/db/hashing_metadata.sql @@ -19,5 +19,6 @@ limitations under the License. * out tasks related to hashing. 
Salts and peppers etc. should go here. */ CREATE TABLE IF NOT EXISTS hashing_metadata ( - lookup_pepper varchar(256) + id integer primary key, + lookup_pepper varchar ); diff --git a/sydent/db/threepid_associations.sql b/sydent/db/threepid_associations.sql index c925144b..c4869b8b 100644 --- a/sydent/db/threepid_associations.sql +++ b/sydent/db/threepid_associations.sql @@ -18,7 +18,7 @@ CREATE TABLE IF NOT EXISTS local_threepid_associations ( id integer primary key, medium varchar(16) not null, address varchar(256) not null, - lookup_hash varchar(256), + lookup_hash varchar, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, @@ -31,7 +31,7 @@ CREATE TABLE IF NOT EXISTS global_threepid_associations ( id integer primary key, medium varchar(16) not null, address varchar(256) not null, - lookup_hash varchar(256), + lookup_hash varchar, mxid varchar(256) not null, ts integer not null, notBefore bigint not null, diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 9ef34748..6b97162d 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -36,6 +36,7 @@ def __init__(self, syd, lookup_pepper): self.globalAssociationStore = GlobalAssociationStore(self.sydent) self.lookup_pepper = lookup_pepper + @jsonwrap def render_POST(self, request): """ Perform lookups with potentially hashed 3PID details. 
@@ -64,22 +65,22 @@ def render_POST(self, request): err, args = get_args(request, ('addresses', 'algorithm', 'pepper')) if err: - return json.dumps(err) + return err addresses = args['addresses'] if not isinstance(addresses, list): request.setResponseCode(400) - return json.dumps({'errcode': 'M_INVALID_PARAM', 'error': 'addresses must be a list'}) + return {'errcode': 'M_INVALID_PARAM', 'error': 'addresses must be a list'} algorithm = str(args['algorithm']) if algorithm not in HashDetailsServlet.known_algorithms: request.setResponseCode(400) - return json.dumps({'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}) + return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'} pepper = str(args['pepper']) if pepper != self.lookup_pepper: request.setResponseCode(400) - return json.dumps({'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"}) + return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"} logger.info("Lookup of %d threepid(s) with algorithm %s", len(addresses), algorithm) if algorithm == "none": @@ -97,7 +98,7 @@ def render_POST(self, request): medium_address_mxid_tuples = self.globalAssociationStore.getMxids(medium_address_tuples) # Return a dictionary of lookup_string: mxid values - return json.dumps({'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples}}) + return { 'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples} } elif algorithm == "sha256": # Lookup using SHA256 with URL-safe base64 encoding @@ -107,9 +108,9 @@ def render_POST(self, request): if mxid: mappings[h] = mxid - return json.dumps({'mappings': mappings}) + return {'mappings': mappings} - return json.dumps({'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'}) + return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'} @jsonwrap def render_OPTIONS(self, request): diff --git a/sydent/http/servlets/v2_servlet.py b/sydent/http/servlets/v2_servlet.py index 
0e6b630d..22516465 100644 --- a/sydent/http/servlets/v2_servlet.py +++ b/sydent/http/servlets/v2_servlet.py @@ -25,15 +25,3 @@ class V2Servlet(Resource): def __init__(self, syd): Resource.__init__(self) self.sydent = syd - - @jsonwrap - def render_GET(self, request): - send_cors(request) - request.setResponseCode(200) - return {} - - @jsonwrap - def render_OPTIONS(self, request): - send_cors(request) - request.setResponseCode(200) - return {} diff --git a/sydent/http/srvresolver.py b/sydent/http/srvresolver.py index 01410df4..4664c99a 100644 --- a/sydent/http/srvresolver.py +++ b/sydent/http/srvresolver.py @@ -108,7 +108,7 @@ def resolve_service(self, service_name): :type service_name: bytes :returns a list of the SRV records, or an empty list if none found. - :rtype: Deferred[[Server]] + :rtype: Deferred[list[Server]] """ now = int(self._get_time()) diff --git a/sydent/sydent.py b/sydent/sydent.py index 0ef67a02..1bafc7c2 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -184,13 +184,13 @@ def sighup(signum, stack): # See if a pepper already exists in the database hashing_metadata_store = HashingMetadataStore(self) - lookup_pepper = hashing_metadata_store.retrieve_value("lookup_pepper") + lookup_pepper = hashing_metadata_store.get_lookup_pepper() if not lookup_pepper: # No pepper defined in the database, generate one lookup_pepper = generateAlphanumericTokenOfLength(5) # Store it in the database - hashing_metadata_store.store_values({"lookup_pepper": lookup_pepper}) + hashing_metadata_store.store_lookup_pepper(lookup_pepper) # Re-hash all 3pids hashing_metadata_store.rehash_threepids( From 553c4174da5e9f16e7603aa5148d08b4090e9010 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 13 Aug 2019 17:18:12 +0100 Subject: [PATCH 16/25] Update sydent/http/servlets/lookupv2servlet.py Co-Authored-By: Erik Johnston --- sydent/http/servlets/lookupv2servlet.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 6b97162d..6245f9ae 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -94,7 +94,6 @@ def render_POST(self, request): medium_address_tuples.append((str(medium), str(address))) # Lookup the mxids - print "giving: " + str(medium_address_tuples) medium_address_mxid_tuples = self.globalAssociationStore.getMxids(medium_address_tuples) # Return a dictionary of lookup_string: mxid values From 424270b144a2bc84bba4fb673da1c9b5180c9c21 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 14 Aug 2019 11:59:47 +0100 Subject: [PATCH 17/25] Make updating pepper and rehashing 3PIDs atomic Fix SQL. --- sydent/db/hashing_metadata.py | 49 ++++++++++++++----------- sydent/http/servlets/lookupv2servlet.py | 3 +- sydent/sydent.py | 10 ++--- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index b4474209..fb182c37 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ class HashingMetadataStore: def __init__(self, sydent): self.sydent = sydent @@ -32,11 +33,14 @@ def get_lookup_pepper(self): return None return row[0] - def store_lookup_pepper(self, lookup_pepper): - """Stores a new lookup pepper in the hashing_metadata db table + def store_lookup_pepper(self, hashing_function, pepper): + """Stores a new lookup pepper in the hashing_metadata db table and rehashes all 3PIDs - :param lookup_pepper: The pepper to store in the database - :type lookup_pepper: str + :param hashing_function: A function with single input and output strings + :type hashing_function func(str) -> str + + :param pepper: The pepper to store in the database + :type pepper: str """ cur = self.sydent.db.cursor() @@ -45,24 +49,26 @@ def store_lookup_pepper(self, lookup_pepper): 'INSERT OR REPLACE INTO hashing_metadata (id, lookup_pepper) ' 'VALUES (0, ?)' ) - cur.execute(sql, lookup_pepper) - self.sydent.db.commit() - - def rehash_threepids(self, hashing_function, pepper): - """Rehash all 3PIDs using a given hashing_function and pepper + cur.execute(sql, (pepper,)) - :param hashing_function: A function with single input and output strings - :type hashing_function func(str) -> str + # Hand the cursor to each rehashing function + # Each function will queue some rehashing db transactions + self._rehash_threepids(cur, hashing_function, pepper, "local_threepid_associations") + self._rehash_threepids(cur, hashing_function, pepper, "global_threepid_associations") - :param pepper: A pepper to append to the end of the 3PID (after a space) before hashing - :type pepper: str - """ - self._rehash_threepids(hashing_function, pepper, "local_threepid_associations") - self._rehash_threepids(hashing_function, pepper, "global_threepid_associations") + # Commit the queued db transactions so that adding a new pepper and hashing is atomic + self.sydent.db.commit() - def _rehash_threepids(self, hashing_function, pepper, table): + def _rehash_threepids(self, cur, hashing_function, 
pepper, table): """Rehash 3PIDs of a given table using a given hashing_function and pepper + A database cursor `cur` must be passed to this function. After this function completes, + the calling function should make sure to call `self.sydent.db.commit()` to commit + the changes made to the database. + + :param cur: Database cursor + :type cur: + :param hashing_function: A function with single input and output strings :type hashing_function func(str) -> str @@ -72,7 +78,6 @@ def _rehash_threepids(self, hashing_function, pepper, table): :param table: The database table to perform the rehashing on :type table: str """ - cur = self.sydent.db.cursor() # Get count of all 3PID records # Medium/address combos are marked as UNIQUE in the database @@ -84,9 +89,10 @@ def _rehash_threepids(self, hashing_function, pepper, table): # Iterate through each medium, address combo, hash it, # and store in the db batch_size = 500 + count = 0 while count < row_count: sql = ( - "SELECT medium, address FROM %s LIMIT %s OFFSET %s ORDER BY id" % + "SELECT medium, address FROM %s ORDER BY id LIMIT %s OFFSET %s" % (table, batch_size, count) ) res = cur.execute(sql) @@ -109,10 +115,9 @@ def _rehash_threepids(self, hashing_function, pepper, table): sql = ( "UPDATE %s SET lookup_hash = ? " "WHERE medium = ? AND address = ?" 
- % (table) + % table ) + # Lines up the query to be executed on commit cur.execute(sql, (result, medium, address)) count += len(rows) - - self.sydent.db.commit() diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 6b97162d..46ee16ff 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -80,7 +80,8 @@ def render_POST(self, request): pepper = str(args['pepper']) if pepper != self.lookup_pepper: request.setResponseCode(400) - return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match server's"} + return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match " + "the server's"} logger.info("Lookup of %d threepid(s) with algorithm %s", len(addresses), algorithm) if algorithm == "none": diff --git a/sydent/sydent.py b/sydent/sydent.py index 1bafc7c2..616c98c6 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -189,13 +189,9 @@ def sighup(signum, stack): # No pepper defined in the database, generate one lookup_pepper = generateAlphanumericTokenOfLength(5) - # Store it in the database - hashing_metadata_store.store_lookup_pepper(lookup_pepper) - - # Re-hash all 3pids - hashing_metadata_store.rehash_threepids( - sha256_and_url_safe_base64, lookup_pepper, - ) + # Store it in the database and rehash 3PIDs + hashing_metadata_store.store_lookup_pepper(sha256_and_url_safe_base64, + lookup_pepper) self.validators = Validators() self.validators.email = EmailValidator(self) From 54f4f36c763734e9cabde283585ff1759f845037 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 19 Aug 2019 11:38:18 +0100 Subject: [PATCH 18/25] review comments --- sydent/http/servlets/lookupv2servlet.py | 9 +++------ sydent/util/hash.py | 26 ------------------------- 2 files changed, 3 insertions(+), 32 deletions(-) diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 7d2b11a9..7b54abbc 100644 --- 
a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -17,13 +17,10 @@ from twisted.web.resource import Resource import logging -import json -import signedjson.sign from sydent.http.servlets import get_args, jsonwrap, send_cors from sydent.db.threepid_associations import GlobalAssociationStore from sydent.http.servlets.hashdetailsservlet import HashDetailsServlet -from sydent.util.hash import parse_space_separated_str logger = logging.getLogger(__name__) @@ -80,8 +77,8 @@ def render_POST(self, request): pepper = str(args['pepper']) if pepper != self.lookup_pepper: request.setResponseCode(400) - return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match " - "the server's"} + return {'errcode': 'M_INVALID_PEPPER', 'error': "pepper does not match '%s'" % + self.lookup_pepper} logger.info("Lookup of %d threepid(s) with algorithm %s", len(addresses), algorithm) if algorithm == "none": @@ -91,7 +88,7 @@ def render_POST(self, request): # Parse medium, address components # The address and medium are flipped from what getMxids() is # expecting, so switch them around - address, medium = parse_space_separated_str(address_and_medium) + address, medium = address_and_medium.rsplit(maxsplit=1) medium_address_tuples.append((str(medium), str(address))) # Lookup the mxids diff --git a/sydent/util/hash.py b/sydent/util/hash.py index 0d27e274..aa5d5ae3 100644 --- a/sydent/util/hash.py +++ b/sydent/util/hash.py @@ -31,29 +31,3 @@ def sha256_and_url_safe_base64(input_text): digest = hashlib.sha256(input_text.encode()).digest() return unpaddedbase64.encode_base64(digest, urlsafe=True) -def parse_space_separated_str(input_str): - """Parses a string containing values seperated by a space. Joins the - leading chunks if there are more than two. - - Used for parsing medium, address values. - - e.g. If given input_str="someaddress somemedium", - this function will return ("someaddress", "somemedium"). 
- - If given input_str="some address somemedium", - this function will return ("some address", "somemedium"). - - This is due to the future possibility of address values containing - spaces. - - :param input_str: The space-separated str to split - :type input_str: str - - :returns a list with 2 strings in it, (address, medium) - :rtype [str, str] - """ - # Split the string by spaces - split_input = input_str.split() - - # Return the last item separated from the rest - return (' '.join(split_input[:-1]), split_input[-1]) From a7c78cd0bcc081506a2e418bd0d05cc4eb68f91a Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 19 Aug 2019 19:41:11 +0100 Subject: [PATCH 19/25] Reject lookup addresses containing spaces --- sydent/http/servlets/lookupv2servlet.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 7b54abbc..0fd654f7 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -88,7 +88,15 @@ def render_POST(self, request): # Parse medium, address components # The address and medium are flipped from what getMxids() is # expecting, so switch them around - address, medium = address_and_medium.rsplit(maxsplit=1) + address, medium = address_and_medium.split() + + # Forbid addresses that contain a space + if " " in address: + return { + 'errcode': 'M_UNKNOWN', + 'error': '"%s": contains spaces' % address + } + medium_address_tuples.append((str(medium), str(address))) # Lookup the mxids From 5a82c66e6c8b7042e0c5a12aa8ecacf74383ff8d Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 20 Aug 2019 10:21:13 +0100 Subject: [PATCH 20/25] Only use one method of space checking --- sydent/http/servlets/lookupv2servlet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index 0fd654f7..abf89132 100644 --- 
a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -88,7 +88,7 @@ def render_POST(self, request): # Parse medium, address components # The address and medium are flipped from what getMxids() is # expecting, so switch them around - address, medium = address_and_medium.split() + address, medium = address_and_medium.rsplit(maxsplit=1) # Forbid addresses that contain a space if " " in address: From b35e1a68541725530de55fd1de144ebd8b967d79 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 20 Aug 2019 10:26:33 +0100 Subject: [PATCH 21/25] Add comment about hashing before serving --- sydent/sydent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sydent/sydent.py b/sydent/sydent.py index 616c98c6..f175efe8 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -183,6 +183,8 @@ def sighup(signum, stack): ) # See if a pepper already exists in the database + # Note: This MUST be run before we start serving requests, otherwise lookups for + # 3PID hashes may come in before we've completed generating them hashing_metadata_store = HashingMetadataStore(self) lookup_pepper = hashing_metadata_store.get_lookup_pepper() if not lookup_pepper: From 3667de14b3e7ceae317068fbbdf2a0a867ff5c2f Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 20 Aug 2019 10:36:08 +0100 Subject: [PATCH 22/25] Add response codes --- sydent/http/servlets/lookupv2servlet.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index abf89132..aefd3c74 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -92,6 +92,7 @@ def render_POST(self, request): # Forbid addresses that contain a space if " " in address: + request.setResponseCode(400) return { 'errcode': 'M_UNKNOWN', 'error': '"%s": contains spaces' % address @@ -103,7 +104,7 @@ def render_POST(self, request): medium_address_mxid_tuples = 
self.globalAssociationStore.getMxids(medium_address_tuples) # Return a dictionary of lookup_string: mxid values - return { 'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples} } + return {'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples}} elif algorithm == "sha256": # Lookup using SHA256 with URL-safe base64 encoding @@ -115,6 +116,7 @@ def render_POST(self, request): return {'mappings': mappings} + request.setResponseCode(400) return {'errcode': 'M_INVALID_PARAM', 'error': 'algorithm is not supported'} @jsonwrap From 16b1fe5832edd408cf96fc105c4d6ec43f263674 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 20 Aug 2019 13:13:59 +0100 Subject: [PATCH 23/25] lots of fixes --- sydent/db/hashing_metadata.py | 2 ++ sydent/db/hashing_metadata.sql | 24 ------------------- sydent/db/sqlitedb.py | 31 ++++++++++++++++++++----- sydent/db/threepid_associations.py | 7 +++--- sydent/http/servlets/lookupv2servlet.py | 12 +++++----- sydent/replication/peer.py | 11 +++++---- sydent/threepid/bind.py | 3 ++- 7 files changed, 46 insertions(+), 44 deletions(-) delete mode 100644 sydent/db/hashing_metadata.sql diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index fb182c37..3970dcf0 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Actions on the hashing_metadata table which is defined in the migration process in +# sqlitedb.py class HashingMetadataStore: def __init__(self, sydent): diff --git a/sydent/db/hashing_metadata.sql b/sydent/db/hashing_metadata.sql deleted file mode 100644 index d7219a45..00000000 --- a/sydent/db/hashing_metadata.sql +++ /dev/null @@ -1,24 +0,0 @@ -/* -Copyright 2019 The Matrix.org Foundation C.I.C. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -/* - * hashing_metadata contains information needed for the identity server to carry - * out tasks related to hashing. Salts and peppers etc. should go here. - */ -CREATE TABLE IF NOT EXISTS hashing_metadata ( - id integer primary key, - lookup_pepper varchar -); diff --git a/sydent/db/sqlitedb.py b/sydent/db/sqlitedb.py index 01e41b5e..22ab7cea 100644 --- a/sydent/db/sqlitedb.py +++ b/sydent/db/sqlitedb.py @@ -142,16 +142,35 @@ def _upgradeSchema(self): if curVer < 3: cur = self.db.cursor() - cur.execute("ALTER TABLE local_threepid_associations ADD COLUMN lookup_hash VARCHAR(256)") - cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on local_threepid_associations (lookup_hash, medium)") - cur.execute("ALTER TABLE global_threepid_associations ADD COLUMN lookup_hash VARCHAR(256)") - cur.execute("CREATE INDEX IF NOT EXISTS lookup_hash_medium on global_threepid_associations (lookup_hash, medium)") + # Add lookup_hash columns to threepid association tables + cur.execute( + "ALTER TABLE local_threepid_associations " + "ADD COLUMN lookup_hash VARCHAR(256)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS lookup_hash_medium " + "on local_threepid_associations " + "(lookup_hash, medium)" + ) + cur.execute( + "ALTER TABLE global_threepid_associations " + "ADD COLUMN lookup_hash VARCHAR(256)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS lookup_hash_medium " + "on global_threepid_associations " + "(lookup_hash, medium)" + ) + # Create hashing_metadata table to store the current lookup_pepper cur.execute( - "CREATE TABLE IF NOT EXISTS hashing_metadata " - 
"(lookup_pepper varchar(256))" + "CREATE TABLE IF NOT EXISTS hashing_metadata (" + "id integer primary key, " + "lookup_pepper varchar(256)" + ")" ) + self.db.commit() logger.info("v2 -> v3 schema migration complete") self._setSchemaVersion(3) diff --git a/sydent/db/threepid_associations.py b/sydent/db/threepid_associations.py index 552317fa..fe4d7d10 100644 --- a/sydent/db/threepid_associations.py +++ b/sydent/db/threepid_associations.py @@ -34,7 +34,7 @@ def addOrUpdateAssociation(self, assoc): # sqlite's support for upserts is atrocious cur.execute("insert or replace into local_threepid_associations " - "('medium', 'address', 'lookup_hash', mxid', 'ts', 'notBefore', 'notAfter')" + "('medium', 'address', 'lookup_hash', 'mxid', 'ts', 'notBefore', 'notAfter')" " values (?, ?, ?, ?, ?, ?, ?)", (assoc.medium, assoc.address, assoc.lookup_hash, assoc.mxid, assoc.ts, assoc.not_before, assoc.not_after)) self.sydent.db.commit() @@ -45,7 +45,8 @@ def getAssociationsAfterId(self, afterId, limit): if afterId is None: afterId = -1 - q = "select id, medium, address, mxid, ts, notBefore, notAfter from local_threepid_associations " \ + q = "select id, medium, address, lookup_hash, mxid, ts, notBefore, notAfter from " \ + "local_threepid_associations " \ "where id > ? order by id asc" if limit is not None: q += " limit ?" 
@@ -58,7 +59,7 @@ def getAssociationsAfterId(self, afterId, limit): assocs = {} for row in res.fetchall(): - assoc = ThreepidAssociation(row[1], row[2], row[3], row[4], row[5], row[6]) + assoc = ThreepidAssociation(row[1], row[2], row[3], row[4], row[5], row[6], row[7]) assocs[row[0]] = assoc maxId = row[0] diff --git a/sydent/http/servlets/lookupv2servlet.py b/sydent/http/servlets/lookupv2servlet.py index aefd3c74..4105ecb1 100644 --- a/sydent/http/servlets/lookupv2servlet.py +++ b/sydent/http/servlets/lookupv2servlet.py @@ -86,25 +86,25 @@ def render_POST(self, request): medium_address_tuples = [] for address_and_medium in addresses: # Parse medium, address components - # The address and medium are flipped from what getMxids() is - # expecting, so switch them around - address, medium = address_and_medium.rsplit(maxsplit=1) + address_medium_split = address_and_medium.split() # Forbid addresses that contain a space - if " " in address: + if len(address_medium_split) != 2: request.setResponseCode(400) return { 'errcode': 'M_UNKNOWN', - 'error': '"%s": contains spaces' % address + 'error': 'Invalid "address medium" pair: "%s"' % address_and_medium } + # Get the mxid for the address/medium combo if known + address, medium = address_medium_split medium_address_tuples.append((str(medium), str(address))) # Lookup the mxids medium_address_mxid_tuples = self.globalAssociationStore.getMxids(medium_address_tuples) # Return a dictionary of lookup_string: mxid values - return {'mappings': {x[0]: x[2] for x in medium_address_mxid_tuples}} + return {'mappings': {x[1]: x[2] for x in medium_address_mxid_tuples}} elif algorithm == "sha256": # Lookup using SHA256 with URL-safe base64 encoding diff --git a/sydent/replication/peer.py b/sydent/replication/peer.py index 068ec17f..f500e982 100644 --- a/sydent/replication/peer.py +++ b/sydent/replication/peer.py @@ -17,6 +17,7 @@ import ConfigParser from sydent.db.threepid_associations import GlobalAssociationStore +from 
sydent.db.hashing_metadata import HashingMetadataStore from sydent.threepid import threePidAssocFromDict from sydent.config import ConfigError from sydent.util.hash import sha256_and_url_safe_base64 @@ -58,6 +59,7 @@ class LocalPeer(Peer): def __init__(self, sydent): super(LocalPeer, self).__init__(sydent.server_name, {}) self.sydent = sydent + self.hashing_store = HashingMetadataStore(sydent) globalAssocStore = GlobalAssociationStore(self.sydent) self.lastId = globalAssocStore.lastIdFromServer(self.servername) @@ -72,12 +74,13 @@ def pushUpdates(self, sgAssocs): if assocObj.mxid is not None: # Assign a lookup_hash to this association - str_to_hash = ' '.join( - [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], + str_to_hash = ' '.join( + [assocObj.address, assocObj.medium, self.hashing_store.get_lookup_pepper()], ) - assocObj.lookup_hash = sha256_and_url_safe_base64(hash_str) + assocObj.lookup_hash = sha256_and_url_safe_base64(str_to_hash) - # We can probably skip verification for the local peer (although it could be good as a sanity check) + # We can probably skip verification for the local peer (although it could + # be good as a sanity check) globalAssocStore.addAssociation(assocObj, json.dumps(sgAssocs[localId]), self.sydent.server_name, localId) else: diff --git a/sydent/threepid/bind.py b/sydent/threepid/bind.py index 8ad5ad2c..a91fe33b 100644 --- a/sydent/threepid/bind.py +++ b/sydent/threepid/bind.py @@ -50,6 +50,7 @@ class ThreepidBinder: def __init__(self, sydent): self.sydent = sydent + self.hashing_store = HashingMetadataStore(sydent) def addBinding(self, medium, address, mxid): """Binds the given 3pid to the given mxid. 
@@ -71,7 +72,7 @@ def addBinding(self, medium, address, mxid): # Hash the medium + address and store that hash for the purposes of # later lookups str_to_hash = ' '.join( - [address, medium, self.sydent.cfg.get("hashing", "lookup_pepper")], + [address, medium, self.hashing_store.get_lookup_pepper()], ) lookup_hash = sha256_and_url_safe_base64(str_to_hash) From 1fec4288aec9832826eeb060b5fb02250a6038ad Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 20 Aug 2019 13:16:20 +0100 Subject: [PATCH 24/25] Add simple get to v2 endpoints --- sydent/db/hashing_metadata.py | 2 +- sydent/http/servlets/v2_servlet.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sydent/db/hashing_metadata.py b/sydent/db/hashing_metadata.py index 3970dcf0..515f51b3 100644 --- a/sydent/db/hashing_metadata.py +++ b/sydent/db/hashing_metadata.py @@ -53,7 +53,7 @@ def store_lookup_pepper(self, hashing_function, pepper): ) cur.execute(sql, (pepper,)) - # Hand the cursor it to each rehashing function + # Hand the cursor to each rehashing function # Each function will queue some rehashing db transactions self._rehash_threepids(cur, hashing_function, pepper, "local_threepid_associations") self._rehash_threepids(cur, hashing_function, pepper, "global_threepid_associations") diff --git a/sydent/http/servlets/v2_servlet.py b/sydent/http/servlets/v2_servlet.py index 22516465..0e6b630d 100644 --- a/sydent/http/servlets/v2_servlet.py +++ b/sydent/http/servlets/v2_servlet.py @@ -25,3 +25,15 @@ class V2Servlet(Resource): def __init__(self, syd): Resource.__init__(self) self.sydent = syd + + @jsonwrap + def render_GET(self, request): + send_cors(request) + request.setResponseCode(200) + return {} + + @jsonwrap + def render_OPTIONS(self, request): + send_cors(request) + request.setResponseCode(200) + return {} From 760c0f42ccf95bd9baa132cc39fe40ca835c7dbc Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 20 Aug 2019 13:24:52 +0100 Subject: [PATCH 25/25] Recalculate 
lookup_hash when we receive a replicated association --- sydent/http/servlets/replication.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/sydent/http/servlets/replication.py b/sydent/http/servlets/replication.py index 5a5642f5..d59fb901 100644 --- a/sydent/http/servlets/replication.py +++ b/sydent/http/servlets/replication.py @@ -18,6 +18,10 @@ from twisted.web.resource import Resource from sydent.http.servlets import jsonwrap from sydent.threepid import threePidAssocFromDict + +from sydent.util.hash import sha256_and_url_safe_base64 + +from sydent.db.hashing_metadata import HashingMetadataStore from sydent.db.peers import PeerStore from sydent.db.threepid_associations import GlobalAssociationStore @@ -29,6 +33,7 @@ class ReplicationPushServlet(Resource): def __init__(self, sydent): self.sydent = sydent + self.hashing_store = HashingMetadataStore(sydent) @jsonwrap def render_POST(self, request): @@ -66,21 +71,32 @@ def render_POST(self, request): globalAssocsStore = GlobalAssociationStore(self.sydent) - for originId,sgAssoc in inJson['sgAssocs'].items(): + for originId, sgAssoc in inJson['sgAssocs'].items(): try: peer.verifySignedAssociation(sgAssoc) logger.debug("Signed association from %s with origin ID %s verified", peer.servername, originId) - # Don't bother adding if one has already failed: we add all of them or none so we're only going to - # roll back the transaction anyway (but we continue to try & verify the rest so we can give a - # complete list of the ones that don't verify) + # Don't bother adding if one has already failed: we add all of them or none so + # we're only going to roll back the transaction anyway (but we continue to try + # & verify the rest so we can give a complete list of the ones that don't + # verify) if len(failedIds) > 0: continue assocObj = threePidAssocFromDict(sgAssoc) if assocObj.mxid is not None: - globalAssocsStore.addAssociation(assocObj, json.dumps(sgAssoc), peer.servername, 
originId, commit=False) + # Calculate the lookup hash with our own pepper for this association + str_to_hash = ' '.join( + [assocObj.address, assocObj.medium, + self.hashing_store.get_lookup_pepper()], + ) + assocObj.lookup_hash = sha256_and_url_safe_base64(str_to_hash) + + # Add this association + globalAssocsStore.addAssociation( + assocObj, json.dumps(sgAssoc), peer.servername, originId, commit=False + ) else: logger.info("Incoming deletion: removing associations for %s / %s", assocObj.medium, assocObj.address) globalAssocsStore.removeAssociation(assocObj.medium, assocObj.address) @@ -98,4 +114,4 @@ def render_POST(self, request): 'failed_ids':failedIds} else: self.sydent.db.commit() - return {'success':True} + return {'success': True}