From 010ca7e460d13160b465de24dd3982a397f94c16 Mon Sep 17 00:00:00 2001 From: fisx Date: Mon, 7 Oct 2019 17:27:26 +0200 Subject: [PATCH 01/26] Update docs. (#870) --- docs/reference/provisioning/scim-via-curl.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/reference/provisioning/scim-via-curl.md b/docs/reference/provisioning/scim-via-curl.md index 7e28794a326..a3fd2bc0c3f 100644 --- a/docs/reference/provisioning/scim-via-curl.md +++ b/docs/reference/provisioning/scim-via-curl.md @@ -8,6 +8,11 @@ This page shows you how to communicate with the wire backend through the [SCIM API](http://www.simplecloud.info/) by example. All examples are [curl](https://curl.haxx.se/) (in bash syntax). +We support setting the handle and user name in wire (the thing with +`@` and the longer thing without `@`). There is also support for +setting rich-info. Group provisioning is planned, but the release +date hasn't been fixed yet. + If you want to dive into the backend code, start [reading here in our backend](https://github.com/wireapp/wire-server/blob/develop/services/spar/src/Spar/Scim.hs) and [our hscim library](https://github.com/wireapp/hscim). From 9130a37f0b2f9689898b0833ccf62c86291a48d8 Mon Sep 17 00:00:00 2001 From: fisx Date: Wed, 16 Oct 2019 14:27:21 +0200 Subject: [PATCH 02/26] Support HEAD requests for `/sso/initiate-bind` (#878) --- services/spar/src/Spar/API.hs | 1 + services/spar/src/Spar/API/Types.hs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/services/spar/src/Spar/API.hs b/services/spar/src/Spar/API.hs index 8705a376cdb..cccae4a5390 100644 --- a/services/spar/src/Spar/API.hs +++ b/services/spar/src/Spar/API.hs @@ -57,6 +57,7 @@ app ctx = SAML.setHttpCachePolicy api :: Opts -> ServerT API Spar api opts = apiSSO opts + :<|> authreqPrecheck :<|> authreq (maxttlAuthreqDiffTime opts) DoInitiateBind :<|> apiIDP :<|> apiScim diff --git a/services/spar/src/Spar/API/Types.hs b/services/spar/src/Spar/API/Types.hs index f0fc4e629cd..902bff25381 100644 --- a/services/spar/src/Spar/API/Types.hs +++ b/services/spar/src/Spar/API/Types.hs @@ -33,7 +33,8 @@ import "swagger2" Data.Swagger hiding (Header(..)) type API = "sso" :> APISSO - :<|> "sso-initiate-bind" :> APIAuthReq -- (see comment on 'APIAuthReq') + :<|> "sso-initiate-bind" :> APIAuthReqPrecheck -- (see comment on 'APIAuthReq') + :<|> "sso-initiate-bind" :> APIAuthReq -- (see comment on 'APIAuthReq') :<|> "identity-providers" :> APIIDP :<|> "scim" :> APIScim :<|> OmitDocs :> "i" :> APIINTERNAL From d972d2dfc9d87c14f565b21e7b59b5b96a237152 Mon Sep 17 00:00:00 2001 From: fisx Date: Thu, 17 Oct 2019 11:32:49 +0200 Subject: [PATCH 03/26] spar docs (#871) * docs * Add missing spar integration test cases --- deploy/services-demo/register_idp_internal.sh | 25 +++ docs/reference/spar-braindump.md | 183 ++++++++++++++++++ libs/brig-types/src/Brig/Types/Common.hs | 2 +- .../test-integration/Test/Spar/APISpec.hs | 74 ++++++- 4 files changed, 282 insertions(+), 2 deletions(-) create mode 100755 deploy/services-demo/register_idp_internal.sh create mode 100644 docs/reference/spar-braindump.md diff --git a/deploy/services-demo/register_idp_internal.sh b/deploy/services-demo/register_idp_internal.sh new file mode 100755 index 00000000000..8a142efbad1 --- /dev/null +++ b/deploy/services-demo/register_idp_internal.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -e + +# server-side variant of ./register_idp.sh; use if you have ssh access to one of your spar instances. 
+# usage: ./register_idp_internal.sh <metadata file> <z-user uuid>
+
+backend="http://localhost:8080"
+
+metadata_file=$1
+if [ ! -e "${metadata_file}" ]; then
+    echo "*** no metadata: '$1'"
+    exit 80
+fi
+
+z_user=$2
+if [ ! -n "${z_user}" ]; then
+    echo "*** no z_user uuid"
+    exit 80
+fi
+
+which curl >/dev/null || ( echo "*** please install https://curl.haxx.se/ in your path."; exit 81 )
+curl_exe=$(which curl)
+
+${curl_exe} -is -v -XPOST ${backend}/identity-providers -H"Z-User: ${z_user}" -H'Content-type: application/xml' -d@"${metadata_file}"
diff --git a/docs/reference/spar-braindump.md b/docs/reference/spar-braindump.md
new file mode 100644
index 00000000000..23e2d80fd1c
--- /dev/null
+++ b/docs/reference/spar-braindump.md
@@ -0,0 +1,183 @@
+# the spar service for user provisioning (scim) and authentication (saml) - a brain dump
+
+this is a mix of information on implementation details, architecture,
+and operation. it should probably be sorted into different places in
+the future, but if you can't find any more well-structured
+documentation answering your questions, look here!
+
+
+## related documentation
+
+- [list of howtos for supported SAML IdP vendors](https://docs.wire.com/how-to/single-sign-on/index.html)
+- [fragment](https://docs.wire.com/understand/single-sign-on/design.html) (TODO: clean up the section "common misconceptions" below and move it here.)
+- [official docs for team admin from customer support](https://support.wire.com/hc/en-us/categories/360000248538?section=administration%3Fsection%3Dadministration) (skip to "Authentication")
+- [talk scim using curl](https://github.com/wireapp/wire-server/blob/develop/docs/reference/provisioning/scim-via-curl.md)
+- if you want to work on our saml/scim implementation and do not have access to [https://github.com/wireapp/design-specs/tree/master/Single%20Sign%20On], please get in touch with us.
+
+
+## operations
+
+### enabling / disabling the sso feature for a team
+
+if you have sso disabled by default, you need to turn on the feature
+for every team that wants to use it. you can do this in the stern
+service (aka backoffice). look for `get/put
+/teams/{tid}/features/sso`
+
+
+### registering an idp with a team via curl
+
+you need to have:
+
+```sh
+# user id of an admin of the team (or the creator from the team info
+# in the backoffice, if you only have the team id).
+export ADMIN_ID=...
+
+# path of the xml metadata file (if you only have the url, curl it)
+export METADATA_FILE=...
+```
+
+then you need to do:
+
+```sh
+./khan.me scp upload -e prod -r spar -s .../wire-server/deploy/services-demo/register_idp_internal.sh -d ./register.sh
+./khan.me scp upload -e prod -r spar -s ${METADATA_FILE} -d ./metadata.xml
+./khan.me ssh -e prod -r spar
+export TEAM_OWNER_ID=...
+./register.sh metadata.xml $TEAM_OWNER_ID
+```
+
+the output contains a json object representing the idp. construct
+the login code from the `id` field of that object by adding `wire-` in
+front, e.g.:
+
+```
+wire-e97fbe2e-eeb1-11e9-acf3-9ba77d8a04bf
+```
+
+give this login code to the users that you want to connect to wire
+using this idp. see
+[here](https://support.wire.com/hc/en-us/articles/360000954617-Login-with-SSO)
+on how to use the login code.
+
+
+### troubleshooting
+
+#### gathering information
+
+- find metadata for team in table `spar.idp_raw_metadata` via cqlsh
+  (since https://github.com/wireapp/wire-server/pull/872)
+
+- ask user for screenshots of the error message, or even better, for
+  the text.
the error message contains lots of strings that you can
+  grep for in the spar sources.
+
+
+#### making spar work with a new IdP
+
+often, new IdPs work out of the box, because there appears to be some
+consensus about what minimum feature set everybody should support.
+
+if there are problems: collect the metadata xml and an authentication
+response xml (e.g. from the browser http logs, via a more technically
+savvy customer; FUTUREWORK: it would be nice to log all saml response
+xml files that spar receives in prod and cannot process).
+
+https://github.com/wireapp/saml2-web-sso supports writing [unit vendor
+compatibility
+tests](https://github.com/wireapp/saml2-web-sso/blob/ff9b9f445475809d1fa31ef7f2932caa0ed31613/test/Test/SAML2/WebSSO/APISpec.hs#L266-L329)
+against that response value. once that test passes, it should all
+work fine.
+
+
+### common misconceptions
+
+
+#### an email address can be one of two things
+
+the email used for saml auth is only a name: it is never used for
+sending out emails, and does not show as the email address of the user
+in the team settings.
+
+RATIONALE: emails that are passed in from an external identity
+provider would have to be blindly trusted, so the user cannot have them
+as an actual email address that wire is sending emails to.
+
+POSSIBLE FEATURE: we could authenticate the emails sent in from the
+identity provider in the same way we are doing that for
+password-authenticated non-team users: email receives a link
+containing a crypto token, user clicks on link if the email is
+authentic, email gets authenticated.
+
+
+#### scim, provisioning, metadata
+
+for changing the user information (name, handle, email, ...), saml
+isn't enough. the identity management software (AD in this case, or
+some add-on) needs to support scim. we *could* support doing that via
+saml, but the parts of the standards that are needed for that are in
+even worse shape than the ones for the authentication bits, and it
+would not lead to a good user experience. so instead we require users
+to adopt the more robust and contemporary scim standard.
+
+
+#### we don't support binding password/phone-auth'ed users to saml yet
+
+to keep track of whether we have added that, see https://github.com/zinfra/backend-issues/issues/731
+
+
+
+## application logic
+
+### deleting users that exist on spar
+
+For scim- or saml-created users, there are three locations for user data:
+
+- `brig.user` (and a few things associated with that on brig and galley)
+- `spar.user`
+- `spar.scim_user`
+
+The single source of truth is `brig.user`. Dangling entries in the
+other places are allowed, and must be checked by the application code
+for danglingness. ([test case for
+scim](https://github.com/wireapp/wire-server/blob/010ca7e460d13160b465de24dd3982a397f94c16/services/spar/test-integration/Test/Spar/Scim/UserSpec.hs#L239-L308);
+[test case for
+saml](https://github.com/wireapp/wire-server/blob/293518655d7bae60fbcb0c4aaa06034785bfb6fc/services/spar/test-integration/Test/Spar/APISpec.hs#L742-L795))
+
+For the semantics of interesting corner cases, consult [the test
+suite](https://github.com/wireapp/wire-server/blob/develop/services/spar/test-integration/Test/Spar/APISpec.hs).
+If you can't find what you're looking for there, please add at least a
+pending test case explaining what's missing.
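+
+to check an individual user for dangling entries by hand, cqlsh
+queries along the following lines can help. (a sketch only: the table
+names are the three listed above, but the column names here are
+assumptions from memory; double-check them against the brig and spar
+schema migrations before relying on them. `<uid>`, `<issuer>`, and
+`<sso-id>` are placeholders.)
+
+```sh
+# brig is the source of truth; if this comes up empty, everything
+# else for this user is dangling:
+cqlsh -e "SELECT id FROM brig.user WHERE id = <uid>;"
+
+# saml user ref on spar (may dangle; column names are assumptions):
+cqlsh -e "SELECT uid FROM spar.user WHERE issuer = '<issuer>' AND sso_id = '<sso-id>';"
+
+# scim blob on spar (may dangle; column name is an assumption):
+cqlsh -e "SELECT id FROM spar.scim_user WHERE id = <uid>;"
+```
+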
+ +Side note: Users in brig carry an enum type +[`ManagedBy`](https://github.com/wireapp/wire-server/blob/010ca7e460d13160b465de24dd3982a397f94c16/libs/brig-types/src/Brig/Types/Common.hs#L393-L413); +see also {#DevScimOneWaySync}. This is a half-implemented feature for +managing conflicts between changes via scim vs. changes from wire +clients; and does currently not affect deletability of users. + + +#### delete via deleting idp + +[Currently](https://github.com/wireapp/wire-server/blob/010ca7e460d13160b465de24dd3982a397f94c16/services/spar/src/Spar/API.hs#L172-L187), +deleting an IdP does not delete any user data. In particular: + +- cookies of users that have authenticated via an IdP will remain valid if the IdP gets deleted. +- if a user authenticates via an IdP that has been deleted to obtain a new cookie, the login code will not work, and the user will never be able to login again. +- the user will still show in the team settings, and can be manually deleted from there. +- if a new idp is registered, and a user authenticates via that idp, the old user is unreachable. (spar will look up the wire `UserId` under the saml user id that consists partly of the id of the new IdP, come up empty, and [create a fresh user on brig](https://github.com/wireapp/wire-server/blob/010ca7e460d13160b465de24dd3982a397f94c16/services/spar/src/Spar/App.hs#L306).) + + +#### user deletes herself + +TODO + + +#### delete in team settings + +TODO (probably little difference between this and "user deletes herself"?) + + +#### delete via scim + +TODO diff --git a/libs/brig-types/src/Brig/Types/Common.hs b/libs/brig-types/src/Brig/Types/Common.hs index 4f43c0c207c..14937d3a175 100644 --- a/libs/brig-types/src/Brig/Types/Common.hs +++ b/libs/brig-types/src/Brig/Types/Common.hs @@ -391,7 +391,7 @@ codeParser err conv = do -- ManagedBy -- | Who controls changes to the user profile (where the profile is defined as "all --- user-editable, user-visible attributes"). +-- user-editable, user-visible attributes"). See {#DevScimOneWaySync}. data ManagedBy -- | The profile can be changed in-app; user doesn't show up via SCIM at all. 
= ManagedByWire diff --git a/services/spar/test-integration/Test/Spar/APISpec.hs b/services/spar/test-integration/Test/Spar/APISpec.hs index 372d15fefc1..c9bb6591a2e 100644 --- a/services/spar/test-integration/Test/Spar/APISpec.hs +++ b/services/spar/test-integration/Test/Spar/APISpec.hs @@ -5,6 +5,7 @@ import Imports hiding (head) import Bilge import Brig.Types.User import Control.Lens +import Control.Monad.Random.Class (getRandomR) import Data.Aeson import Data.Aeson.Lens import Data.ByteString.Conversion @@ -12,7 +13,7 @@ import Data.Id import Data.Proxy import Data.String.Conversions import Data.UUID as UUID hiding (null, fromByteString) -import Network.HTTP.Types (status200) +import Network.HTTP.Types (status200, status202) import SAML2.WebSSO as SAML import SAML2.WebSSO.Test.Lenses import SAML2.WebSSO.Test.MockResponse @@ -38,6 +39,7 @@ spec = do specFinalizeLogin specBindingUsers specCRUDIdentityProvider + specDeleteCornerCases specScimAndSAML specAux @@ -735,6 +737,76 @@ specCRUDIdentityProvider = do liftIO $ idp `shouldBe` idp' +specDeleteCornerCases :: SpecWith TestEnv +specDeleteCornerCases = describe "delete corner cases" $ do + it "create user1 via idp1 (saml); delete user1; create user via newly created idp2 (saml)" $ do + pending + + it "create user1 via saml; delete user1; create via scim (in same team)" $ do + pending + + it "create user1 via saml; delete user1; create via password (outside team)" $ do + pending + + it "delete idp; create idp with same issuer id" $ do + pending + + -- clone of 'testScimCreateVsUserRef', without the scim part: Create a user implicitly via + -- saml login; remove it via brig leaving a dangling entry in @spar.user@; create it via saml + -- login once more. This should work despite the dangling database entry. + it "re-create previously deleted, dangling users" $ do + (_ownerid, _teamid, idp) <- registerTestIdP + uname :: SAML.UnqualifiedNameID <- do + suffix <- cs <$> replicateM 7 (getRandomR ('0', '9')) + either (error . show) pure $ + SAML.mkUNameIDEmail ("email_" <> suffix <> "@example.com") + let uref = SAML.UserRef tenant subj + subj = either (error . show) id $ SAML.mkNameID uname Nothing Nothing Nothing + tenant = idp ^. SAML.idpMetadata . SAML.edIssuer + + !(Just !uid) <- createViaSaml idp uref + samlUserShouldSatisfy uref isJust + + deleteViaBrig uid + samlUserShouldSatisfy uref isJust -- brig doesn't talk to spar right now when users + -- are deleted there. we need to work around this + -- fact for now. (if the test fails here, this may + -- mean that you fixed the behavior and can + -- change this to 'isNothing'.) 
+ + (Just _) <- createViaSaml idp uref + samlUserShouldSatisfy uref isJust + where + samlUserShouldSatisfy :: HasCallStack => SAML.UserRef -> (Maybe UserId -> Bool) -> TestSpar () + samlUserShouldSatisfy uref property = do + muid <- getUserIdViaRef' uref + liftIO $ muid `shouldSatisfy` property + + createViaSamlResp :: HasCallStack => IdP -> SAML.UserRef -> TestSpar ResponseLBS + createViaSamlResp idp (SAML.UserRef _ subj) = do + (privCreds, authnReq) <- negotiateAuthnRequest idp + spmeta <- getTestSPMetadata + authnResp <- runSimpleSP $ + mkAuthnResponseWithSubj subj privCreds idp spmeta authnReq True + createResp <- submitAuthnResponse authnResp + liftIO $ responseStatus createResp `shouldBe` status200 + pure createResp + + createViaSaml :: HasCallStack => IdP -> SAML.UserRef -> TestSpar (Maybe UserId) + createViaSaml idp uref = do + resp <- createViaSamlResp idp uref + liftIO $ do + maybe (error "no body") cs (responseBody resp) + `shouldContain` "wire:sso:success" + getUserIdViaRef' uref + + deleteViaBrig :: UserId -> TestSpar () + deleteViaBrig uid = do + brig <- view teBrig + resp <- (call . delete $ brig . paths ["i", "users", toByteString' uid]) + liftIO $ responseStatus resp `shouldBe` status202 + + specScimAndSAML :: SpecWith TestEnv specScimAndSAML = do it "SCIM and SAML work together and SCIM-created users can login" $ do From 9575ca3947ea14eeaced902a832c64a46c63870b Mon Sep 17 00:00:00 2001 From: jschaul Date: Thu, 17 Oct 2019 12:04:54 +0200 Subject: [PATCH 04/26] rename docker-ephemeral to docker_ephemeral (#880) Fixes #615 --- .gitignore | 16 ++++++++-------- README.md | 2 +- .../build/Makefile | 0 .../build/README.md | 0 .../db-migrate/brig-index.sh | 0 .../db-migrate/brig-schema.sh | 0 .../db-migrate/galley-schema.sh | 0 .../db-migrate/gundeck-schema.sh | 0 .../db-migrate/spar-schema.sh | 0 .../docker-compose.yaml | 0 .../init.sh | 0 .../run.sh | 0 deploy/services-demo/README.md | 2 +- deploy/services-demo/demo.sh | 4 ++-- deploy/services-demo/docker-compose.yaml | 18 +++++++++--------- services/brig/brig.integration.yaml | 2 +- services/integration.sh | 4 ++-- 17 files changed, 24 insertions(+), 24 deletions(-) rename deploy/{docker-ephemeral => dockerephemeral}/build/Makefile (100%) rename deploy/{docker-ephemeral => dockerephemeral}/build/README.md (100%) rename deploy/{docker-ephemeral => dockerephemeral}/db-migrate/brig-index.sh (100%) rename deploy/{docker-ephemeral => dockerephemeral}/db-migrate/brig-schema.sh (100%) rename deploy/{docker-ephemeral => dockerephemeral}/db-migrate/galley-schema.sh (100%) rename deploy/{docker-ephemeral => dockerephemeral}/db-migrate/gundeck-schema.sh (100%) rename deploy/{docker-ephemeral => dockerephemeral}/db-migrate/spar-schema.sh (100%) rename deploy/{docker-ephemeral => dockerephemeral}/docker-compose.yaml (100%) rename deploy/{docker-ephemeral => dockerephemeral}/init.sh (100%) rename deploy/{docker-ephemeral => dockerephemeral}/run.sh (100%) diff --git a/.gitignore b/.gitignore index 28cbc6beab1..50a4d5aff20 100644 --- a/.gitignore +++ b/.gitignore @@ -63,14 +63,14 @@ swagger-ui deploy/services-demo/resources/templates/* deploy/services-demo/conf/nginz/zwagger-ui/* -deploy/docker-ephemeral/build/airdock_base-all/ -deploy/docker-ephemeral/build/airdock_base/ -deploy/docker-ephemeral/build/airdock_fakesqs-all/ -deploy/docker-ephemeral/build/airdock_fakesqs/ -deploy/docker-ephemeral/build/airdock_rvm-all/ -deploy/docker-ephemeral/build/airdock_rvm/ -deploy/docker-ephemeral/build/dynamodb_local/ -deploy/docker-ephemeral/build/smtp/ 
+deploy/dockerephemeral/build/airdock_base-all/ +deploy/dockerephemeral/build/airdock_base/ +deploy/dockerephemeral/build/airdock_fakesqs-all/ +deploy/dockerephemeral/build/airdock_fakesqs/ +deploy/dockerephemeral/build/airdock_rvm-all/ +deploy/dockerephemeral/build/airdock_rvm/ +deploy/dockerephemeral/build/dynamodb_local/ +deploy/dockerephemeral/build/smtp/ # Ignore cabal files; use package.yaml instead *.cabal diff --git a/README.md b/README.md index 593458473ec..d43f8cfe905 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ Integration tests require all of the haskell services (brig, galley, cannon, gun Setting up these real, but in-memory internal and "fake" external dependencies is done easiest using [`docker-compose`](https://docs.docker.com/compose/install/). Run the following in a separate terminal (it will block that terminal, C-c to shut all these docker images down again): ``` -deploy/docker-ephemeral/run.sh +deploy/dockerephemeral/run.sh ``` Then, to run all integration tests: diff --git a/deploy/docker-ephemeral/build/Makefile b/deploy/dockerephemeral/build/Makefile similarity index 100% rename from deploy/docker-ephemeral/build/Makefile rename to deploy/dockerephemeral/build/Makefile diff --git a/deploy/docker-ephemeral/build/README.md b/deploy/dockerephemeral/build/README.md similarity index 100% rename from deploy/docker-ephemeral/build/README.md rename to deploy/dockerephemeral/build/README.md diff --git a/deploy/docker-ephemeral/db-migrate/brig-index.sh b/deploy/dockerephemeral/db-migrate/brig-index.sh similarity index 100% rename from deploy/docker-ephemeral/db-migrate/brig-index.sh rename to deploy/dockerephemeral/db-migrate/brig-index.sh diff --git a/deploy/docker-ephemeral/db-migrate/brig-schema.sh b/deploy/dockerephemeral/db-migrate/brig-schema.sh similarity index 100% rename from deploy/docker-ephemeral/db-migrate/brig-schema.sh rename to deploy/dockerephemeral/db-migrate/brig-schema.sh diff --git a/deploy/docker-ephemeral/db-migrate/galley-schema.sh b/deploy/dockerephemeral/db-migrate/galley-schema.sh similarity index 100% rename from deploy/docker-ephemeral/db-migrate/galley-schema.sh rename to deploy/dockerephemeral/db-migrate/galley-schema.sh diff --git a/deploy/docker-ephemeral/db-migrate/gundeck-schema.sh b/deploy/dockerephemeral/db-migrate/gundeck-schema.sh similarity index 100% rename from deploy/docker-ephemeral/db-migrate/gundeck-schema.sh rename to deploy/dockerephemeral/db-migrate/gundeck-schema.sh diff --git a/deploy/docker-ephemeral/db-migrate/spar-schema.sh b/deploy/dockerephemeral/db-migrate/spar-schema.sh similarity index 100% rename from deploy/docker-ephemeral/db-migrate/spar-schema.sh rename to deploy/dockerephemeral/db-migrate/spar-schema.sh diff --git a/deploy/docker-ephemeral/docker-compose.yaml b/deploy/dockerephemeral/docker-compose.yaml similarity index 100% rename from deploy/docker-ephemeral/docker-compose.yaml rename to deploy/dockerephemeral/docker-compose.yaml diff --git a/deploy/docker-ephemeral/init.sh b/deploy/dockerephemeral/init.sh similarity index 100% rename from deploy/docker-ephemeral/init.sh rename to deploy/dockerephemeral/init.sh diff --git a/deploy/docker-ephemeral/run.sh b/deploy/dockerephemeral/run.sh similarity index 100% rename from deploy/docker-ephemeral/run.sh rename to deploy/dockerephemeral/run.sh diff --git a/deploy/services-demo/README.md b/deploy/services-demo/README.md index d82818c905d..16654b2a8d3 100644 --- a/deploy/services-demo/README.md +++ b/deploy/services-demo/README.md @@ -7,7 +7,7 @@ Use 2 
different terminals and run: ``` # On terminal 1, start the dependencies. Note that you should turn up the max memory # limit of docker. More on https://github.com/wireapp/wire-server/issues/326 -deploy/docker-ephemeral/run.sh +deploy/dockerephemeral/run.sh ``` ``` diff --git a/deploy/services-demo/demo.sh b/deploy/services-demo/demo.sh index 46c6936dd14..e21b0a9eb9e 100755 --- a/deploy/services-demo/demo.sh +++ b/deploy/services-demo/demo.sh @@ -73,7 +73,7 @@ function check_prerequisites() { nc -z 127.0.0.1 9042 \ && nc -z 127.0.0.1 9200 \ && nc -z 127.0.0.1 6379 \ - || { echo "Databases not up. Maybe run 'deploy/docker-ephemeral/run.sh' in a separate terminal first?"; exit 1; } + || { echo "Databases not up. Maybe run 'deploy/dockerephemeral/run.sh' in a separate terminal first?"; exit 1; } if [ "$docker_deployment" = "false" ]; then test -f ${DIR}/../dist/brig \ && test -f ${DIR}/../dist/galley \ @@ -127,7 +127,7 @@ function copy_nginz_configs() { } # brig,gundeck,galley use the amazonka library's 'Discover', which expects AWS credentials -# even if those are not used/can be dummy values with the fake sqs/ses/etc containers used (see deploy/docker-ephemeral/docker-compose.yaml) +# even if those are not used/can be dummy values with the fake sqs/ses/etc containers used (see deploy/dockerephemeral/docker-compose.yaml) export AWS_REGION=${AWS_REGION:-eu-west-1} export AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-dummy} export AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-dummy} diff --git a/deploy/services-demo/docker-compose.yaml b/deploy/services-demo/docker-compose.yaml index 5435a02de02..62ff5165f03 100644 --- a/deploy/services-demo/docker-compose.yaml +++ b/deploy/services-demo/docker-compose.yaml @@ -1,5 +1,5 @@ networks: - docker-ephemeral_demo_wire: + dockerephemeral_demo_wire: external: true version: '2' @@ -30,7 +30,7 @@ services: - demo_wire_sqs:sqs - demo_wire_smtp:smtp networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire galley: image: quay.io/wire/galley @@ -50,7 +50,7 @@ services: external_links: - demo_wire_cassandra:cassandra networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire gundeck: image: quay.io/wire/gundeck @@ -72,7 +72,7 @@ services: - demo_wire_sqs:sqs - demo_wire_localstack:sns networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire cannon: image: quay.io/wire/cannon @@ -86,7 +86,7 @@ services: - /configs/conf/cannon.demo-docker.yaml working_dir: /configs networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire cargohold: image: quay.io/wire/cargohold @@ -102,7 +102,7 @@ services: external_links: - demo_wire_s3:s3 networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire proxy: image: quay.io/wire/proxy @@ -116,7 +116,7 @@ services: - /configs/conf/proxy.demo.yaml working_dir: /configs networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire spar: image: quay.io/wire/spar @@ -132,7 +132,7 @@ services: external_links: - demo_wire_cassandra:cassandra networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire nginz: image: quay.io/wire/nginz @@ -157,4 +157,4 @@ services: - /configs/conf/nginz/nginx-docker.conf working_dir: /configs networks: - - docker-ephemeral_demo_wire + - dockerephemeral_demo_wire diff --git a/services/brig/brig.integration.yaml b/services/brig/brig.integration.yaml index 69aa47ead48..ca5d4b37808 100644 --- a/services/brig/brig.integration.yaml +++ b/services/brig/brig.integration.yaml @@ -24,7 +24,7 @@ gundeck: host: 127.0.0.1 port: 8086 -# You 
can set up local SQS/Dynamo running e.g. `../../deploy/docker-ephemeral/run.sh` +# You can set up local SQS/Dynamo running e.g. `../../deploy/dockerephemeral/run.sh` aws: userJournalQueue: integration-user-events.fifo # ^ Comment this out if you don't want to journal user events diff --git a/services/integration.sh b/services/integration.sh index e40c67d2643..bae017d675d 100755 --- a/services/integration.sh +++ b/services/integration.sh @@ -37,7 +37,7 @@ function check_prerequisites() { if ! ( nc -z 127.0.0.1 9042 \ && nc -z 127.0.0.1 9200 \ && nc -z 127.0.0.1 6379 ); then - echo "Databases not up. Maybe run 'deploy/docker-ephemeral/run.sh' in a separate terminal first?"; exit 1; + echo "Databases not up. Maybe run 'deploy/dockerephemeral/run.sh' in a separate terminal first?"; exit 1; fi if [ ! -f "${TOP_LEVEL}/dist/brig" ] \ && [ ! -f "${TOP_LEVEL}/dist/galley" ] \ @@ -65,7 +65,7 @@ if [[ $INTEGRATION_USE_REAL_AWS -eq 1 ]]; then else # brig,gundeck,galley use the amazonka library's 'Discover', which expects AWS credentials # even if those are not used/can be dummy values with the fake sqs/ses/etc containers used - # (see deploy/docker-ephemeral/docker-compose.yaml ) + # (see deploy/dockerephemeral/docker-compose.yaml ) echo 'Running tests using mocked AWS services' export AWS_REGION=eu-west-1 export AWS_ACCESS_KEY_ID=dummykey From e5e2207ed92af454b70d29b3dad1a8e633c1a6d0 Mon Sep 17 00:00:00 2001 From: fisx Date: Thu, 17 Oct 2019 12:47:50 +0200 Subject: [PATCH 05/26] Store raw idp metadata with typed details in c* (#872) --- services/spar/package.yaml | 1 + services/spar/schema/src/Main.hs | 2 + services/spar/schema/src/V6.hs | 18 +++++++++ services/spar/src/Spar/API.hs | 17 ++++++-- services/spar/src/Spar/API/Swagger.hs | 3 ++ services/spar/src/Spar/API/Types.hs | 5 ++- services/spar/src/Spar/Data.hs | 30 +++++++++++++- services/spar/src/Spar/Types.hs | 39 ++++++++++++------- .../test-integration/Test/Spar/APISpec.hs | 27 +++++++++---- services/spar/test-integration/Util/Core.hs | 10 +++++ services/spar/test/Arbitrary.hs | 5 ++- services/spar/test/Test/Spar/APISpec.hs | 5 ++- 12 files changed, 133 insertions(+), 29 deletions(-) create mode 100644 services/spar/schema/src/V6.hs diff --git a/services/spar/package.yaml b/services/spar/package.yaml index 93fbee9f640..ee3d1d1d50c 100644 --- a/services/spar/package.yaml +++ b/services/spar/package.yaml @@ -45,6 +45,7 @@ dependencies: - http-api-data - http-client - http-client-tls + - http-media - http-types - imports - insert-ordered-containers diff --git a/services/spar/schema/src/Main.hs b/services/spar/schema/src/Main.hs index e443d29fed0..9d9988d5e79 100644 --- a/services/spar/schema/src/Main.hs +++ b/services/spar/schema/src/Main.hs @@ -13,6 +13,7 @@ import qualified V2 import qualified V3 import qualified V4 import qualified V5 +import qualified V6 main :: IO () main = do @@ -27,6 +28,7 @@ main = do , V3.migration , V4.migration , V5.migration + , V6.migration -- When adding migrations here, don't forget to update -- 'schemaVersion' in Spar.Data diff --git a/services/spar/schema/src/V6.hs b/services/spar/schema/src/V6.hs new file mode 100644 index 00000000000..2f26b02a147 --- /dev/null +++ b/services/spar/schema/src/V6.hs @@ -0,0 +1,18 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} + +module V6 (migration) where + +import Imports +import Cassandra.Schema +import Text.RawString.QQ + +migration :: Migration +migration = Migration 6 "Store raw XML metadata" $ do + void $ schema' [r| + CREATE TABLE if not exists 
idp_raw_metadata + ( id uuid + , metadata text + , primary key (id) + ) with compaction = {'class': 'LeveledCompactionStrategy'}; + |] diff --git a/services/spar/src/Spar/API.hs b/services/spar/src/Spar/API.hs index cccae4a5390..3c503e3ba83 100644 --- a/services/spar/src/Spar/API.hs +++ b/services/spar/src/Spar/API.hs @@ -74,6 +74,7 @@ apiSSO opts apiIDP :: ServerT APIIDP Spar apiIDP = idpGet + :<|> idpGetRaw :<|> idpGetAll :<|> idpCreate :<|> idpDelete @@ -164,6 +165,14 @@ idpGet zusr idpid = withDebugLog "idpGet" (Just . show . (^. SAML.idpId)) $ do authorizeIdP zusr idp pure idp +idpGetRaw :: Maybe UserId -> SAML.IdPId -> Spar RawIdPMetadata +idpGetRaw zusr idpid = do + idp <- SAML.getIdPConfig idpid + authorizeIdP zusr idp + wrapMonadClient (Data.getIdPRawMetadata idpid) >>= \case + Just txt -> pure $ RawIdPMetadata txt + Nothing -> throwSpar SparNotFound + idpGetAll :: Maybe UserId -> Spar IdPList idpGetAll zusr = withDebugLog "idpGetAll" (const Nothing) $ do teamid <- Intra.getZUsrOwnedTeam zusr @@ -185,21 +194,23 @@ idpDelete zusr idpid = withDebugLog "idpDelete" (const Nothing) $ do when (stiIdP == Just idpid) $ Data.deleteScimToken team stiId -- Delete IdP config Data.deleteIdPConfig idpid issuer team + Data.deleteIdPRawMetadata idpid return NoContent -- | We generate a new UUID for each IdP used as IdPConfig's path, thereby ensuring uniqueness. -idpCreateXML :: Maybe UserId -> SAML.IdPMetadata -> Spar IdP -idpCreateXML zusr idpmeta = withDebugLog "idpCreate" (Just . show . (^. SAML.idpId)) $ do +idpCreateXML :: Maybe UserId -> Text -> SAML.IdPMetadata -> Spar IdP +idpCreateXML zusr raw idpmeta = withDebugLog "idpCreate" (Just . show . (^. SAML.idpId)) $ do teamid <- Intra.getZUsrOwnedTeam zusr Galley.assertSSOEnabled teamid idp <- validateNewIdP idpmeta teamid + wrapMonadClient $ Data.storeIdPRawMetadata (idp ^. SAML.idpId) raw SAML.storeIdPConfig idp pure idp -- | This handler only does the json parsing, and leaves all authorization checks and -- application logic to 'idpCreateXML'. 
idpCreate :: Maybe UserId -> IdPMetadataInfo -> Spar IdP -idpCreate zusr (IdPMetadataValue xml) = idpCreateXML zusr xml +idpCreate zusr (IdPMetadataValue raw xml) = idpCreateXML zusr raw xml withDebugLog :: SAML.SP m => String -> (a -> Maybe String) -> m a -> m a diff --git a/services/spar/src/Spar/API/Swagger.hs b/services/spar/src/Spar/API/Swagger.hs index dc07411ef30..078756d0e63 100644 --- a/services/spar/src/Spar/API/Swagger.hs +++ b/services/spar/src/Spar/API/Swagger.hs @@ -157,3 +157,6 @@ instance ToParamSchema BindCookie where instance ToSchema Void where declareNamedSchema _ = declareNamedSchema (Proxy @String) + +instance ToSchema RawIdPMetadata where + declareNamedSchema _ = declareNamedSchema (Proxy @String) diff --git a/services/spar/src/Spar/API/Types.hs b/services/spar/src/Spar/API/Types.hs index 902bff25381..95802beecf9 100644 --- a/services/spar/src/Spar/API/Types.hs +++ b/services/spar/src/Spar/API/Types.hs @@ -132,13 +132,16 @@ type APIAuthResp type APIIDP = Header "Z-User" UserId :> IdpGet + :<|> Header "Z-User" UserId :> IdpGetRaw :<|> Header "Z-User" UserId :> IdpGetAll :<|> Header "Z-User" UserId :> IdpCreate :<|> Header "Z-User" UserId :> IdpDelete +type IdpGetRaw = Capture "id" SAML.IdPId :> "raw" :> Get '[RawXML] RawIdPMetadata + type IdpGet = Capture "id" SAML.IdPId :> Get '[JSON] IdP type IdpGetAll = Get '[JSON] IdPList -type IdpCreate = ReqBodyCustomError '[SAML.XML, JSON] "wai-error" IdPMetadataInfo :> PostCreated '[JSON] IdP +type IdpCreate = ReqBodyCustomError '[RawXML, JSON] "wai-error" IdPMetadataInfo :> PostCreated '[JSON] IdP type IdpDelete = Capture "id" SAML.IdPId :> DeleteNoContent '[JSON] NoContent instance MakeCustomError "wai-error" IdPMetadataInfo where diff --git a/services/spar/src/Spar/Data.hs b/services/spar/src/Spar/Data.hs index 35e4a395242..ba6d3d9dd67 100644 --- a/services/spar/src/Spar/Data.hs +++ b/services/spar/src/Spar/Data.hs @@ -31,6 +31,9 @@ module Spar.Data , getIdPConfigsByTeam , deleteIdPConfig , deleteTeam + , storeIdPRawMetadata + , getIdPRawMetadata + , deleteIdPRawMetadata -- * SCIM auth , insertScimToken @@ -70,7 +73,7 @@ import qualified Web.Scim.Class.User as ScimC.User -- | A lower bound: @schemaVersion <= whatWeFoundOnCassandra@, not @==@. schemaVersion :: Int32 -schemaVersion = 5 +schemaVersion = 6 ---------------------------------------------------------------------- @@ -382,6 +385,31 @@ deleteTeam team = do deleteSAMLUsersByIssuer issuer deleteIdPConfig idpid issuer team +storeIdPRawMetadata + :: (HasCallStack, MonadClient m) + => SAML.IdPId -> ST -> m () +storeIdPRawMetadata idp raw = retry x5 . write ins $ params Quorum (idp, raw) + where + ins :: PrepQuery W (SAML.IdPId, ST) () + ins = "INSERT INTO idp_raw_metadata (id, metadata) VALUES (?, ?)" + +getIdPRawMetadata + :: (HasCallStack, MonadClient m) + => SAML.IdPId -> m (Maybe ST) +getIdPRawMetadata idp = runIdentity <$$> + (retry x1 . query1 sel $ params Quorum (Identity idp)) + where + sel :: PrepQuery R (Identity SAML.IdPId) (Identity ST) + sel = "SELECT metadata FROM idp_raw_metadata WHERE id = ?" + +deleteIdPRawMetadata + :: (HasCallStack, MonadClient m) + => SAML.IdPId -> m () +deleteIdPRawMetadata idp = retry x5 . write del $ params Quorum (Identity idp) + where + del :: PrepQuery W (Identity SAML.IdPId) () + del = "DELETE FROM idp_raw_metadata WHERE id = ?" 
+ ---------------------------------------------------------------------- -- SCIM auth -- diff --git a/services/spar/src/Spar/Types.hs b/services/spar/src/Spar/Types.hs index 425ccf7654b..90943d3a745 100644 --- a/services/spar/src/Spar/Types.hs +++ b/services/spar/src/Spar/Types.hs @@ -6,28 +6,31 @@ module Spar.Types where import Imports + import Control.Lens (makeLenses) import Control.Monad.Except import Data.Aeson import Data.Aeson.TH -import Data.Id (TeamId, UserId, ScimTokenId) import Data.ByteString.Conversion +import Data.Id (TeamId, UserId, ScimTokenId) import Data.Json.Util -import Data.Text.Encoding (encodeUtf8) import Data.Proxy (Proxy(Proxy)) import Data.String.Conversions import Data.String.Conversions (ST) +import Data.Text.Encoding (encodeUtf8) import Data.Time import GHC.TypeLits (KnownSymbol, symbolVal) import GHC.Types (Symbol) +import Network.HTTP.Media ((//)) import SAML2.Util (renderURI, parseURI') import SAML2.WebSSO (IdPConfig, IdPId, ID, AuthnRequest, Assertion, SimpleSetCookie) import SAML2.WebSSO.Types.TH (deriveJSONOptions) +import Servant.API as Servant hiding (MkLink, URI(..)) +import System.Logger.Extended (LogFormat) import URI.ByteString import Util.Options import Web.Cookie import Web.HttpApiData -import System.Logger.Extended (LogFormat) import qualified Data.ByteString.Builder as Builder import qualified Data.Text as ST @@ -67,24 +70,34 @@ deriveJSON deriveJSONOptions ''IdPList -- implement @{"uri": , "cert": }@. check both the certificate we get -- from the server against the pinned one and the metadata url in the metadata against the one -- we fetched the xml from, but it's unclear what the benefit would be.) -newtype IdPMetadataInfo = IdPMetadataValue SAML.IdPMetadata +data IdPMetadataInfo = IdPMetadataValue Text SAML.IdPMetadata deriving (Eq, Show, Generic) -instance SAML.HasXMLRoot IdPMetadataInfo where - renderRoot = error "instance SAML.HasXML IdPMetadataInfo: render not implemented" - -- FUTUREWORK: split up HasXML in saml-web-sso into FromXML and ToXML, then we probably - -- can actually not implement this (this even as an error). should be a nice, - -- backwards-compatible change! +-- | We want to store the raw xml text from the registration request in the database for +-- trouble shooting, but @SAML.XML@ only gives us access to the xml tree, not the raw text. +-- 'RawXML' helps with that. +data RawXML + +instance Accept RawXML where + contentType Proxy = "application" // "xml" + +instance MimeUnrender RawXML IdPMetadataInfo where + mimeUnrender Proxy raw = IdPMetadataValue (cs raw) <$> mimeUnrender (Proxy @SAML.XML) raw + +instance MimeRender RawXML RawIdPMetadata where + mimeRender Proxy (RawIdPMetadata raw) = cs raw + +newtype RawIdPMetadata = RawIdPMetadata Text + deriving (Eq, Show, Generic) -instance SAML.HasXML IdPMetadataInfo where - parse = fmap IdPMetadataValue . SAML.parse instance FromJSON IdPMetadataInfo where parseJSON = withObject "IdPMetadataInfo" $ \obj -> do - either fail (pure . IdPMetadataValue) . SAML.decode =<< (obj .: "value") + raw <- obj .: "value" + either fail (pure . 
IdPMetadataValue raw) (SAML.decode (cs raw)) instance ToJSON IdPMetadataInfo where - toJSON (IdPMetadataValue xml) = + toJSON (IdPMetadataValue _ xml) = object [ "value" .= SAML.encode xml ] diff --git a/services/spar/test-integration/Test/Spar/APISpec.hs b/services/spar/test-integration/Test/Spar/APISpec.hs index c9bb6591a2e..9767093647c 100644 --- a/services/spar/test-integration/Test/Spar/APISpec.hs +++ b/services/spar/test-integration/Test/Spar/APISpec.hs @@ -24,6 +24,7 @@ import Util.Core import Util.Types import qualified Data.ByteString.Builder as LB +import qualified Data.Text as ST import qualified Data.ZAuth.Token as ZAuth import qualified Galley.Types.Teams as Galley import qualified Spar.Intra.Brig as Intra @@ -298,8 +299,8 @@ specBindingUsers = describe "binding existing users to sso identities" $ do . expect2xx ) - let checkInitiateLogin :: HasCallStack => Bool -> TestSpar UserId -> SpecWith TestEnv - checkInitiateLogin hasZUser createUser = do + let checkInitiateBind :: HasCallStack => Bool -> TestSpar UserId -> SpecWith TestEnv + checkInitiateBind hasZUser createUser = do let testmsg = if hasZUser then "responds with 200 and a bind cookie" else "responds with 403 and 'bind-without-auth'" @@ -333,13 +334,13 @@ specBindingUsers = describe "binding existing users to sso identities" $ do describe "GET /sso-initiate-bind/:idp" $ do context "known IdP, running session without authentication" $ do - checkInitiateLogin False (fmap fst . call . createRandomPhoneUser =<< asks (^. teBrig)) + checkInitiateBind False (fmap fst . call . createRandomPhoneUser =<< asks (^. teBrig)) context "known IdP, running session with non-sso user" $ do - checkInitiateLogin True (fmap fst . call . createRandomPhoneUser =<< asks (^. teBrig)) + checkInitiateBind True (fmap fst . call . createRandomPhoneUser =<< asks (^. teBrig)) context "known IdP, running session with sso user" $ do - checkInitiateLogin True (registerTestIdP >>= \(_, _, idp) -> loginSsoUserFirstTime idp) + checkInitiateBind True (registerTestIdP >>= \(_, _, idp) -> loginSsoUserFirstTime idp) describe "POST /sso/finalize-login" $ do let checkGrantingAuthnResp :: HasCallStack => UserId -> SignedAuthnResponse -> ResponseLBS -> TestSpar () @@ -619,6 +620,8 @@ specCRUDIdentityProvider = do `shouldRespondWith` \resp -> statusCode resp < 300 callIdpGet' (env ^. teSpar) (Just userid) idpid `shouldRespondWith` checkErr (== 404) "not-found" + callIdpGetRaw' (env ^. teSpar) (Just userid) idpid + `shouldRespondWith` checkErr (== 404) "not-found" context "with email" $ it "responds with 2xx and removes IdP" $ do env <- ask @@ -714,7 +717,11 @@ specCRUDIdentityProvider = do metadata <- makeTestIdPMetadata idp <- call $ callIdpCreate (env ^. teSpar) (Just owner) metadata idp' <- call $ callIdpGet (env ^. teSpar) (Just owner) (idp ^. idpId) - liftIO $ idp `shouldBe` idp' + rawmeta <- call $ callIdpGetRaw (env ^. teSpar) (Just owner) (idp ^. idpId) + liftIO $ do + idp `shouldBe` idp' + let prefix = " makeTestIdPMetadata + metadata <- Data.Aeson.encode . (IdPMetadataValue mempty) <$> makeTestIdPMetadata idp <- call $ callIdpCreateRaw (env ^. teSpar) (Just owner) "application/json" metadata idp' <- call $ callIdpGet (env ^. teSpar) (Just owner) (idp ^. idpId) - liftIO $ idp `shouldBe` idp' + rawmeta <- call $ callIdpGetRaw (env ^. teSpar) (Just owner) (idp ^. idpId) + liftIO $ do + idp `shouldBe` idp' + let prefix = " SparReq -> Maybe UserId -> SAML.IdPId callIdpGet' sparreq_ muid idpid = do get $ sparreq_ . maybe id zUser muid . 
path (cs $ "/identity-providers/" -/ SAML.idPIdToST idpid) +callIdpGetRaw :: (MonadIO m, MonadHttp m) => SparReq -> Maybe UserId -> SAML.IdPId -> m Text +callIdpGetRaw sparreq_ muid idpid = do + resp <- callIdpGetRaw' (sparreq_ . expect2xx) muid idpid + maybe (liftIO . throwIO $ ErrorCall "Nothing") (pure . cs) (responseBody resp) + +callIdpGetRaw' :: (MonadIO m, MonadHttp m) => SparReq -> Maybe UserId -> SAML.IdPId -> m ResponseLBS +callIdpGetRaw' sparreq_ muid idpid = do + get $ sparreq_ . maybe id zUser muid . path (cs $ "/identity-providers/" -/ SAML.idPIdToST idpid -/ "raw") + callIdpGetAll :: (MonadIO m, MonadHttp m) => SparReq -> Maybe UserId -> m IdPList callIdpGetAll sparreq_ muid = do resp <- callIdpGetAll' (sparreq_ . expect2xx) muid diff --git a/services/spar/test/Arbitrary.hs b/services/spar/test/Arbitrary.hs index ad3b706c82a..09393b2abed 100644 --- a/services/spar/test/Arbitrary.hs +++ b/services/spar/test/Arbitrary.hs @@ -11,6 +11,7 @@ import Data.Proxy import "swagger2" Data.Swagger hiding (Header(..)) import Data.Aeson import Data.Id () +import Data.String.Conversions (cs) import SAML2.WebSSO.Test.Arbitrary () import Servant.API.ContentTypes import Spar.Types @@ -46,7 +47,9 @@ instance Arbitrary NoContent where arbitrary = pure NoContent instance Arbitrary IdPMetadataInfo where - arbitrary = IdPMetadataValue <$> arbitrary + arbitrary = do + mdata <- arbitrary + pure $ IdPMetadataValue (cs $ encode mdata) mdata -- This is not required by the servant-server instances, but the swagger -- tests want it. See https://github.com/haskell-servant/servant-swagger/issues/58 diff --git a/services/spar/test/Test/Spar/APISpec.hs b/services/spar/test/Test/Spar/APISpec.hs index 471b92cb600..1696ee29122 100644 --- a/services/spar/test/Test/Spar/APISpec.hs +++ b/services/spar/test/Test/Spar/APISpec.hs @@ -11,7 +11,7 @@ import Data.Metrics.Test (pathsConsistencyCheck) import Data.Proxy (Proxy(Proxy)) import Servant.Swagger (validateEveryToJSON) import Spar.API as API -import Spar.Types (IdPMetadataInfo) +import Spar.Types (IdPMetadataInfo(IdPMetadataValue)) import Test.Hspec (Spec, it, shouldBe) import Test.QuickCheck (property) @@ -25,4 +25,5 @@ spec = do pathsConsistencyCheck (routesToPaths @API.API) `shouldBe` mempty it "roundtrip: IdPMetadataInfo" . property $ \(val :: IdPMetadataInfo) -> do - (eitherDecode . encode) val `shouldBe` Right val + let withoutRaw (IdPMetadataValue _ x) = x + (withoutRaw <$> (eitherDecode . encode) val) `shouldBe` Right (withoutRaw val) From 4a78cd24b79048c6e3a4bd11c6b5f0fda1ead992 Mon Sep 17 00:00:00 2001 From: fisx Date: Fri, 18 Oct 2019 13:52:18 +0200 Subject: [PATCH 06/26] Tweak invite script (#877) * Do not have default args that never apply * Make errors more greppable, do not abort on first error * Usage info --- deploy/services-demo/create_team_members.sh | 22 +++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/deploy/services-demo/create_team_members.sh b/deploy/services-demo/create_team_members.sh index c899f8098cc..c0fabeafe51 100755 --- a/deploy/services-demo/create_team_members.sh +++ b/deploy/services-demo/create_team_members.sh @@ -2,10 +2,10 @@ set -e -ADMIN_UUID="a09e9521-e14e-4285-ad71-47caa97f4a16" -TEAM_UUID="9e57a378-0dca-468f-9661-7872f5f1c910" -BRIG_HOST="http://localhost:8082" -CSV_FILE="myfile.csv" +ADMIN_UUID="n/a" +TEAM_UUID="n/a" +BRIG_HOST="http://localhost:8080" +CSV_FILE="n/a" USAGE=" This bash script can be used to invite members to a given team. 
Input

 USAGE: $0
    -t <team uuid> : ID of the inviting team. default: ${TEAM_UUID}
    -h <base uri> : Base URI of brig. default: ${BRIG_HOST}
    -c <csv file> : file containing info on the invitees in format 'Email,UserName'. default: ${CSV_FILE}
+
+If you tee(1) stdout, stderr of this script into a log file, you can
+grep that log file for errors like this:
+
+$ grep code out.log | grep email-exists # the most common case
+$ grep code out.log | grep -v email-exists
+
+If you are in a hurry, you may want to change the sleep(1) at the end
+of the invite loop to less than a second. If you want to give up on
+the first error, add an exit(1) where we check the $INVITATION_ID.
+
 "

 # Option parsing:
@@ -73,8 +84,7 @@ do
     if ( ( echo "$INVITATION_ID" | grep -q '"code"' )
        && ( echo "$INVITATION_ID" | grep -q '"label"' ) ) ; then
-        echo "Got an error, aborting: $INVITATION_ID"
-        exit 1
+        echo "failed inviting $USER_NAME <$EMAIL>: $INVITATION_ID"
     fi

     echo "Sleeping 1 second..." 1>&2

From c507ed64a7d4f0af2bffe2f9c3eb4b5f89a477c0 Mon Sep 17 00:00:00 2001
From: fisx
Date: Mon, 21 Oct 2019 11:22:28 +0200
Subject: [PATCH 07/26] Cleanup (#879)

* Fix: remove duplicate object field in ToJSON

* Remove unnecessary toJSON call
---
 libs/brig-types/src/Brig/Types/User.hs | 1 -
 libs/galley-types/src/Galley/Types.hs  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/brig-types/src/Brig/Types/User.hs b/libs/brig-types/src/Brig/Types/User.hs
index 98ff7e6d99c..f68cfb68aad 100644
--- a/libs/brig-types/src/Brig/Types/User.hs
+++ b/libs/brig-types/src/Brig/Types/User.hs
@@ -447,7 +447,6 @@ instance ToJSON NewUser where
         # "uuid" .= newUserUUID u
         # "email" .= newUserEmail u
         # "email_code" .= newUserEmailCode u
-        # "password" .= newUserPassword u
         # "picture" .= newUserPict u
         # "assets" .= newUserAssets u
         # "phone" .= newUserPhone u
diff --git a/libs/galley-types/src/Galley/Types.hs b/libs/galley-types/src/Galley/Types.hs
index af50bddebed..9f826949b45 100644
--- a/libs/galley-types/src/Galley/Types.hs
+++ b/libs/galley-types/src/Galley/Types.hs
@@ -493,7 +493,7 @@ instance ToJSON UserClients where
       where
         fn u c m =
             let k = T.decodeLatin1 (toASCIIBytes (toUUID u))
             in
-                Map.insert k (toJSON c) m
+                Map.insert k c m

From 1815fdb79f3e14de4818d9ef852588203a1c92f0 Mon Sep 17 00:00:00 2001
From: Arian van Putten
Date: Mon, 21 Oct 2019 13:11:21 +0200
Subject: [PATCH 08/26] Change SCIM documentation to actually use SAML Ids
 (#883)

* Change SCIM documentation to actually use SAML Ids

UUIDs are a bit confusing here. It is more common that your SAML
provider is configured to use email addresses instead.

Co-Authored-By: fisx
---
 docs/reference/provisioning/scim-via-curl.md | 21 +++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/docs/reference/provisioning/scim-via-curl.md b/docs/reference/provisioning/scim-via-curl.md
index a3fd2bc0c3f..2b64e425f4b 100644
--- a/docs/reference/provisioning/scim-via-curl.md
+++ b/docs/reference/provisioning/scim-via-curl.md
@@ -92,19 +92,34 @@ A minimal definition of a user looks like this:

 ```bash
 export SCIM_USER='{
   "schemas" : ["urn:ietf:params:scim:schemas:core:2.0:User"],
-  "externalId" : "f8c4ffde-4592-11e9-8600-afe11dc7d07b",
+  "externalId" : "nick@example.com",
   "userName" : "nick",
   "displayName" : "The Nick"
 }'
 ```

+The `externalId` is used to construct a saml identity. Two cases are
+currently supported:
+
+1. `externalId` contains a valid email address. The SAML `NameID` has
+the form `<NameID Format="urn:oasis:names:tc:SAML:1.1:nameid-format:emailAddress">me@example.com</NameID>`.
+2. 
`externalId` contains anything that is *not* an email address. The
+SAML `NameID` has the form `<NameID Format="urn:oasis:names:tc:SAML:2.0:nameid-format:unspecified">...</NameID>`.
+
+*NOTE: It is important to configure your SAML provider to use
+`nameid-format:emailAddress` or `nameid-format:unspecified`. Other
+nameid formats are not supported at this moment*.
+See also: https://github.com/wireapp/wire-server/blob/c507ed64a7d4f0af2bffe2f9c3eb4b5f89a477c0/services/spar/src/Spar/Scim/User.hs#L149-L158
+
 We also support custom fields that are used in rich profiles in this
 form [see {#RefRichInfo}](../user/rich-info.md):

 ```bash
 export SCIM_USER='{
   "schemas" : ["urn:ietf:params:scim:schemas:core:2.0:User", "urn:wire:scim:schemas:profile:1.0"],
-  "externalId" : "f8c4ffde-4592-11e9-8600-afe11dc7d07b",
+  "externalId" : "rnick@example.com",
   "userName" : "rnick",
   "displayName" : "The Rich Nick",
   "urn:wire:scim:schemas:profile:1.0": {
@@ -160,7 +175,7 @@ up-to-date user present, just `GET` one right before the `PUT`.)
 ```bash
 export SCIM_USER='{
   "schemas" : ["urn:ietf:params:scim:schemas:core:2.0:User"],
-  "externalId" : "updated-user-id",
+  "externalId" : "rnick@example.com",
   "userName" : "newnick",
   "displayName" : "The New Nick"
 }'

From 8d0698dbc45f675a2b09943b0237c03b9c442f7d Mon Sep 17 00:00:00 2001
From: fisx
Date: Mon, 21 Oct 2019 14:31:59 +0200
Subject: [PATCH 09/26] stack: Split nix stuff into build deps and runtime
 deps. (#884)

Stack started not liking being called from a nix-shell anymore,
because stack wants to create the nix-shell itself so it does some
auto-detection and crashes. To trick it into doing the right thing
again, we only use the nix-shell for non-build-deps and just for dev
tools, then stack itself will call another nix-shell file that just
handles build deps for the stack project.
---
 shell.nix      | 46 +++++++++-------------------------------------
 stack-deps.nix | 36 ++++++++++++++++++++++++++++++++++++
 stack.yaml     |  2 +-
 3 files changed, 46 insertions(+), 38 deletions(-)
 create mode 100644 stack-deps.nix

diff --git a/shell.nix b/shell.nix
index 47ba31c95c9..af3393d55a3 100644
--- a/shell.nix
+++ b/shell.nix
@@ -1,37 +1,9 @@
-{ pkgs ? import <nixpkgs> {} }:
-let
-  cryptobox-c = pkgs.callPackage ({fetchFromGitHub, rustPlatform, pkgconfig, libsodium}:
-    rustPlatform.buildRustPackage rec {
-      name = "cryptobox-c-${version}";
-      version = "2019-06-17";
-      buildInputs = [ pkgconfig libsodium ];
-      src = fetchFromGitHub {
-        owner = "wireapp";
-        repo = "cryptobox-c";
-        rev = "4067ad96b125942545dbdec8c1a89f1e1b65d013";
-        sha256 = "1i9dlhw0xk1viglyhail9fb36v1awrypps8jmhrkz8k1bhx98ci3";
-      };
-      cargoSha256 = "1373rpy0fi3cpacv06x1cv4cv0brwdri2680ymdkq8w44syp20ym";
-      postInstall = ''
-        mkdir -p $out/include
-        cp src/cbox.h $out/include
-      '';
-    }) {};
-in
-  pkgs.haskell.lib.buildStackProject {
-    name = "wire-server";
-    buildInputs = with pkgs; [
-      docker-compose
-      pkgconfig
-      cryptobox-c
-      libsodium
-      geoip
-      protobuf
-      openssl
-      snappy
-      icu
-      zlib
-      libxml2
-    ];
-    ghc = pkgs.haskell.compiler.ghc844;
-  }
+{ pkgs ? import <nixpkgs> {}}:
+with pkgs; mkShell {
+  name = "shell";
+  buildInputs = [
+    docker-compose
+    gnumake
+    stack
+  ];
+}
diff --git a/stack-deps.nix b/stack-deps.nix
new file mode 100644
index 00000000000..815b2462449
--- /dev/null
+++ b/stack-deps.nix
@@ -0,0 +1,36 @@
+{ pkgs ? 
import <nixpkgs> {} }:
+let
+  cryptobox-c = pkgs.callPackage ({fetchFromGitHub, rustPlatform, pkgconfig, libsodium}:
+    rustPlatform.buildRustPackage rec {
+      name = "cryptobox-c-${version}";
+      version = "2019-06-17";
+      buildInputs = [ pkgconfig libsodium ];
+      src = fetchFromGitHub {
+        owner = "wireapp";
+        repo = "cryptobox-c";
+        rev = "4067ad96b125942545dbdec8c1a89f1e1b65d013";
+        sha256 = "1i9dlhw0xk1viglyhail9fb36v1awrypps8jmhrkz8k1bhx98ci3";
+      };
+      cargoSha256 = "1373rpy0fi3cpacv06x1cv4cv0brwdri2680ymdkq8w44syp20ym";
+      postInstall = ''
+        mkdir -p $out/include
+        cp src/cbox.h $out/include
+      '';
+    }) {};
+in
+  pkgs.haskell.lib.buildStackProject {
+    name = "wire-server";
+    buildInputs = with pkgs; [
+      pkgconfig
+      cryptobox-c
+      libsodium
+      geoip
+      protobuf
+      openssl
+      snappy
+      icu
+      zlib
+      libxml2
+    ];
+    ghc = pkgs.haskell.compiler.ghc844;
+  }
diff --git a/stack.yaml b/stack.yaml
index 4bc77c623b8..a617362e07e 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -74,4 +74,4 @@ flags:
 allow-newer: false

 nix:
-  shell-file: shell.nix
+  shell-file: stack-deps.nix

From 08dd74232f2d577d9b03d4420fba4611b6a5cc6e Mon Sep 17 00:00:00 2001
From: fisx
Date: Tue, 22 Oct 2019 11:02:39 +0200
Subject: [PATCH 10/26] Make gundeck handle AWS outages better. (#869)

Introduce `Gundeck.ThreadBudget.runWithBudget` that can be wrapped
around actions. Wrapped actions are executed only if there is budget
available, otherwise they are dropped and a warning is logged. See the
haddocks at the beginning of `Gundeck.ThreadBudget` for a rationale.
---
 libs/types-common/package.yaml               |   1 +
 libs/types-common/src/Data/SizedHashMap.hs   |  54 +++
 libs/types-common/test/Main.hs               |   3 +-
 libs/types-common/test/Test/SizedHashMap.hs  |  62 ++++
 services/brig/src/Brig/Budget.hs             |   1 -
 services/cannon/src/Cannon/Dict.hs           |  52 +--
 services/gundeck/gundeck.integration.yaml    |   3 +
 services/gundeck/package.yaml                |   5 +
 services/gundeck/src/Gundeck/Env.hs          |   5 +-
 services/gundeck/src/Gundeck/Options.hs      |  10 +
 services/gundeck/src/Gundeck/Push.hs         | 107 ++++--
 services/gundeck/src/Gundeck/Push/Native.hs  |  11 +-
 services/gundeck/src/Gundeck/Run.hs          |   7 +-
 services/gundeck/src/Gundeck/ThreadBudget.hs | 255 ++++++++++++++
 services/gundeck/test/unit/Main.hs           |   2 +
 services/gundeck/test/unit/MockGundeck.hs    |   3 +
 services/gundeck/test/unit/ThreadBudget.hs   | 334 +++++++++++++++++++
 stack.yaml                                   |   1 +
 18 files changed, 833 insertions(+), 83 deletions(-)
 create mode 100644 libs/types-common/src/Data/SizedHashMap.hs
 create mode 100644 libs/types-common/test/Test/SizedHashMap.hs
 create mode 100644 services/gundeck/src/Gundeck/ThreadBudget.hs
 create mode 100644 services/gundeck/test/unit/ThreadBudget.hs

diff --git a/libs/types-common/package.yaml b/libs/types-common/package.yaml
index 217c0c8bab9..ebeecd958d5 100644
--- a/libs/types-common/package.yaml
+++ b/libs/types-common/package.yaml
@@ -91,6 +91,7 @@ tests:
     - text-format
    - time
     - types-common
+    - unordered-containers
     - uuid
 flags:
   arbitrary:
diff --git a/libs/types-common/src/Data/SizedHashMap.hs b/libs/types-common/src/Data/SizedHashMap.hs
new file mode 100644
index 00000000000..c372d9e57e0
--- /dev/null
+++ b/libs/types-common/src/Data/SizedHashMap.hs
@@ -0,0 +1,54 @@
+module Data.SizedHashMap
+  ( SizedHashMap
+  , fromSizedHashMap
+  , size
+  , empty
+  , insert
+  , keys
+  , elems
+  , toList
+  , lookup
+  , delete
+  )
+where
+
+import Imports hiding (lookup, toList)
+import Data.Hashable (Hashable)
+
+import qualified Data.HashMap.Strict as M
+
+
+data SizedHashMap k v = SizedHashMap !Int !(HashMap k v)
+
+fromSizedHashMap :: forall k v. 
SizedHashMap k v -> HashMap k v +fromSizedHashMap (SizedHashMap _ hm) = hm + +size :: forall k v. SizedHashMap k v -> Int +size (SizedHashMap s _) = s + +empty :: forall k v. SizedHashMap k v +empty = SizedHashMap 0 M.empty + +insert :: forall k v. (Eq k, Hashable k) => k -> v -> SizedHashMap k v -> SizedHashMap k v +insert k v (SizedHashMap n hm) = SizedHashMap n' hm' + where + n' = if M.member k hm then n else n + 1 + hm' = M.insert k v hm + +keys :: forall k v. SizedHashMap k v -> [k] +keys (SizedHashMap _ hm) = M.keys hm + +elems :: forall k v. SizedHashMap k v -> [v] +elems (SizedHashMap _ hm) = M.elems hm + +toList :: forall k v. SizedHashMap k v -> [(k, v)] +toList (SizedHashMap _ hm) = M.toList hm + +lookup :: forall k v. (Eq k, Hashable k) => k -> SizedHashMap k v -> Maybe v +lookup k (SizedHashMap _ hm) = M.lookup k hm + +delete :: forall k v. (Eq k, Hashable k) => k -> SizedHashMap k v -> SizedHashMap k v +delete k (SizedHashMap n hm) = SizedHashMap n' hm' + where + n' = if M.member k hm then n - 1 else n + hm' = M.delete k hm diff --git a/libs/types-common/test/Main.hs b/libs/types-common/test/Main.hs index 6181799fd6b..39e33f1b7b3 100644 --- a/libs/types-common/test/Main.hs +++ b/libs/types-common/test/Main.hs @@ -2,8 +2,9 @@ module Main (main) where import Imports import qualified Test.Properties as Properties +import qualified Test.SizedHashMap as SizedHashMap import Test.Tasty main :: IO () -main = defaultMain $ testGroup "Tests" [ Properties.tests ] +main = defaultMain $ testGroup "Tests" [ Properties.tests, SizedHashMap.tests ] diff --git a/libs/types-common/test/Test/SizedHashMap.hs b/libs/types-common/test/Test/SizedHashMap.hs new file mode 100644 index 00000000000..f5c89ce9314 --- /dev/null +++ b/libs/types-common/test/Test/SizedHashMap.hs @@ -0,0 +1,62 @@ +module Test.SizedHashMap (tests) where + +import Imports + +import Data.SizedHashMap as SHM +import Test.Tasty +import Test.Tasty.HUnit + +import qualified Data.HashMap.Strict as HM + + +zro, one, two :: HM.HashMap Char Char +zro = HM.empty +one = HM.insert '0' '0' zro +two = HM.insert '1' '1' one + +zro', one', two' :: SHM.SizedHashMap Char Char +zro' = SHM.empty +one' = SHM.insert '0' '0' zro' +two' = SHM.insert '1' '1' one' + + +tests :: TestTree +tests = testGroup "SizedHashMap" + [ testCase "empty" $ do + SHM.size zro' @=? 0 + fromSizedHashMap zro' @=? zro + + , testCase "insert" $ do + SHM.size one' @=? HM.size one + SHM.size two' @=? HM.size two + fromSizedHashMap one' @=? one + fromSizedHashMap two' @=? two + + , testCase "keys" $ do + SHM.keys zro' @=? HM.keys zro + SHM.keys one' @=? HM.keys one + SHM.keys two' @=? HM.keys two + + , testCase "elems" $ do + SHM.elems zro' @=? HM.elems zro + SHM.elems one' @=? HM.elems one + SHM.elems two' @=? HM.elems two + + , testCase "toList" $ do + SHM.toList zro' @=? HM.toList zro + SHM.toList one' @=? HM.toList one + SHM.toList two' @=? HM.toList two + + , testCase "lookup" $ do + SHM.lookup '0' zro' @=? HM.lookup '0' zro + SHM.lookup '0' one' @=? HM.lookup '0' one + SHM.lookup '0' two' @=? HM.lookup '0' two + + , testCase "delete" $ do + size (SHM.delete '0' zro') @=? 0 + size (SHM.delete '0' one') @=? 0 + size (SHM.delete '0' two') @=? 1 + fromSizedHashMap (SHM.delete '0' zro') @=? HM.delete '0' zro + fromSizedHashMap (SHM.delete '0' one') @=? HM.delete '0' one + fromSizedHashMap (SHM.delete '0' two') @=? 
HM.delete '0' two + ] diff --git a/services/brig/src/Brig/Budget.hs b/services/brig/src/Brig/Budget.hs index 656ac0252d1..2f7dcbe8a0b 100644 --- a/services/brig/src/Brig/Budget.hs +++ b/services/brig/src/Brig/Budget.hs @@ -11,7 +11,6 @@ module Brig.Budget ) where import Imports -import Brig.Data.Instances () import Cassandra import Data.Time.Clock diff --git a/services/cannon/src/Cannon/Dict.hs b/services/cannon/src/Cannon/Dict.hs index 730e49bdabf..dc93edcfc8f 100644 --- a/services/cannon/src/Cannon/Dict.hs +++ b/services/cannon/src/Cannon/Dict.hs @@ -11,43 +11,46 @@ module Cannon.Dict where import Imports hiding (lookup) + import Data.Hashable (hash, Hashable) +import Data.SizedHashMap (SizedHashMap) import Data.Vector (Vector, (!)) -import qualified Data.HashMap.Strict as M -import qualified Data.Vector as V +import qualified Data.SizedHashMap as SHM +import qualified Data.Vector as V + newtype Dict a b = Dict { _map :: Vector (IORef (SizedHashMap a b)) } size :: MonadIO m => Dict a b -> m Int -size d = liftIO $ sum <$> mapM (\r -> hmsize <$> readIORef r) (_map d) +size d = liftIO $ sum <$> mapM (\r -> SHM.size <$> readIORef r) (_map d) empty :: MonadIO m => Int -> m (Dict a b) empty w = liftIO $ if w > 0 && w < 8192 - then Dict <$> V.generateM w (const $ newIORef hmempty) + then Dict <$> V.generateM w (const $ newIORef SHM.empty) else error "Dict.empty: slice number out of range [1, 8191]" insert :: (Eq a, Hashable a, MonadIO m) => a -> b -> Dict a b -> m () -insert k v = mutDict (hminsert k v) . getSlice k +insert k v = mutDict (SHM.insert k v) . getSlice k add :: (Eq a, Hashable a, MonadIO m) => a -> b -> Dict a b -> m Bool add k v d = liftIO $ atomicModifyIORef' (getSlice k d) $ \m -> - if k `elem` hmkeys m + if k `elem` SHM.keys m then (m, False) - else (hminsert k v m, True) + else (SHM.insert k v m, True) remove :: (Eq a, Hashable a, MonadIO m) => a -> Dict a b -> m Bool remove = removeIf (const True) removeIf :: (Eq a, Hashable a, MonadIO m) => (Maybe b -> Bool) -> a -> Dict a b -> m Bool removeIf f k d = liftIO $ atomicModifyIORef' (getSlice k d) $ \m -> - if f (hmlookup k m) - then (hmdelete k m, True) + if f (SHM.lookup k m) + then (SHM.delete k m, True) else (m, False) lookup :: (Eq a, Hashable a, MonadIO m) => a -> Dict a b -> m (Maybe b) -lookup k = liftIO . fmap (hmlookup k) . readIORef . getSlice k +lookup k = liftIO . fmap (SHM.lookup k) . readIORef . getSlice k ----------------------------------------------------------------------------- -- Internal @@ -60,32 +63,3 @@ mutDict f d = liftIO $ atomicModifyIORef' d $ \m -> (f m, ()) getSlice :: (Hashable a) => a -> Dict a b -> IORef (SizedHashMap a b) getSlice k (Dict m) = m ! (hash k `mod` V.length m) - ----------------------------------------------------------------------- --- hashmap with O(1) size operator - -data SizedHashMap a b = SizedHashMap !Int !(HashMap a b) - -hmsize :: forall k v. SizedHashMap k v -> Int -hmsize (SizedHashMap s _) = s - -hmempty :: forall k v. SizedHashMap k v -hmempty = SizedHashMap 0 M.empty - -hminsert :: forall k v. (Eq k, Hashable k) => k -> v -> SizedHashMap k v -> SizedHashMap k v -hminsert k v (SizedHashMap n hm) = SizedHashMap n' hm' - where - n' = if M.member k hm then n else n + 1 - hm' = M.insert k v hm - -hmkeys :: forall k v. SizedHashMap k v -> [k] -hmkeys (SizedHashMap _ hm) = M.keys hm - -hmlookup :: forall k v. (Eq k, Hashable k) => k -> SizedHashMap k v -> Maybe v -hmlookup k (SizedHashMap _ hm) = M.lookup k hm - -hmdelete :: forall k v. 
(Eq k, Hashable k) => k -> SizedHashMap k v -> SizedHashMap k v -hmdelete k (SizedHashMap n hm) = SizedHashMap n' hm' - where - n' = if M.member k hm then n - 1 else n - hm' = M.delete k hm diff --git a/services/gundeck/gundeck.integration.yaml b/services/gundeck/gundeck.integration.yaml index ebd3897ce8a..9f6d0820a55 100644 --- a/services/gundeck/gundeck.integration.yaml +++ b/services/gundeck/gundeck.integration.yaml @@ -24,6 +24,9 @@ settings: httpPoolSize: 1024 notificationTTL: 24192200 bulkPush: true + maxConcurrentNativePushes: + hard: 30 # more than this number of threads will not be allowed + soft: 10 # more than this number of threads will be warned about logLevel: Info logNetStrings: false diff --git a/services/gundeck/package.yaml b/services/gundeck/package.yaml index c4ca4033a46..bb00596df9e 100644 --- a/services/gundeck/package.yaml +++ b/services/gundeck/package.yaml @@ -57,6 +57,7 @@ library: - redis-io >=0.4 - resourcet >=1.1 - retry >=0.5 + - safe-exceptions - semigroups >=0.12 - singletons >=1.0 - split >=0.2 @@ -173,6 +174,7 @@ tests: - -threaded dependencies: - base + - async - aeson - aeson-pretty - amazonka @@ -192,6 +194,7 @@ tests: - network-uri - QuickCheck - quickcheck-instances + - quickcheck-state-machine - random - scientific - string-conversions @@ -199,8 +202,10 @@ tests: - tasty-hunit - tasty-quickcheck - text + - time - tinylog - transformers + - tree-diff - types-common - unordered-containers - uuid diff --git a/services/gundeck/src/Gundeck/Env.hs b/services/gundeck/src/Gundeck/Env.hs index df349104d52..e861566510f 100644 --- a/services/gundeck/src/Gundeck/Env.hs +++ b/services/gundeck/src/Gundeck/Env.hs @@ -12,6 +12,7 @@ import Data.Text (unpack) import Data.Time.Clock.POSIX import Util.Options import Gundeck.Options as Opt +import Gundeck.ThreadBudget import Network.HTTP.Client (responseTimeoutMicro) import Network.HTTP.Client.TLS (tlsManagerSettings) @@ -32,6 +33,7 @@ data Env = Env , _rstate :: !Redis.Pool , _awsEnv :: !Aws.Env , _time :: !(IO Milliseconds) + , _threadBudgetState :: !(Maybe ThreadBudgetState) } makeLenses ''Env @@ -74,7 +76,8 @@ createEnv m o = do io <- mkAutoUpdate defaultUpdateSettings { updateAction = Ms . round . (* 1000) <$> getPOSIXTime } - return $! Env def m o l n p r a io + mtbs <- mkThreadBudgetState `mapM` (o ^. optSettings . setMaxConcurrentNativePushes) + return $! Env def m o l n p r a io mtbs reqIdMsg :: RequestId -> Logger.Msg -> Logger.Msg reqIdMsg = ("request" Logger..=) . unRequestId diff --git a/services/gundeck/src/Gundeck/Options.hs b/services/gundeck/src/Gundeck/Options.hs index eb0effdc4f0..af888da2716 100644 --- a/services/gundeck/src/Gundeck/Options.hs +++ b/services/gundeck/src/Gundeck/Options.hs @@ -36,11 +36,21 @@ data Settings = Settings -- | Use this option to group push notifications and send them in bulk to Cannon, instead -- of in individual requests , _setBulkPush :: !Bool + -- | Maximum number of concurrent threads calling SNS. 
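+    -- (Both limits are optional. As a sketch, a config value mirroring the
+    -- gundeck.integration.yaml settings added in this same patch would be:
+    --
+    -- > maxConcurrentNativePushes:
+    -- >   hard: 30
+    -- >   soft: 10
+    -- )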
+ , _setMaxConcurrentNativePushes :: !(Maybe MaxConcurrentNativePushes) + } deriving (Show, Generic) + +data MaxConcurrentNativePushes = MaxConcurrentNativePushes + { _limitHard :: !(Maybe Int) -- ^ more than this number of threads will not be allowed + , _limitSoft :: !(Maybe Int) -- ^ more than this number of threads will be warned about } deriving (Show, Generic) deriveFromJSON toOptionFieldName ''Settings makeLenses ''Settings +deriveFromJSON toOptionFieldName ''MaxConcurrentNativePushes +makeLenses ''MaxConcurrentNativePushes + data Opts = Opts { _optGundeck :: !Endpoint -- ^ Hostname and port to bind to , _optCassandra :: !CassandraOpts diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index 84427c12911..be3fb5f37f6 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -8,6 +8,7 @@ module Gundeck.Push , pushAll, pushAny , MonadPushAll(..) , MonadNativeTargets(..) + , MonadMapAsync(..) , MonadPushAny(..) ) where @@ -26,8 +27,9 @@ import Gundeck.Aws (endpointUsers) import Gundeck.Aws.Arn import Gundeck.Env import Gundeck.Monad -import Gundeck.Push.Native.Types import Gundeck.Options +import Gundeck.Push.Native.Types +import Gundeck.ThreadBudget import Gundeck.Types import Gundeck.Util import Network.HTTP.Types @@ -74,6 +76,7 @@ class MonadThrow m => MonadPushAll m where mpaStreamAdd :: NotificationId -> List1 NotificationTarget -> List1 Aeson.Object -> NotificationTTL -> m () mpaPushNative :: Notification -> Push -> [Address] -> m () mpaForkIO :: m () -> m () + mpaRunWithBudget :: Int -> a -> m a -> m a instance MonadPushAll Gundeck where mpaNotificationTTL = view (options . optSettings . setNotificationTTL) @@ -83,20 +86,33 @@ instance MonadPushAll Gundeck where mpaStreamAdd = Stream.add mpaPushNative = pushNative mpaForkIO = void . forkIO + mpaRunWithBudget = runWithBudget'' + +-- | Another layer of wrap around 'runWithBudget'. +runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a +runWithBudget'' budget fallback action = do + view threadBudgetState >>= \case + Nothing -> action + Just tbs -> runWithBudget' tbs budget fallback action + -- | Abstract over all effects in 'nativeTargets' (for unit testing). class Monad m => MonadNativeTargets m where mntgtLogErr :: SomeException -> m () mntgtLookupAddresses :: UserId -> m [Address] - mntgtMapAsync :: (a -> m b) -> [a] -> m [Either SomeException b] instance MonadNativeTargets Gundeck where mntgtLogErr e = Log.err (msg (val "Failed to get native push address: " +++ show e)) mntgtLookupAddresses rcp = Data.lookup rcp Data.One + +class Monad m => MonadMapAsync m where + mntgtMapAsync :: (a -> m b) -> [a] -> m [Either SomeException b] + +instance MonadMapAsync Gundeck where mntgtMapAsync = mapAsync -- | Abstract over all effects in 'pushAny' (for unit testing). 
-class (MonadPushAll m, MonadNativeTargets m) => MonadPushAny m where +class (MonadPushAll m, MonadNativeTargets m, MonadMapAsync m) => MonadPushAny m where mpyPush :: Notification -> List1 NotificationTarget -> UserId @@ -132,9 +148,9 @@ pushAny' p = do unless (p^.pushTransient) $ mpaStreamAdd i tgts pload =<< mpaNotificationTTL mpaForkIO $ do - prs <- mpyPush notif tgts (p^.pushOrigin) (p^.pushOriginConnection) (p^.pushConnections) + alreadySent <- mpyPush notif tgts (p^.pushOrigin) (p^.pushOriginConnection) (p^.pushConnections) unless (p^.pushTransient) $ - mpaPushNative notif p =<< nativeTargets p prs + mpaPushNative notif p =<< nativeTargets p (nativeTargetsRecipients p) alreadySent where mkTarget :: Recipient -> NotificationTarget mkTarget r = @@ -145,7 +161,7 @@ pushAny' p = do -- | Construct and send a single bulk push request to the client. Write the 'Notification's from -- the request to C*. Trigger native pushes for all delivery failures notifications. -pushAll :: (MonadPushAll m, MonadNativeTargets m) => [Push] -> m () +pushAll :: (MonadPushAll m, MonadNativeTargets m, MonadMapAsync m) => [Push] -> m () pushAll pushes = do targets :: [(Push, (Notification, List1 (Recipient, [Presence])))] <- zip pushes <$> (mkNotificationAndTargets `mapM` pushes) @@ -172,8 +188,15 @@ pushAll pushes = do resp <- compilePushResps targets <$> mpaBulkPush (compilePushReq <$> targets) -- native push - forM_ resp $ \((notif, psh), alreadySent) -> unless (psh ^. pushTransient) $ - mpaPushNative notif psh =<< nativeTargets psh alreadySent + forM_ resp $ \((notif :: Notification, psh :: Push), alreadySent :: [Presence]) -> do + let rcps' = nativeTargetsRecipients psh + budget = length rcps' + -- this is a rough budget, since there may be more than one device in a + -- 'Presence', so one budget token may trigger at most 8 push notifications + -- to be sent out. + unless (psh ^. pushTransient) + $ mpaRunWithBudget budget () + $ mpaPushNative notif psh =<< nativeTargets psh rcps' alreadySent -- REFACTOR: @[Presence]@ here should be @newtype WebSockedDelivered = WebSockedDelivered [Presence]@ @@ -246,52 +269,65 @@ pushNative :: Notification -> Push -> [Address] -> Gundeck () pushNative _ _ [] = return () pushNative notif p rcps = do let prio = p^.pushNativePriority - void $ Native.push (Native.NativePush (ntfId notif) prio Nothing) rcps + Native.push (Native.NativePush (ntfId notif) prio Nothing) rcps -nativeTargets :: forall m. MonadNativeTargets m => Push -> [Presence] -> m [Address] -nativeTargets p pres = - let rcps' = filter routeNative (toList (fromRange (p^.pushRecipients))) - in mntgtMapAsync addresses rcps' >>= fmap concat . mapM check +-- | Compute list of 'Recipient's from a 'Push' that may be interested in a native push. More +-- filtering in 'nativeTargets'. +nativeTargetsRecipients :: Push -> [Recipient] +nativeTargetsRecipients psh = filter routeNative (toList (fromRange (psh ^. pushRecipients))) where - -- Interested in native pushes? - routeNative u = u^.recipientRoute /= RouteDirect - && (u^.recipientId /= p^.pushOrigin || p^.pushNativeIncludeOrigin) + routeNative u = u ^. recipientRoute /= RouteDirect + && (u ^. recipientId /= psh ^. pushOrigin || psh ^. pushNativeIncludeOrigin) +-- | TODO: 'nativeTargets' calls cassandra once for each 'Recipient' of the 'Push'. Instead, +-- it should be called once with @[Push]@ for every call to 'pushAll', and that call should +-- only call cassandra once in total, yielding all addresses of all recipients of all pushes. 
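+-- A batched variant (hypothetical sketch only, not implemented anywhere) could
+-- have a shape like
+--
+-- > nativeTargetsBulk :: [(Push, [Presence])] -> m [(Push, [Address])]
+--
+-- so that address lookup for all recipients of all pushes of one 'pushAll'
+-- call becomes a single cassandra round trip.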
+-- +-- FUTUREWORK: we may want to turn 'mntgtMapAsync' into an ordinary `mapM`: it's cassandra +-- access, so it'll be fast either way given the size of the input, and synchronous calls +-- impose a much lower risk of choking when system load peaks. +nativeTargets + :: forall m. (MonadNativeTargets m, MonadMapAsync m) + => Push -> [Recipient] -> [Presence] -> m [Address] +nativeTargets psh rcps' alreadySent = + mntgtMapAsync addresses rcps' >>= fmap concat . mapM check + where addresses :: Recipient -> m [Address] addresses u = do - addrs <- mntgtLookupAddresses (u^.recipientId) + addrs <- mntgtLookupAddresses (u ^. recipientId) return $ preference . filter (eligible u) $ addrs + eligible :: Recipient -> Address -> Bool eligible u a -- Never include the origin client. - | a^.addrUser == p^.pushOrigin && Just (a^.addrConn) == p^.pushOriginConnection = False + | a ^. addrUser == psh ^. pushOrigin && Just (a ^. addrConn) == psh ^. pushOriginConnection = False -- Is the specific client an intended recipient? - | not (eligibleClient a (u^.recipientClients)) = False + | not (eligibleClient a (u ^. recipientClients)) = False -- Is the client not whitelisted? | not (whitelistedOrNoWhitelist a) = False - -- Include client if not found in presences. - | otherwise = isNothing (List.find (isOnline a) pres) + -- Include client if not found in already served presences. + | otherwise = isNothing (List.find (isOnline a) alreadySent) - isOnline a x = a^.addrUser == Presence.userId x - && (a^.addrConn == Presence.connId x || equalClient a x) + isOnline a x = a ^. addrUser == Presence.userId x + && (a ^. addrConn == Presence.connId x || equalClient a x) - equalClient a x = Just (a^.addrClient) == Presence.clientId x + equalClient a x = Just (a ^. addrClient) == Presence.clientId x eligibleClient _ RecipientClientsAll = True - eligibleClient a (RecipientClientsSome cs) = (a^.addrClient) `elem` cs + eligibleClient a (RecipientClientsSome cs) = (a ^. addrClient) `elem` cs - whitelistedOrNoWhitelist a = null (p^.pushConnections) - || a^.addrConn `elem` p^.pushConnections + whitelistedOrNoWhitelist a = null (psh ^. pushConnections) + || a ^. addrConn `elem` psh ^. pushConnections -- Apply transport preference in case of alternative transports for the -- same client (currently only APNS vs APNS VoIP). If no explicit -- preference is given, the default preference depends on the priority. - preference as = let pref = p^.pushNativeAps >>= view apsPreference in + preference as = let pref = psh ^. pushNativeAps >>= view apsPreference in filter (pick (fromMaybe defPreference pref)) as where - pick pr a = case a^.addrTransport of + pick pr a = case a ^. addrTransport of GCM -> True APNS -> pr == ApsStdPreference || notAny a APNSVoIP APNSSandbox -> pr == ApsStdPreference || notAny a APNSVoIPSandbox @@ -300,10 +336,10 @@ nativeTargets p pres = notAny a t = not (any (\a' -> addrEqualClient a a' - && a^.addrApp == a'^.addrApp - && a'^.addrTransport == t) as) + && a ^. addrApp == a' ^. addrApp + && a' ^. addrTransport == t) as) - defPreference = case p^.pushNativePriority of + defPreference = case psh ^. 
pushNativePriority of LowPriority -> ApsStdPreference HighPriority -> ApsVoIPPreference @@ -311,8 +347,9 @@ nativeTargets p pres = check (Left e) = mntgtLogErr e >> return [] check (Right r) = return r + addToken :: UserId ::: ConnId ::: JsonRequest PushToken ::: JSON -> Gundeck Response -addToken (uid ::: cid ::: req ::: _) = do +addToken (uid ::: cid ::: req ::: _) = mpaRunWithBudget 1 snsThreadBudgetReached $ do new <- fromJsonBody req (cur, old) <- foldl' (matching new) (Nothing, []) <$> Data.lookup uid Data.Quorum Log.info $ "user" .= UUID.toASCIIBytes (toUUID uid) @@ -425,6 +462,10 @@ invalidToken :: Response invalidToken = json (Error status400 "invalid-token" "Invalid push token") & setStatus status404 +snsThreadBudgetReached :: Response +snsThreadBudgetReached = json (Error status400 "sns-thread-budget-reached" "Too many concurrent calls to SNS; is SNS down?") + & setStatus status413 + tokenTooLong :: Response tokenTooLong = json (Error status400 "token-too-long" "Push token length must be < 8192 for GCM or 400 for APNS") & setStatus status413 diff --git a/services/gundeck/src/Gundeck/Push/Native.hs b/services/gundeck/src/Gundeck/Push/Native.hs index d25dcb2d056..86add930825 100644 --- a/services/gundeck/src/Gundeck/Push/Native.hs +++ b/services/gundeck/src/Gundeck/Push/Native.hs @@ -30,12 +30,12 @@ import qualified Gundeck.Notification.Data as Stream import qualified Gundeck.Push.Data as Data import qualified System.Logger.Class as Log -push :: NativePush -> [Address] -> Gundeck [Result] -push _ [] = return [] -push m [a] = pure <$> push1 m a -push m addrs = mapConcurrently (push1 m) addrs +push :: NativePush -> [Address] -> Gundeck () +push _ [] = pure () +push m [a] = push1 m a +push m addrs = void $ mapConcurrently (push1 m) addrs -push1 :: NativePush -> Address -> Gundeck Result +push1 :: NativePush -> Address -> Gundeck () push1 m a = do e <- view awsEnv r <- Aws.execute e $ publish m a @@ -51,7 +51,6 @@ push1 m a = do Failure (PushException ex) _ -> do logError a "Native push failed" ex view monitor >>= counterIncr (path "push.native.errors") - return r where onDisabled = handleAny (logError a "Failed to cleanup disabled endpoint") $ do diff --git a/services/gundeck/src/Gundeck/Run.hs b/services/gundeck/src/Gundeck/Run.hs index c80a0425a95..9293084ee27 100644 --- a/services/gundeck/src/Gundeck/Run.hs +++ b/services/gundeck/src/Gundeck/Run.hs @@ -5,7 +5,7 @@ import Cassandra (runClient, shutdown) import Cassandra.Schema (versionCheck) import Control.Exception (finally) import Control.Lens hiding (enum) -import Data.Metrics.Middleware +import Data.Metrics.Middleware (metrics) import Data.Metrics.Middleware.Prometheus (waiPrometheusMiddleware) import Data.Metrics.WaiRoute (treeToPaths) import Data.Text (unpack) @@ -14,11 +14,12 @@ import Gundeck.Env import Gundeck.Monad import Gundeck.Options import Gundeck.React +import Gundeck.ThreadBudget import Network.Wai as Wai import Network.Wai.Utilities.Server hiding (serverPort) import Util.Options -import qualified Control.Concurrent.Async as Async +import qualified UnliftIO.Async as Async import qualified Gundeck.Aws as Aws import qualified Network.Wai.Middleware.Gzip as GZip import qualified Network.Wai.Middleware.Gunzip as GZip @@ -33,10 +34,12 @@ run o = do let l = e^.applog s <- newSettings $ defaultServer (unpack $ o^.optGundeck.epHost) (o^.optGundeck.epPort) l m lst <- Async.async $ Aws.execute (e^.awsEnv) (Aws.listen (runDirect e . onEvent)) + wtbs <- forM (e ^. 
threadBudgetState) $ \tbs -> Async.async $ runDirect e $ watchThreadBudgetState m tbs 10
   runSettingsWithShutdown s (middleware e $ app e) 5 `finally` do
     Log.info l $ Log.msg (Log.val "Shutting down ...")
     shutdown (e^.cstate)
     Async.cancel lst
+    forM_ wtbs Async.cancel
     Log.close (e^.applog)
   where
     middleware :: Env -> Wai.Middleware
diff --git a/services/gundeck/src/Gundeck/ThreadBudget.hs b/services/gundeck/src/Gundeck/ThreadBudget.hs
new file mode 100644
index 00000000000..e4085c22860
--- /dev/null
+++ b/services/gundeck/src/Gundeck/ThreadBudget.hs
@@ -0,0 +1,255 @@
+-- | Like "Brig.Budget", but in-memory, per host (not per service), and with a strict/exact
+-- upper bound.  Like https://hackage.haskell.org/package/token-bucket, but takes the entire
+-- run-time of the actions into account, not just the number of executions.
+-- http://hackage.haskell.org/package/rate-limit also looks related.
+--
+-- USE CASE: keep a lid on stalled native push notification threads.  if SNS is up, there
+-- will be many short-running executions of the action.  when SNS is down, the threads will
+-- accumulate in memory and choke the gundeck instances.  so we want to stop spawning more
+-- threads (and discard or queue native push notifications) before we run out of memory (which
+-- could cause system outages).
+--
+-- FUTUREWORK: http-client connection pools should handle this naturally and without doing
+-- anything, but instead connection pools grow infinitely until system resources (file
+-- handles, memory) are exhausted.  See
+-- https://github.com/snoyberg/http-client/issues/307#issuecomment-343829351.  We tried to fix
+-- this here: https://github.com/wireapp/wire-server/pull/609, but getting this right requires
+-- quite some digging: https://github.com/snoyberg/http-client/issues/394.  So if you ever
+-- want to figure this out properly, plan in some time for it.
+module Gundeck.ThreadBudget
+  ( ThreadBudgetState
+  , mkThreadBudgetState
+  , runWithBudget
+  , runWithBudget'
+  , watchThreadBudgetState
+
+    -- * for testing
+  , threadBudgetLimits
+  , budgetSpent
+  , cancelAllThreads
+  ) where
+
+import Imports
+
+import Control.Exception.Safe (catchAny)
+import Control.Lens
+import Control.Monad.Catch (MonadCatch)
+import Data.Metrics (Metrics)
+import Data.Metrics.Middleware (gaugeSet, path)
+import Data.Time
+import Data.UUID (UUID, toText)
+import Data.UUID.V4 (nextRandom)
+import Gundeck.Options
+import UnliftIO.Async
+import UnliftIO.Exception (finally)
+
+import qualified Data.Set as Set
+import qualified Data.HashMap.Strict as HM
+import qualified System.Logger.Class as LC
+
+
+data ThreadBudgetState = ThreadBudgetState
+  { threadBudgetLimits :: MaxConcurrentNativePushes
+  , _threadBudgetRunning :: IORef BudgetMap
+  } deriving (Generic)
+
+-- | Store all handles for cleanup in 'watchThreadBudgetState'.
+data BudgetMap = BudgetMap
+  { bspent :: Int
+  , bmap :: HashMap UUID (Int, Maybe (Async ()))
+  }
+  deriving (Eq, Generic)
+
+-- | Instead of taking the pre-computed total budget spent of the 'BudgetMap' (O(1)), this
+-- counts all the threads that are successfully running (dropping the ones that are just about
+-- to try to grab a token).
+--
+-- WARNING: takes O(n)) and should only be used in testing.
+budgetSpent :: ThreadBudgetState -> IO Int
+budgetSpent (ThreadBudgetState _ running) = budgetSpent' <$> readIORef running
+
+budgetSpent' :: BudgetMap -> Int
+budgetSpent' = sum . fmap fst . filter (isJust . snd) . HM.elems . 
bmap + +cancelAllThreads :: ThreadBudgetState -> IO () +cancelAllThreads (ThreadBudgetState _ ref) = readIORef ref + >>= mapM_ cancel . catMaybes . fmap snd . HM.elems . bmap + +mkThreadBudgetState :: HasCallStack => MaxConcurrentNativePushes -> IO ThreadBudgetState +mkThreadBudgetState limits = ThreadBudgetState limits <$> newIORef (BudgetMap 0 HM.empty) + + +-- | Allocate the resources for a new action to be called (but don't call the action yet). +allocate + :: IORef BudgetMap -> UUID -> Int -> MonadIO m => m Int +allocate ref key newspent + = atomicModifyIORef' ref $ + \(BudgetMap spent hm) -> + ( BudgetMap (spent + newspent) (HM.insert key (newspent, Nothing) hm) + , spent + ) + +-- | Register an already-allocated action with its 'Async'. +register + :: IORef BudgetMap -> UUID -> Async () -> MonadIO m => m Int +register ref key handle + = atomicModifyIORef' ref $ + \(BudgetMap spent hm) -> + ( BudgetMap spent (HM.adjust (_2 .~ Just handle) key hm) + , spent + ) + +-- | Remove an registered and/or allocated action from a 'BudgetMap'. +unregister + :: IORef BudgetMap -> UUID -> MonadIO m => m () +unregister ref key + = atomicModifyIORef' ref $ + \bhm@(BudgetMap spent hm) -> + case HM.lookup key hm of + Just (newspent, _) -> (BudgetMap (spent - newspent) (HM.delete key hm), ()) + Nothing -> (bhm, ()) + + +-- | If there is budget available, execute the action synchronously; otherwise, log a warning +-- and return immediately. Make sure the budget state is updated accordingly both when +-- starting and ending the execution. +-- +-- The hard limit in the 'ThreadBudgetState' argument is guaranteed to be an upper bound for +-- the number of concurrently spent budget tokens; surpassing the soft limit will trigger a +-- warning, but still execute the action. One action can use up any integer number of budget +-- tokens, including 0 and negative. +-- +-- The action is called in an 'Async', but 'runWithBudget' waits for it to finish so it can +-- update the budget. +runWithBudget + :: forall m. (MonadIO m, LC.MonadLogger m, MonadUnliftIO m) + => ThreadBudgetState -> Int -> m () -> m () +runWithBudget tbs spent = runWithBudget' tbs spent () + +-- | More flexible variant of 'runWithBudget' that allows the action to return a value. With +-- a default in case of budget exhaustion. +runWithBudget' + :: forall m a. (MonadIO m, LC.MonadLogger m, MonadUnliftIO m) + => ThreadBudgetState -> Int -> a -> m a -> m a +runWithBudget' (ThreadBudgetState limits ref) spent fallback action = do + key <- liftIO nextRandom + (`finally` unregister ref key) $ do + oldsize <- allocate ref key spent + + warnNoBudget (maybe False (oldsize >=) (limits ^. limitSoft)) + (maybe False (oldsize >=) (limits ^. limitHard)) + oldsize + + if (maybe True (oldsize <) (limits ^. limitHard)) + then go key oldsize + else pure fallback + where + go :: UUID -> Int -> m a + go key oldsize = do + LC.debug $ + "key" LC..= (toText key) LC.~~ + "spent" LC..= oldsize LC.~~ + LC.msg (LC.val "runWithBudget: go") + + handle <- async action + _ <- register ref key (const () <$> handle) + wait handle + + -- iff soft and/or hard limit are breached, log a warning-level message. 
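+    -- (only a hard breach short-circuits to the fallback above; on a mere soft
+    -- breach the action still runs, we just log.)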
+ warnNoBudget :: Bool -> Bool -> Int -> m () + warnNoBudget False False _ = pure () + warnNoBudget soft hard oldsize = do + let limit = if hard then "hard" else "soft" + LC.warn $ + "spent" LC..= show oldsize LC.~~ + "soft-breach" LC..= soft LC.~~ + "hard-breach" LC..= hard LC.~~ + LC.msg (LC.val "runWithBudget: " <> limit <> " limit reached") + + +-- | Fork a thread that checks with the given frequency if any async handles stored in the +-- state are stale (ie., have terminated with or without exception, but not been removed). If +-- that happens, log a warning. +-- +-- 'runWithBudget' should keep track of the state itself; 'watchThreadBudgetState' is solely a +-- safety precaution to see if there aren't any corner cases we missed. +-- +-- Also, issue some metrics. +watchThreadBudgetState + :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) + => Metrics -> ThreadBudgetState -> NominalDiffTime -> m () +watchThreadBudgetState metrics (ThreadBudgetState limits ref) freq = safeForever $ do + recordMetrics metrics limits ref + removeStaleHandles ref + threadDelayNominalDiffTime freq + +recordMetrics + :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) + => Metrics -> MaxConcurrentNativePushes -> IORef BudgetMap -> m () +recordMetrics metrics limits ref = do + (BudgetMap spent _) <- readIORef ref + gaugeSet (fromIntegral spent) (path "net.nativepush.thread_budget_allocated") metrics + forM_ (limits ^. limitHard) $ \lim -> + gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_hard_limit") metrics + forM_ (limits ^. limitSoft) $ \lim -> + gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_soft_limit") metrics + +threadDelayNominalDiffTime :: NominalDiffTime -> MonadIO m => m () +threadDelayNominalDiffTime = threadDelay . round . (* 1000000) . toRational + +staleTolerance :: NominalDiffTime +staleTolerance = 3 + +-- | Get all handles for asyncs that have terminated, but not been removed from the state. Do +-- that again after 'staleTolerance' to make sure that we don't catch any handles that would +-- have been removed during the 'runWithBudget' cleanup, but we were faster. The intersection +-- between the two rounds constitutes the legitimately stale handles: warn about them, and +-- then remove them from the budgetmap. +removeStaleHandles + :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) + => IORef BudgetMap -> m () +removeStaleHandles ref = do + round1 <- getStaleHandles + threadDelayNominalDiffTime staleTolerance + round2 <- getStaleHandles + + let staleHandles = Set.intersection round1 round2 + + unless (null staleHandles) $ do + warnStaleHandles (Set.size staleHandles) =<< readIORef ref + forM_ staleHandles $ \key -> do + mapM_ waitCatch . join . fmap snd =<< HM.lookup key . bmap <$> readIORef ref + unregister ref key + + isUnsanitary <- atomicModifyIORef' ref sanitize + when isUnsanitary . LC.warn . LC.msg . LC.val $ + "watchThreadBudgetState: total overall thread budget diverged from async weights (repaired)." + + where + getStaleHandles :: m (Set UUID) + getStaleHandles = Set.fromList . mconcat <$> do + handles <- HM.toList . 
bmap <$> readIORef ref + forM handles $ \case + (_, (_, Nothing)) -> do + pure [] + (key, (_, Just handle)) -> do + status <- poll handle + pure [key | isJust status] + + warnStaleHandles :: Int -> BudgetMap -> m () + warnStaleHandles num (BudgetMap spent _) = LC.warn $ + "spent" LC..= show spent + LC.~~ LC.msg ("watchThreadBudgetState: removed " <> show num <> " stale handles.") + + -- check that overall budget matches the budget in the indiviual map entries. + sanitize :: BudgetMap -> (BudgetMap, Bool) + sanitize bm@(BudgetMap spent hm) = (BudgetMap spent' hm, spent == spent') + where spent' = budgetSpent' bm + + +safeForever + :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) + => m () -> m () +safeForever action = forever $ action `catchAny` \exc -> do + LC.err $ "error" LC..= show exc LC.~~ LC.msg (LC.val "watchThreadBudgetState: crashed; retrying") + threadDelay 60000000 -- pause to keep worst-case noise in logs manageable diff --git a/services/gundeck/test/unit/Main.hs b/services/gundeck/test/unit/Main.hs index 36570e300c3..2e7d251a8d9 100644 --- a/services/gundeck/test/unit/Main.hs +++ b/services/gundeck/test/unit/Main.hs @@ -14,6 +14,7 @@ import qualified Gundeck.API import qualified Json import qualified Native import qualified Push +import qualified ThreadBudget main :: IO () main = withOpenSSL . defaultMain $ @@ -25,4 +26,5 @@ main = withOpenSSL . defaultMain $ , Json.tests , Native.tests , Push.tests + , ThreadBudget.tests ] diff --git a/services/gundeck/test/unit/MockGundeck.hs b/services/gundeck/test/unit/MockGundeck.hs index 972049bc7d3..db02cdbd663 100644 --- a/services/gundeck/test/unit/MockGundeck.hs +++ b/services/gundeck/test/unit/MockGundeck.hs @@ -393,10 +393,13 @@ instance MonadPushAll MockGundeck where mpaPushNative = mockPushNative mpaForkIO = id -- just don't fork. (this *may* cause deadlocks in principle, but as long as it -- doesn't, this is good enough for testing). + mpaRunWithBudget = \_ _ -> id -- no throttling needed as long as we don't overdo it in the tests... 
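+  -- (the 'mpaRunWithBudget' mock above thus grants unlimited budget: budget
+  -- size and fallback value are ignored, and the wrapped action always runs.)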
instance MonadNativeTargets MockGundeck where mntgtLogErr _ = pure () mntgtLookupAddresses = mockLookupAddresses + +instance MonadMapAsync MockGundeck where mntgtMapAsync f xs = Right <$$> mapM f xs -- (no concurrency) instance MonadPushAny MockGundeck where diff --git a/services/gundeck/test/unit/ThreadBudget.hs b/services/gundeck/test/unit/ThreadBudget.hs new file mode 100644 index 00000000000..43c4beee6d0 --- /dev/null +++ b/services/gundeck/test/unit/ThreadBudget.hs @@ -0,0 +1,334 @@ +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE RecordWildCards #-} + +{-# OPTIONS_GHC -Wno-orphans #-} + +module ThreadBudget where + +import Imports + +import Control.Concurrent.Async +import Control.Lens +import Control.Monad.Catch (MonadCatch, catch) +import Data.Metrics.Middleware (metrics) +import Data.String.Conversions (cs) +import Data.Time +import Data.TreeDiff.Class (ToExpr) +import GHC.Generics +import Gundeck.ThreadBudget +import Gundeck.Options +import Test.QuickCheck +import Test.QuickCheck.Monadic +import Test.StateMachine +import Test.Tasty +import Test.Tasty.HUnit +import Test.Tasty.QuickCheck + +import qualified System.Logger.Class as LC +import qualified Test.StateMachine.Types as STM +import qualified Test.StateMachine.Types.Rank2 as Rank2 + + +---------------------------------------------------------------------- +-- helpers + +newtype NumberOfThreads = NumberOfThreads { fromNumberOfThreads :: Int } + deriving (Eq, Ord, Show, Generic, ToExpr) + +-- | 'microseconds' determines how long one unit lasts. there is a trade-off of fast +-- vs. robust in this whole setup. this type is supposed to help us find a good sweet spot. +newtype MilliSeconds = MilliSeconds { fromMilliSeconds :: Int } + deriving (Eq, Ord, Show, Generic, ToExpr) + +-- toMillisecondsCeiling 0.03 == MilliSeconds 30 +-- toMillisecondsCeiling 0.003 == MilliSeconds 3 +-- toMillisecondsCeiling 0.0003 == MilliSeconds 1 +-- toMillisecondsCeiling 0.0000003 == MilliSeconds 1 +toMillisecondsCeiling :: NominalDiffTime -> MilliSeconds +toMillisecondsCeiling = MilliSeconds . ceiling . (* 1000) . toRational + +milliSecondsToNominalDiffTime :: MilliSeconds -> NominalDiffTime +milliSecondsToNominalDiffTime = fromRational . (/ 1000) . toRational . fromMilliSeconds + +instance Arbitrary NumberOfThreads where + arbitrary = NumberOfThreads <$> choose (1, 30) + shrink (NumberOfThreads n) = NumberOfThreads <$> filter (> 0) (shrink n) + +instance Arbitrary MilliSeconds where + arbitrary = MilliSeconds <$> choose (1, 30) + shrink (MilliSeconds n) = MilliSeconds <$> filter (> 0) (shrink n) + + +data LogEntry = NoBudget | Debug String | Unknown String + deriving (Eq, Show, Generic) + +makePrisms ''LogEntry + +type LogHistory = MVar [LogEntry] + + +extractLogHistory :: (HasCallStack, MonadReader LogHistory m, MonadIO m) => m [LogEntry] +extractLogHistory = do + logHistory <- ask + liftIO $ modifyMVar logHistory (pure . ([],)) + +expectLogHistory :: (HasCallStack, MonadReader LogHistory m, MonadIO m) => ([LogEntry] -> Bool) -> m () +expectLogHistory expected = do + logHistory <- ask + liftIO $ do + found <- modifyMVar logHistory (\found -> pure ([], found)) + expected (filter (isn't _Debug) found) @? 
("unexpected log data: " <> show found) + +enterLogHistory :: (HasCallStack, MonadReader LogHistory m, MonadIO m) => LogEntry -> m () +enterLogHistory entry = do + logHistory <- ask + liftIO $ do + modifyMVar_ logHistory (\found -> pure (entry : found)) + +instance LC.MonadLogger (ReaderT LogHistory IO) where + log level msg = do + let raw :: String = cs $ LC.render LC.renderNetstr msg + parsed + | level == LC.Debug = Debug raw + | "runWithBudget: hard limit reached" `isInfixOf` raw = NoBudget + | "runWithBudget: soft limit reached" `isInfixOf` raw = NoBudget + | otherwise = Unknown raw + enterLogHistory parsed + +delayms :: MilliSeconds -> (MonadCatch m, MonadIO m) => m () +delayms = delay' . (* 1000) . fromMilliSeconds + +delayndt :: NominalDiffTime -> (MonadCatch m, MonadIO m) => m () +delayndt = delay' . round . (* 1000) . (* 1000) . toRational + +delay' :: Int -> (MonadCatch m, MonadIO m) => m () +delay' microsecs = threadDelay microsecs `catch` \AsyncCancelled -> pure () + +burstActions + :: HasCallStack + => ThreadBudgetState + -> LogHistory + -> MilliSeconds + -> NumberOfThreads + -> (MonadIO m) => m () +burstActions tbs logHistory howlong (NumberOfThreads howmany) + = let budgeted = runWithBudget tbs 1 (delayms howlong) + in liftIO . replicateM_ howmany . forkIO $ runReaderT budgeted logHistory + +-- | Start a watcher with given params and a frequency of 10 milliseconds, so we are more +-- likely to find weird race conditions. +mkWatcher :: ThreadBudgetState -> LogHistory -> IO (Async ()) +mkWatcher tbs logHistory = do + mtr <- metrics + async $ runReaderT (watchThreadBudgetState mtr tbs 0.01) logHistory + `catch` \AsyncCancelled -> pure () + + +---------------------------------------------------------------------- +-- TOC + +tests :: TestTree +tests = testGroup "thread budgets" $ + [ testCase "unit test" testThreadBudgets + , testProperty "qc stm (sequential)" propSequential + ] + + +---------------------------------------------------------------------- +-- deterministic unit test + +testThreadBudgets :: Assertion +testThreadBudgets = do + tbs <- mkThreadBudgetState (MaxConcurrentNativePushes (Just 5) (Just 5)) + logHistory :: LogHistory <- newMVar [] + watcher <- mkWatcher tbs logHistory + + flip runReaderT logHistory $ do + burstActions tbs logHistory (MilliSeconds 100) (NumberOfThreads 5) + delayms (MilliSeconds 10) + expectLogHistory null + liftIO $ budgetSpent tbs >>= (@=? 5) + + burstActions tbs logHistory (MilliSeconds 100) (NumberOfThreads 3) + delayms (MilliSeconds 10) + expectLogHistory (== [NoBudget, NoBudget, NoBudget]) + liftIO $ budgetSpent tbs >>= (@=? 5) + + burstActions tbs logHistory (MilliSeconds 100) (NumberOfThreads 3) + delayms (MilliSeconds 10) + expectLogHistory (== [NoBudget, NoBudget, NoBudget]) + liftIO $ budgetSpent tbs >>= (@=? 5) + + delayms (MilliSeconds 80) + + burstActions tbs logHistory (MilliSeconds 100) (NumberOfThreads 3) + delayms (MilliSeconds 10) + expectLogHistory null + liftIO $ budgetSpent tbs >>= (@=? 3) + + burstActions tbs logHistory (MilliSeconds 100) (NumberOfThreads 3) + delayms (MilliSeconds 10) + expectLogHistory (== [NoBudget]) + liftIO $ budgetSpent tbs >>= (@=? 
5) + + cancel watcher + + +---------------------------------------------------------------------- +-- property-based state machine tests + +type State = Reference (Opaque (ThreadBudgetState, Async (), LogHistory)) + +newtype Model r = Model (Maybe (State r)) + deriving (Show, Generic) + +instance ToExpr (Model Symbolic) +instance ToExpr (Model Concrete) + + +data Command r + = Init NumberOfThreads + | Run (State r) NumberOfThreads MilliSeconds + | Wait (State r) MilliSeconds + | Measure (State r) + deriving (Show, Generic, Generic1, Rank2.Functor, Rank2.Foldable, Rank2.Traversable) + +data Response r + = InitResponse (State r) + | RunResponse + | WaitResponse + | MeasureResponse { rspConcreteRunning :: Int } + deriving (Show, Generic, Generic1, Rank2.Functor, Rank2.Foldable, Rank2.Traversable) + + +generator :: HasCallStack => Model Symbolic -> Gen (Command Symbolic) +generator (Model Nothing) = Init <$> arbitrary +generator (Model (Just st)) = + oneof [ Run st <$> arbitrary <*> arbitrary + , Wait st <$> arbitrary + , pure $ Measure st + ] + +shrinker :: HasCallStack => Command Symbolic -> [Command Symbolic] +shrinker (Init _) = [] +shrinker (Run st n m) = Wait st (MilliSeconds 1) : (Run st <$> shrink n <*> shrink m) +shrinker (Wait st n) = Wait st <$> shrink n +shrinker (Measure _) = [] + + +initModel :: HasCallStack => Model r +initModel = Model Nothing + + +semantics :: HasCallStack => Command Concrete -> IO (Response Concrete) +semantics (Init (NumberOfThreads limit)) + = do + tbs <- mkThreadBudgetState (MaxConcurrentNativePushes (Just limit) (Just limit)) + logHistory <- newMVar [] + watcher <- mkWatcher tbs logHistory + pure . InitResponse . reference . Opaque $ (tbs, watcher, logHistory) + +-- 'Run' works asynchronously: start new threads, but return without any time passing. +semantics (Run (opaque -> (tbs, _, logHistory)) howmany howlong) + = do + burstActions tbs logHistory howlong howmany + pure RunResponse + +-- 'Wait' makes time pass, ie. reduces the run time of running threads, and removes the ones +-- that drop below 0. +semantics (Wait _ howlong) + = do + delayms howlong + pure WaitResponse + +-- 'Measure' looks at the concrete state and records it into the model. +semantics (Measure (opaque -> (tbs, _, _))) + = do + rspConcreteRunning <- budgetSpent tbs + pure MeasureResponse{..} + + +transition :: HasCallStack => Model r -> Command r -> Response r -> Model r +transition (Model Nothing) (Init _) (InitResponse st) = Model (Just st) +transition (Model (Just st)) Run{} RunResponse = Model (Just st) +transition (Model (Just st)) Wait{} WaitResponse = Model (Just st) +transition (Model (Just st)) Measure{} MeasureResponse{..} = Model (Just st) +transition _ _ _ = error "impossible." + + +precondition :: HasCallStack => Model Symbolic -> Command Symbolic -> Logic +precondition _ _ = Top + +postcondition :: HasCallStack => Model Concrete -> Command Concrete -> Response Concrete -> Logic +postcondition (Model Nothing) Init{} InitResponse{} = Top +postcondition (Model (Just _)) Run{} RunResponse{} = Top +postcondition (Model (Just _)) Wait{} WaitResponse{} = Top +postcondition model@(Model (Just _)) cmd@Measure{} resp@MeasureResponse{..} + = threadLimitExceeded + where + Model (Just state) = transition model cmd resp + + rspThreadLimit :: Int + rspThreadLimit = case opaque state of + (tbs, _, _) -> tbs ^?! Control.Lens.to threadBudgetLimits . limitHard . _Just + + -- number of running threads is never above the limit. 
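+    -- ('.<=' is the quickcheck-state-machine 'Logic' counterpart of '<=';
+    -- 'Annotate' merely labels the check in counterexample output.)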
+ threadLimitExceeded = Annotate "thread limit exceeded" $ rspConcreteRunning .<= rspThreadLimit + + -- FUTUREWORK: check that the number of running threads matches the model exactly. looks + -- plausible, but when i tried to make the model rich enough to express this test i didn't + -- manage to sort out the timing. + -- syncNumRunning = Annotate "out of sync" $ rspConcreteRunning .== rspModelRunning + +postcondition m c r = error $ "impossible: " <> show (m, c, r) + + +mock :: HasCallStack => Model Symbolic -> Command Symbolic -> GenSym (Response Symbolic) +mock (Model Nothing) (Init _) + = InitResponse <$> genSym + +mock (Model (Just _)) Run{} = pure RunResponse +mock (Model (Just _)) Wait{} = pure WaitResponse +mock (Model (Just _)) Measure{} = pure MeasureResponse{..} + where + rspConcreteRunning = undefined + -- FUTUREWORK: mock is cool because if we do this right, it gives us a quickcheck- + -- validated mock component that we can use in other tests. it appears it's not needed in + -- the tests in this module, though, and we will need to keep track of more of the + -- concrete state in the model if we want to fill in this 'undefined'. + -- + -- See also: https://www.well-typed.com/blog/2019/01/qsm-in-depth/ + +mock badmodel badcmd = error $ "impossible: " <> show (badmodel, badcmd) + + +sm :: StateMachine Model Command IO Response +sm = StateMachine + { STM.initModel = initModel + , STM.transition = transition + , STM.precondition = precondition + , STM.postcondition = postcondition + , STM.invariant = Nothing + , STM.generator = generator + , STM.distribution = Nothing + , STM.shrinker = shrinker + , STM.semantics = semantics + , STM.mock = mock + } + + +-- | Remove resources created by the concrete 'STM.Commands', namely watcher and budgeted +-- async threads. +shutdown :: Model Concrete -> MonadIO m => m () +shutdown (Model Nothing) = pure () +shutdown (Model (Just (opaque -> (tbs, watcher, _)))) = liftIO $ do + cancelAllThreads tbs + cancel watcher + +-- | FUTUREWORK: in this use case of quickcheck-state-machine it may be more interesting to +-- look at fewer, but longer command sequences. +propSequential :: Property +propSequential = forAllCommands sm Nothing $ \cmds -> monadicIO $ do + (hist, model, res) <- runCommands sm cmds + shutdown model + prettyCommands sm hist (checkCommandNames cmds (res === Ok)) diff --git a/stack.yaml b/stack.yaml index a617362e07e..ee6dff9619a 100644 --- a/stack.yaml +++ b/stack.yaml @@ -58,6 +58,7 @@ extra-deps: commit: c0bcbe8ae5bb6fdc0b5b94f640f63a615c068cbf # master (Apr 25, 2019) - git: https://github.com/wireapp/hscim commit: 6b98b894c127eed4a5bde646ebf20febcfa656fa # master (Apr 2, 2019) +- quickcheck-state-machine-0.4.2 flags: types-common: From c4f7eb73da4c42424833e02498d01cbc5fc79363 Mon Sep 17 00:00:00 2001 From: fisx Date: Tue, 22 Oct 2019 15:15:06 +0200 Subject: [PATCH 11/26] Fix detail in swagger docs (#885) Co-Authored-By: Tiago Manuel Ventura Loureiro --- services/galley/src/Galley/API.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/services/galley/src/Galley/API.hs b/services/galley/src/Galley/API.hs index 2b7f698e882..4b304512d9a 100644 --- a/services/galley/src/Galley/API.hs +++ b/services/galley/src/Galley/API.hs @@ -200,6 +200,7 @@ sitemap = do body (ref TeamsModel.teamMemberDelete) $ do optional description "JSON body, required only for binding teams." 
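+      -- (deletion is only scheduled here and completes asynchronously, hence
+      -- the 202 Accepted status rather than 200)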
+      response 202 "Team member scheduled for deletion" end
     errorResponse Error.noTeamMember
     errorResponse (Error.operationDenied RemoveTeamMember)
     errorResponse Error.reAuthFailed

From ff67fc55404bce5af7f416360448084190382b66 Mon Sep 17 00:00:00 2001
From: jschaul
Date: Thu, 24 Oct 2019 12:31:56 +0200
Subject: [PATCH 12/26] add issue template (#889)

---
 .../ISSUE_TEMPLATE/question-installation.md   | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/question-installation.md

diff --git a/.github/ISSUE_TEMPLATE/question-installation.md b/.github/ISSUE_TEMPLATE/question-installation.md
new file mode 100644
index 00000000000..4becf8d53fd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/question-installation.md
@@ -0,0 +1,37 @@
+---
+name: Question-Installation
+about: Question about my installation of wire-server
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+* [ ] I have seen https://docs.wire.com/ and https://github.com/wireapp/wire-server-deploy - the documentation there does not answer my question.
+
+## My question:
+
+## Context:
+
+> Please provide sufficient context about your problem:
+
+### How did you install wire-server?
+
+> On kubernetes? With docker-compose? By manually compiling and running?
+
+### How many servers are involved?
+
+### What is installed on which servers?
+
+> E.g. Server A has component X and server B has component Y
+
+### Provide details about networking
+
+> We don't need to know any specific IP address, but it helps if you tell us whether an IP is ipv4 or ipv6, whether it is publicly reachable from the global internet or not, and, if you installed any component of wire-server, which network interfaces its processes are listening on.
+
+### How did you configure wire-server?
+
+> *Note: only the configuration from helm charts in wire-server-deploy is what we support, like [these defaults](https://github.com/wireapp/wire-server-deploy/blob/develop/charts/brig/values.yaml) applied [here](https://github.com/wireapp/wire-server-deploy/blob/develop/charts/brig/templates/configmap.yaml) in the case of `brig`. If you have used other configuration files, please post them (or the relevant parts of them).*
+> Did you use the helm charts from wire-server-deploy?
+> Did you use and adapt configuration files from wire-server? If so, which ones?
+> Are there any overrides?

From 32c605730eff3d9df1bb4d7e2dc5b3e517b3eb00 Mon Sep 17 00:00:00 2001
From: jschaul
Date: Thu, 24 Oct 2019 12:42:04 +0200
Subject: [PATCH 13/26] Update README (#887)

---
 tools/db/migrate-sso-feature-flag/README.md | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/db/migrate-sso-feature-flag/README.md b/tools/db/migrate-sso-feature-flag/README.md
index 9c11ec2cc57..13f1f32628d 100644
--- a/tools/db/migrate-sso-feature-flag/README.md
+++ b/tools/db/migrate-sso-feature-flag/README.md
@@ -1,3 +1,11 @@
 A clone of ../service-backfill/ that enables the SSO feature flag for
-all teams that already have an IdP. See README.md there on how to run
-this.
+all teams that already have an IdP.
+
+Connects to the galley and spar databases.
+
+Set up port-forwarding to the spar and galley databases (hacky, slow, maybe dangerous), or run from a machine with access to those databases (preferred approach). Refer to ../service-backfill/ for an example. 
Then: + +```sh +# assuming local port forwarding cassandra_galley on 2021 and cassandra_spar on 2022: +./dist/migrate-sso-feature-flag --cassandra-host-spar localhost --cassandra-port-spar 2022 --cassandra-keyspace-spar spar --cassandra-host-galley localhost --cassandra-port-galley 2021 --cassandra-keyspace-galley galley +``` From 28f6deee4816d063cff1d20258244f14515fc4f9 Mon Sep 17 00:00:00 2001 From: jschaul Date: Thu, 24 Oct 2019 12:42:40 +0200 Subject: [PATCH 14/26] fix incorrect boolean logic (#890) Currently, we get spammed with these warnings before any traffic happens, due to an incorrect handling of boolean. [gundeck] W, request=N/A, watchThreadBudgetState: total overall thread budget diverged from async weights (repaired). [gundeck] W, request=N/A, watchThreadBudgetState: total overall thread budget diverged from async weights (repaired). [gundeck] W, request=N/A, watchThreadBudgetState: total overall thread budget diverged from async weights (repaired). --- services/gundeck/src/Gundeck/ThreadBudget.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/gundeck/src/Gundeck/ThreadBudget.hs b/services/gundeck/src/Gundeck/ThreadBudget.hs index e4085c22860..636ce2a1788 100644 --- a/services/gundeck/src/Gundeck/ThreadBudget.hs +++ b/services/gundeck/src/Gundeck/ThreadBudget.hs @@ -221,8 +221,8 @@ removeStaleHandles ref = do mapM_ waitCatch . join . fmap snd =<< HM.lookup key . bmap <$> readIORef ref unregister ref key - isUnsanitary <- atomicModifyIORef' ref sanitize - when isUnsanitary . LC.warn . LC.msg . LC.val $ + isSanitary <- atomicModifyIORef' ref sanitize + unless isSanitary . LC.warn . LC.msg . LC.val $ "watchThreadBudgetState: total overall thread budget diverged from async weights (repaired)." where From a95becdf5794897ff04489ae819e71f7660f2e84 Mon Sep 17 00:00:00 2001 From: fisx Date: Fri, 25 Oct 2019 10:06:54 +0200 Subject: [PATCH 15/26] benchmarking for O(n) function, remove expensive call from atomicModifyIORef (#891) * Add ad-hoc bench for O(n) function * Fix: do not block ThreadBudgetState for expensive sanity check * move benchmarking code out from production code, do real benchmarks --- services/gundeck/package.yaml | 3 +++ services/gundeck/src/Gundeck/ThreadBudget.hs | 15 ++++++------- services/gundeck/test/bench/Main.hs | 23 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/services/gundeck/package.yaml b/services/gundeck/package.yaml index bb00596df9e..6cdac2ce3bd 100644 --- a/services/gundeck/package.yaml +++ b/services/gundeck/package.yaml @@ -224,9 +224,12 @@ benchmarks: - gundeck - gundeck-types - HsOpenSSL + - random - text - types-common - unordered-containers + - time + - uuid flags: static: description: Enable static linking diff --git a/services/gundeck/src/Gundeck/ThreadBudget.hs b/services/gundeck/src/Gundeck/ThreadBudget.hs index 636ce2a1788..7257cb090f8 100644 --- a/services/gundeck/src/Gundeck/ThreadBudget.hs +++ b/services/gundeck/src/Gundeck/ThreadBudget.hs @@ -23,10 +23,14 @@ module Gundeck.ThreadBudget , runWithBudget' , watchThreadBudgetState - -- * for testing + -- * for testing: FUTUREWORK: factor out into a library? , threadBudgetLimits , budgetSpent + , budgetSpent' + , _threadBudgetRunning , cancelAllThreads + , allocate + , BudgetMap ) where import Imports @@ -64,7 +68,7 @@ data BudgetMap = BudgetMap -- counts all the threads that are successfully running (dropping the ones that are just about -- to try to grab a token). 
-- --- WARNING: takes O(n)) and should only be used in testing. +-- WARNING: takes O(n)), use with care. See 'bench_BudgetSpent''. budgetSpent :: ThreadBudgetState -> IO Int budgetSpent (ThreadBudgetState _ running) = budgetSpent' <$> readIORef running @@ -221,7 +225,7 @@ removeStaleHandles ref = do mapM_ waitCatch . join . fmap snd =<< HM.lookup key . bmap <$> readIORef ref unregister ref key - isSanitary <- atomicModifyIORef' ref sanitize + isSanitary <- (\bm -> bspent bm == budgetSpent' bm) <$> readIORef ref unless isSanitary . LC.warn . LC.msg . LC.val $ "watchThreadBudgetState: total overall thread budget diverged from async weights (repaired)." @@ -241,11 +245,6 @@ removeStaleHandles ref = do "spent" LC..= show spent LC.~~ LC.msg ("watchThreadBudgetState: removed " <> show num <> " stale handles.") - -- check that overall budget matches the budget in the indiviual map entries. - sanitize :: BudgetMap -> (BudgetMap, Bool) - sanitize bm@(BudgetMap spent hm) = (BudgetMap spent' hm, spent == spent') - where spent' = budgetSpent' bm - safeForever :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) diff --git a/services/gundeck/test/bench/Main.hs b/services/gundeck/test/bench/Main.hs index 98a3e775285..2d3badd4d79 100644 --- a/services/gundeck/test/bench/Main.hs +++ b/services/gundeck/test/bench/Main.hs @@ -6,17 +6,26 @@ import Data.Id (randomId, ConnId (..), ClientId (..)) import Gundeck.Types.Push import Gundeck.Push.Native.Serialise import Gundeck.Push.Native.Types +import Gundeck.ThreadBudget +import Gundeck.Options import Network.AWS (Region (Ireland)) import OpenSSL (withOpenSSL) +import System.Random (randomRIO) +import Data.UUID.V4 (nextRandom) import qualified Data.Text.Lazy as LT main :: IO () main = withOpenSSL $ do + prepared <- prepareBudgetState (100000) defaultMain [ bgroup "notice" [ bench "32" $ nfIO notice ] + , bgroup "ThreadBudget" + [ bench "budgetSpent'" $ nfIO (bench_BudgetSpent' prepared) + , bench "prepare + budgetSpent'" $ nfIO (bench_BudgetSpent' =<< prepareBudgetState 100000) + ] ] ----------------------------------------------------------------------------- @@ -30,6 +39,11 @@ notice = do Right txt <- serialise msg a return $! 
LT.toStrict txt
 
+bench_BudgetSpent' :: IORef BudgetMap -> IO ()
+bench_BudgetSpent' ref = do
+  budgetmap <- readIORef ref
+  void $ return $ budgetSpent' budgetmap
+
 -----------------------------------------------------------------------------
 -- Utilities
@@ -47,3 +61,12 @@ mkEndpoint :: Transport -> AppName -> EndpointArn
 mkEndpoint t a = mkSnsArn Ireland (Account "test") topic
   where
     topic = mkEndpointTopic (ArnEnv "test") t a (EndpointId "test")
+
+prepareBudgetState :: Int -> IO (IORef BudgetMap)
+prepareBudgetState size = do
+  ref <- _threadBudgetRunning <$> mkThreadBudgetState (MaxConcurrentNativePushes Nothing Nothing)
+  forM_ [1..size] $ \_ -> do
+    key <- nextRandom
+    weight <- randomRIO (1, 1000)
+    allocate ref key weight
+  return ref

From a6ed328e13a97fa90e2efd26c8497d039787300b Mon Sep 17 00:00:00 2001
From: jschaul
Date: Sun, 27 Oct 2019 00:37:13 +0200
Subject: [PATCH 16/26] emit counter metric on soft/hard limit breached (#892)

* emit counter metric on soft/hard limit breached
* Cleanup exports
* More related work
---
 services/gundeck/src/Gundeck/Push.hs          |   3 +-
 services/gundeck/src/Gundeck/ThreadBudget.hs  | 233 +-----------------
 .../src/Gundeck/ThreadBudget/Internal.hs      | 223 +++++++++++++++++
 services/gundeck/test/bench/Main.hs           |   2 +-
 services/gundeck/test/unit/ThreadBudget.hs    |   9 +-
 5 files changed, 236 insertions(+), 234 deletions(-)
 create mode 100644 services/gundeck/src/Gundeck/ThreadBudget/Internal.hs

diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs
index be3fb5f37f6..f3dc5e73d6b 100644
--- a/services/gundeck/src/Gundeck/Push.hs
+++ b/services/gundeck/src/Gundeck/Push.hs
@@ -91,9 +91,10 @@ instance MonadPushAll Gundeck where
 -- | Another layer of wrap around 'runWithBudget'.
 runWithBudget'' :: Int -> a -> Gundeck a -> Gundeck a
 runWithBudget'' budget fallback action = do
+  metrics <- view monitor
   view threadBudgetState >>= \case
     Nothing -> action
-    Just tbs -> runWithBudget' tbs budget fallback action
+    Just tbs -> runWithBudget' metrics tbs budget fallback action
diff --git a/services/gundeck/src/Gundeck/ThreadBudget.hs b/services/gundeck/src/Gundeck/ThreadBudget.hs
index 7257cb090f8..d7aabfe8b4d 100644
--- a/services/gundeck/src/Gundeck/ThreadBudget.hs
+++ b/services/gundeck/src/Gundeck/ThreadBudget.hs
@@ -3,6 +3,10 @@
 -- run-time of the actions into account, not just the number of executions.
 -- http://hackage.haskell.org/package/rate-limit also looks related.
 --
+-- FUTUREWORK: https://github.com/layer-3-communications/lockpool seems like almost exactly
+-- the same thing, but I only found this after ThreadBudget was done.  Before turning
+-- ThreadBudget into a standalone package, take a closer look at lockpool!
+--
 -- USE CASE: keep a lid on stalled native push notification threads.  if SNS is up, there
 -- will be many short-running executions of the action.  when SNS is down, the threads will
 -- accumulate in memory and choke the gundeck instances.  so we want to stop spawning more
@@ -22,233 +26,6 @@ module Gundeck.ThreadBudget
   , runWithBudget
   , runWithBudget'
   , watchThreadBudgetState
-
-    -- * for testing: FUTUREWORK: factor out into a library? 
- , threadBudgetLimits - , budgetSpent - , budgetSpent' - , _threadBudgetRunning - , cancelAllThreads - , allocate - , BudgetMap ) where -import Imports - -import Control.Exception.Safe (catchAny) -import Control.Lens -import Control.Monad.Catch (MonadCatch) -import Data.Metrics (Metrics) -import Data.Metrics.Middleware (gaugeSet, path) -import Data.Time -import Data.UUID (UUID, toText) -import Data.UUID.V4 (nextRandom) -import Gundeck.Options -import UnliftIO.Async -import UnliftIO.Exception (finally) - -import qualified Data.Set as Set -import qualified Data.HashMap.Strict as HM -import qualified System.Logger.Class as LC - - -data ThreadBudgetState = ThreadBudgetState - { threadBudgetLimits :: MaxConcurrentNativePushes - , _threadBudgetRunning :: IORef BudgetMap - } deriving (Generic) - --- | Store all handles for cleanup in 'watchThreadBudgetState'. -data BudgetMap = BudgetMap - { bspent :: Int - , bmap :: HashMap UUID (Int, Maybe (Async ())) - } - deriving (Eq, Generic) - --- | Instead of taking the pre-computed total budget spent of the 'BudgetMap' (O(1)), this --- counts all the threads that are successfully running (dropping the ones that are just about --- to try to grab a token). --- --- WARNING: takes O(n)), use with care. See 'bench_BudgetSpent''. -budgetSpent :: ThreadBudgetState -> IO Int -budgetSpent (ThreadBudgetState _ running) = budgetSpent' <$> readIORef running - -budgetSpent' :: BudgetMap -> Int -budgetSpent' = sum . fmap fst . filter (isJust . snd) . HM.elems . bmap - -cancelAllThreads :: ThreadBudgetState -> IO () -cancelAllThreads (ThreadBudgetState _ ref) = readIORef ref - >>= mapM_ cancel . catMaybes . fmap snd . HM.elems . bmap - -mkThreadBudgetState :: HasCallStack => MaxConcurrentNativePushes -> IO ThreadBudgetState -mkThreadBudgetState limits = ThreadBudgetState limits <$> newIORef (BudgetMap 0 HM.empty) - - --- | Allocate the resources for a new action to be called (but don't call the action yet). -allocate - :: IORef BudgetMap -> UUID -> Int -> MonadIO m => m Int -allocate ref key newspent - = atomicModifyIORef' ref $ - \(BudgetMap spent hm) -> - ( BudgetMap (spent + newspent) (HM.insert key (newspent, Nothing) hm) - , spent - ) - --- | Register an already-allocated action with its 'Async'. -register - :: IORef BudgetMap -> UUID -> Async () -> MonadIO m => m Int -register ref key handle - = atomicModifyIORef' ref $ - \(BudgetMap spent hm) -> - ( BudgetMap spent (HM.adjust (_2 .~ Just handle) key hm) - , spent - ) - --- | Remove an registered and/or allocated action from a 'BudgetMap'. -unregister - :: IORef BudgetMap -> UUID -> MonadIO m => m () -unregister ref key - = atomicModifyIORef' ref $ - \bhm@(BudgetMap spent hm) -> - case HM.lookup key hm of - Just (newspent, _) -> (BudgetMap (spent - newspent) (HM.delete key hm), ()) - Nothing -> (bhm, ()) - - --- | If there is budget available, execute the action synchronously; otherwise, log a warning --- and return immediately. Make sure the budget state is updated accordingly both when --- starting and ending the execution. --- --- The hard limit in the 'ThreadBudgetState' argument is guaranteed to be an upper bound for --- the number of concurrently spent budget tokens; surpassing the soft limit will trigger a --- warning, but still execute the action. One action can use up any integer number of budget --- tokens, including 0 and negative. --- --- The action is called in an 'Async', but 'runWithBudget' waits for it to finish so it can --- update the budget. -runWithBudget - :: forall m. 
(MonadIO m, LC.MonadLogger m, MonadUnliftIO m) - => ThreadBudgetState -> Int -> m () -> m () -runWithBudget tbs spent = runWithBudget' tbs spent () - --- | More flexible variant of 'runWithBudget' that allows the action to return a value. With --- a default in case of budget exhaustion. -runWithBudget' - :: forall m a. (MonadIO m, LC.MonadLogger m, MonadUnliftIO m) - => ThreadBudgetState -> Int -> a -> m a -> m a -runWithBudget' (ThreadBudgetState limits ref) spent fallback action = do - key <- liftIO nextRandom - (`finally` unregister ref key) $ do - oldsize <- allocate ref key spent - - warnNoBudget (maybe False (oldsize >=) (limits ^. limitSoft)) - (maybe False (oldsize >=) (limits ^. limitHard)) - oldsize - - if (maybe True (oldsize <) (limits ^. limitHard)) - then go key oldsize - else pure fallback - where - go :: UUID -> Int -> m a - go key oldsize = do - LC.debug $ - "key" LC..= (toText key) LC.~~ - "spent" LC..= oldsize LC.~~ - LC.msg (LC.val "runWithBudget: go") - - handle <- async action - _ <- register ref key (const () <$> handle) - wait handle - - -- iff soft and/or hard limit are breached, log a warning-level message. - warnNoBudget :: Bool -> Bool -> Int -> m () - warnNoBudget False False _ = pure () - warnNoBudget soft hard oldsize = do - let limit = if hard then "hard" else "soft" - LC.warn $ - "spent" LC..= show oldsize LC.~~ - "soft-breach" LC..= soft LC.~~ - "hard-breach" LC..= hard LC.~~ - LC.msg (LC.val "runWithBudget: " <> limit <> " limit reached") - - --- | Fork a thread that checks with the given frequency if any async handles stored in the --- state are stale (ie., have terminated with or without exception, but not been removed). If --- that happens, log a warning. --- --- 'runWithBudget' should keep track of the state itself; 'watchThreadBudgetState' is solely a --- safety precaution to see if there aren't any corner cases we missed. --- --- Also, issue some metrics. -watchThreadBudgetState - :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) - => Metrics -> ThreadBudgetState -> NominalDiffTime -> m () -watchThreadBudgetState metrics (ThreadBudgetState limits ref) freq = safeForever $ do - recordMetrics metrics limits ref - removeStaleHandles ref - threadDelayNominalDiffTime freq - -recordMetrics - :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m) - => Metrics -> MaxConcurrentNativePushes -> IORef BudgetMap -> m () -recordMetrics metrics limits ref = do - (BudgetMap spent _) <- readIORef ref - gaugeSet (fromIntegral spent) (path "net.nativepush.thread_budget_allocated") metrics - forM_ (limits ^. limitHard) $ \lim -> - gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_hard_limit") metrics - forM_ (limits ^. limitSoft) $ \lim -> - gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_soft_limit") metrics - -threadDelayNominalDiffTime :: NominalDiffTime -> MonadIO m => m () -threadDelayNominalDiffTime = threadDelay . round . (* 1000000) . toRational - -staleTolerance :: NominalDiffTime -staleTolerance = 3 - --- | Get all handles for asyncs that have terminated, but not been removed from the state. Do --- that again after 'staleTolerance' to make sure that we don't catch any handles that would --- have been removed during the 'runWithBudget' cleanup, but we were faster. The intersection --- between the two rounds constitutes the legitimately stale handles: warn about them, and --- then remove them from the budgetmap. -removeStaleHandles - :: forall m. 
(MonadIO m, LC.MonadLogger m, MonadCatch m)
-  => IORef BudgetMap -> m ()
-removeStaleHandles ref = do
-  round1 <- getStaleHandles
-  threadDelayNominalDiffTime staleTolerance
-  round2 <- getStaleHandles
-
-  let staleHandles = Set.intersection round1 round2
-
-  unless (null staleHandles) $ do
-    warnStaleHandles (Set.size staleHandles) =<< readIORef ref
-    forM_ staleHandles $ \key -> do
-      mapM_ waitCatch . join . fmap snd =<< HM.lookup key . bmap <$> readIORef ref
-      unregister ref key
-
-  isSanitary <- (\bm -> bspent bm == budgetSpent' bm) <$> readIORef ref
-  unless isSanitary . LC.warn . LC.msg . LC.val $
-    "watchThreadBudgetState: total overall thread budget diverged from async weights (repaired)."
-
-  where
-    getStaleHandles :: m (Set UUID)
-    getStaleHandles = Set.fromList . mconcat <$> do
-      handles <- HM.toList . bmap <$> readIORef ref
-      forM handles $ \case
-        (_, (_, Nothing)) -> do
-          pure []
-        (key, (_, Just handle)) -> do
-          status <- poll handle
-          pure [key | isJust status]
-
-    warnStaleHandles :: Int -> BudgetMap -> m ()
-    warnStaleHandles num (BudgetMap spent _) = LC.warn $
-      "spent" LC..= show spent
-      LC.~~ LC.msg ("watchThreadBudgetState: removed " <> show num <> " stale handles.")
-
-
-safeForever
-  :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m)
-  => m () -> m ()
-safeForever action = forever $ action `catchAny` \exc -> do
-  LC.err $ "error" LC..= show exc LC.~~ LC.msg (LC.val "watchThreadBudgetState: crashed; retrying")
-  threadDelay 60000000  -- pause to keep worst-case noise in logs manageable
+import Gundeck.ThreadBudget.Internal
diff --git a/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs b/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs
new file mode 100644
index 00000000000..b8bfa822b78
--- /dev/null
+++ b/services/gundeck/src/Gundeck/ThreadBudget/Internal.hs
@@ -0,0 +1,223 @@
+module Gundeck.ThreadBudget.Internal where
+
+import Imports
+
+import Control.Exception.Safe (catchAny)
+import Control.Lens
+import Control.Monad.Catch (MonadCatch)
+import Data.Metrics (Metrics, counterIncr)
+import Data.Metrics.Middleware (gaugeSet, path)
+import Data.Time
+import Data.UUID (UUID, toText)
+import Data.UUID.V4 (nextRandom)
+import Gundeck.Options
+import UnliftIO.Async
+import UnliftIO.Exception (finally)
+
+import qualified Data.Set as Set
+import qualified Data.HashMap.Strict as HM
+import qualified System.Logger.Class as LC
+
+
+data ThreadBudgetState = ThreadBudgetState
+  { threadBudgetLimits :: MaxConcurrentNativePushes
+  , _threadBudgetRunning :: IORef BudgetMap
+  } deriving (Generic)
+
+-- | Store all handles for cleanup in 'watchThreadBudgetState'.
+data BudgetMap = BudgetMap
+  { bspent :: Int
+  , bmap :: HashMap UUID (Int, Maybe (Async ()))
+  }
+  deriving (Eq, Generic)
+
+-- | Instead of taking the pre-computed total budget spent of the 'BudgetMap' (O(1)), this
+-- counts all the threads that are successfully running (dropping the ones that are just about
+-- to try to grab a token).
+--
+-- WARNING: takes O(n), use with care. See 'bench_BudgetSpent''.
+budgetSpent :: ThreadBudgetState -> IO Int
+budgetSpent (ThreadBudgetState _ running) = budgetSpent' <$> readIORef running
+
+budgetSpent' :: BudgetMap -> Int
+budgetSpent' = sum . fmap fst . filter (isJust . snd) . HM.elems . bmap
+
+cancelAllThreads :: ThreadBudgetState -> IO ()
+cancelAllThreads (ThreadBudgetState _ ref) = readIORef ref
+  >>= mapM_ cancel . catMaybes . fmap snd . HM.elems . 
bmap
+
+mkThreadBudgetState :: HasCallStack => MaxConcurrentNativePushes -> IO ThreadBudgetState
+mkThreadBudgetState limits = ThreadBudgetState limits <$> newIORef (BudgetMap 0 HM.empty)
+
+
+-- | Allocate the resources for a new action to be called (but don't call the action yet).
+allocate
+  :: IORef BudgetMap -> UUID -> Int -> MonadIO m => m Int
+allocate ref key newspent
+  = atomicModifyIORef' ref $
+      \(BudgetMap spent hm) ->
+        ( BudgetMap (spent + newspent) (HM.insert key (newspent, Nothing) hm)
+        , spent
+        )
+
+-- | Register an already-allocated action with its 'Async'.
+register
+  :: IORef BudgetMap -> UUID -> Async () -> MonadIO m => m Int
+register ref key handle
+  = atomicModifyIORef' ref $
+      \(BudgetMap spent hm) ->
+        ( BudgetMap spent (HM.adjust (_2 .~ Just handle) key hm)
+        , spent
+        )
+
+-- | Remove a registered and/or allocated action from a 'BudgetMap'.
+unregister
+  :: IORef BudgetMap -> UUID -> MonadIO m => m ()
+unregister ref key
+  = atomicModifyIORef' ref $
+      \bhm@(BudgetMap spent hm) ->
+        case HM.lookup key hm of
+          Just (newspent, _) -> (BudgetMap (spent - newspent) (HM.delete key hm), ())
+          Nothing -> (bhm, ())
+
+
+-- | If there is budget available, execute the action synchronously; otherwise, log a warning
+-- and return immediately. Make sure the budget state is updated accordingly both when
+-- starting and ending the execution.
+--
+-- The hard limit in the 'ThreadBudgetState' argument is guaranteed to be an upper bound for
+-- the number of concurrently spent budget tokens; surpassing the soft limit will trigger a
+-- warning, but still execute the action. One action can use up any integer number of budget
+-- tokens, including 0 and negative.
+--
+-- The action is called in an 'Async', but 'runWithBudget' waits for it to finish so it can
+-- update the budget.
+runWithBudget
+  :: forall m. (MonadIO m, LC.MonadLogger m, MonadUnliftIO m)
+  => Metrics -> ThreadBudgetState -> Int -> m () -> m ()
+runWithBudget metrics tbs spent = runWithBudget' metrics tbs spent ()
+
+-- | More flexible variant of 'runWithBudget' that allows the action to return a value. With
+-- a default in case of budget exhaustion.
+runWithBudget'
+  :: forall m a. (MonadIO m, LC.MonadLogger m, MonadUnliftIO m)
+  => Metrics -> ThreadBudgetState -> Int -> a -> m a -> m a
+runWithBudget' metrics (ThreadBudgetState limits ref) spent fallback action = do
+  key <- liftIO nextRandom
+  (`finally` unregister ref key) $ do
+    oldsize <- allocate ref key spent
+
+    let softLimitBreached = maybe False (oldsize >=) (limits ^. limitSoft)
+        hardLimitBreached = maybe False (oldsize >=) (limits ^. limitHard)
+    warnNoBudget softLimitBreached hardLimitBreached oldsize
+
+    if (maybe True (oldsize <) (limits ^. limitHard))
+      then go key oldsize
+      else pure fallback
+  where
+    go :: UUID -> Int -> m a
+    go key oldsize = do
+      LC.debug $
+        "key" LC..= (toText key) LC.~~
+        "spent" LC..= oldsize LC.~~
+        LC.msg (LC.val "runWithBudget: go")
+
+      handle <- async action
+      _ <- register ref key (const () <$> handle)
+      wait handle
+
+    -- iff soft and/or hard limit are breached, log a warning-level message.
+    warnNoBudget :: Bool -> Bool -> Int -> m ()
+    warnNoBudget False False _ = pure ()
+    warnNoBudget soft hard oldsize = do
+      let limit = if hard then "hard" else "soft"
+          metric = "net.nativepush." 
<> limit <> "_limit_breached"
+      counterIncr (path metric) metrics
+      LC.warn $
+        "spent" LC..= show oldsize LC.~~
+        "soft-breach" LC..= soft LC.~~
+        "hard-breach" LC..= hard LC.~~
+        LC.msg ("runWithBudget: " <> limit <> " limit reached")
+
+
+-- | Fork a thread that checks with the given frequency if any async handles stored in the
+-- state are stale (i.e., have terminated with or without exception, but not been removed). If
+-- that happens, log a warning.
+--
+-- 'runWithBudget' should keep track of the state itself; 'watchThreadBudgetState' is solely a
+-- safety precaution to see if there aren't any corner cases we missed.
+--
+-- Also, issue some metrics.
+watchThreadBudgetState
+  :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m)
+  => Metrics -> ThreadBudgetState -> NominalDiffTime -> m ()
+watchThreadBudgetState metrics (ThreadBudgetState limits ref) freq = safeForever $ do
+  recordMetrics metrics limits ref
+  removeStaleHandles ref
+  threadDelayNominalDiffTime freq
+
+recordMetrics
+  :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m)
+  => Metrics -> MaxConcurrentNativePushes -> IORef BudgetMap -> m ()
+recordMetrics metrics limits ref = do
+  (BudgetMap spent _) <- readIORef ref
+  gaugeSet (fromIntegral spent) (path "net.nativepush.thread_budget_allocated") metrics
+  forM_ (limits ^. limitHard) $ \lim ->
+    gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_hard_limit") metrics
+  forM_ (limits ^. limitSoft) $ \lim ->
+    gaugeSet (fromIntegral lim) (path "net.nativepush.thread_budget_soft_limit") metrics
+
+threadDelayNominalDiffTime :: NominalDiffTime -> MonadIO m => m ()
+threadDelayNominalDiffTime = threadDelay . round . (* 1000000) . toRational
+
+staleTolerance :: NominalDiffTime
+staleTolerance = 3
+
+-- | Get all handles for asyncs that have terminated, but not been removed from the state. Do
+-- that again after 'staleTolerance' to make sure that we don't catch any handles that would
+-- have been removed during the 'runWithBudget' cleanup, but we were faster. The intersection
+-- between the two rounds constitutes the legitimately stale handles: warn about them, and
+-- then remove them from the budgetmap.
+removeStaleHandles
+  :: forall m. (MonadIO m, LC.MonadLogger m, MonadCatch m)
+  => IORef BudgetMap -> m ()
+removeStaleHandles ref = do
+  round1 <- getStaleHandles
+  threadDelayNominalDiffTime staleTolerance
+  round2 <- getStaleHandles
+
+  let staleHandles = Set.intersection round1 round2
+
+  unless (null staleHandles) $ do
+    warnStaleHandles (Set.size staleHandles) =<< readIORef ref
+    forM_ staleHandles $ \key -> do
+      mapM_ waitCatch . join . fmap snd =<< HM.lookup key . bmap <$> readIORef ref
+      unregister ref key
+
+  isSanitary <- (\bm -> bspent bm == budgetSpent' bm) <$> readIORef ref
+  unless isSanitary . LC.warn . LC.msg . LC.val $
+    "watchThreadBudgetState: total overall thread budget diverged from async weights (repaired)."
+
+  where
+    getStaleHandles :: m (Set UUID)
+    getStaleHandles = Set.fromList . mconcat <$> do
+      handles <- HM.toList . bmap <$> readIORef ref
+      forM handles $ \case
+        (_, (_, Nothing)) -> do
+          pure []
+        (key, (_, Just handle)) -> do
+          status <- poll handle
+          pure [key | isJust status]
+
+    warnStaleHandles :: Int -> BudgetMap -> m ()
+    warnStaleHandles num (BudgetMap spent _) = LC.warn $
+      "spent" LC..= show spent
+      LC.~~ LC.msg ("watchThreadBudgetState: removed " <> show num <> " stale handles.")
+
+
+safeForever
+  :: forall m. 
(MonadIO m, LC.MonadLogger m, MonadCatch m)
+  => m () -> m ()
+safeForever action = forever $ action `catchAny` \exc -> do
+  LC.err $ "error" LC..= show exc LC.~~ LC.msg (LC.val "watchThreadBudgetState: crashed; retrying")
+  threadDelay 60000000  -- pause to keep worst-case noise in logs manageable
diff --git a/services/gundeck/test/bench/Main.hs b/services/gundeck/test/bench/Main.hs
index 2d3badd4d79..712293407ff 100644
--- a/services/gundeck/test/bench/Main.hs
+++ b/services/gundeck/test/bench/Main.hs
@@ -6,7 +6,7 @@ import Data.Id (randomId, ConnId (..), ClientId (..))
 import Gundeck.Types.Push
 import Gundeck.Push.Native.Serialise
 import Gundeck.Push.Native.Types
-import Gundeck.ThreadBudget
+import Gundeck.ThreadBudget.Internal
 import Gundeck.Options
 import Network.AWS (Region (Ireland))
 import OpenSSL (withOpenSSL)
diff --git a/services/gundeck/test/unit/ThreadBudget.hs b/services/gundeck/test/unit/ThreadBudget.hs
index 43c4beee6d0..4f135866271 100644
--- a/services/gundeck/test/unit/ThreadBudget.hs
+++ b/services/gundeck/test/unit/ThreadBudget.hs
@@ -15,7 +15,7 @@ import Data.String.Conversions (cs)
 import Data.Time
 import Data.TreeDiff.Class (ToExpr)
 import GHC.Generics
-import Gundeck.ThreadBudget
+import Gundeck.ThreadBudget.Internal
 import Gundeck.Options
 import Test.QuickCheck
 import Test.QuickCheck.Monadic
@@ -111,9 +111,10 @@ burstActions
   -> MilliSeconds
   -> NumberOfThreads
   -> (MonadIO m) => m ()
-burstActions tbs logHistory howlong (NumberOfThreads howmany)
-    = let budgeted = runWithBudget tbs 1 (delayms howlong)
-      in liftIO . replicateM_ howmany . forkIO $ runReaderT budgeted logHistory
+burstActions tbs logHistory howlong (NumberOfThreads howmany) = do
+    mtr <- metrics
+    let budgeted = runWithBudget mtr tbs 1 (delayms howlong)
+    liftIO . replicateM_ howmany . forkIO $ runReaderT budgeted logHistory
 
 -- | Start a watcher with given params and a frequency of 10 milliseconds, so we are more
 -- likely to find weird race conditions.

From 51b25f7df01f79d2775de3e3f75dbcb3242e1f90 Mon Sep 17 00:00:00 2001
From: jschaul
Date: Mon, 4 Nov 2019 16:40:17 +0100
Subject: [PATCH 17/26] Do not send events to team members to be deleted (#897)

Events about deleted conversations do not need to be sent to everyone
when a team is deleted. Indeed, it is harmful in that it leads to
performance problems: For large teams with many conversations,
notifying every team member about the fact that every conversation has
been deleted creates a lot of events. None of these events are useful,
since on a team deletion, the accounts of every user are deleted, i.e.
in practice these events cannot be read.

This PR fixes the bug that was introduced in #849
---
 services/galley/src/Galley/API/Teams.hs       | 7 +++++--
 services/galley/test/integration/API/Teams.hs | 3 +--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/services/galley/src/Galley/API/Teams.hs b/services/galley/src/Galley/API/Teams.hs
index b90c7207500..29aace4ca78 100644
--- a/services/galley/src/Galley/API/Teams.hs
+++ b/services/galley/src/Galley/API/Teams.hs
@@ -214,9 +214,12 @@ uncheckedDeleteTeam zusr zcon tid = do
       -> Galley ([Push],[(BotMember, Conv.Event)])
     pushConvDeleteEvents now teamMembs c (pp, ee) = do
       (bots, convMembs) <- botsAndUsers <$> Data.members (c ^. 
conversationId) - let mm = convMembsAndTeamMembs convMembs teamMembs + -- Only nonTeamMembers need to get any events, since on team deletion, + -- all team users are deleted immediately after these events are sent + -- and will thus never be able to see these events in practice. + let mm = nonTeamMembers convMembs teamMembs let e = Conv.Event Conv.ConvDelete (c ^. conversationId) zusr now Nothing - let p = newPush zusr (ConvEvent e) mm + let p = newPush zusr (ConvEvent e) (map recipient mm) let ee' = bots `zip` repeat e let pp' = maybe pp (\x -> (x & pushConn .~ zcon) : pp) p pure (pp', ee' ++ ee) diff --git a/services/galley/test/integration/API/Teams.hs b/services/galley/test/integration/API/Teams.hs index 341b7ff5023..84b6f634b9f 100644 --- a/services/galley/test/integration/API/Teams.hs +++ b/services/galley/test/integration/API/Teams.hs @@ -598,8 +598,7 @@ testDeleteTeam = do const 202 === statusCode checkTeamDeleteEvent tid wsOwner checkTeamDeleteEvent tid wsMember - checkConvDeleteEvent cid1 wsOwner - checkConvDeleteEvent cid1 wsMember + -- team members should not receive conversation delete events checkConvDeleteEvent cid1 wsExtern WS.assertNoEvent timeout [wsOwner, wsExtern, wsMember] From 268520cbdeb8e64b68ddedfd273dbe35a16ab6e6 Mon Sep 17 00:00:00 2001 From: Serge Bazanski Date: Mon, 4 Nov 2019 18:18:53 +0100 Subject: [PATCH 18/26] http-client: bump to wire-20191104 --- snapshots/wire-1.4.yaml | 12 ++++++++++++ stack.yaml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 snapshots/wire-1.4.yaml diff --git a/snapshots/wire-1.4.yaml b/snapshots/wire-1.4.yaml new file mode 100644 index 00000000000..4e83c24eced --- /dev/null +++ b/snapshots/wire-1.4.yaml @@ -0,0 +1,12 @@ +# DO NOT MODIFY THIS FILE. See README.md to learn why. + +resolver: https://raw.githubusercontent.com/wireapp/wire-server/develop/snapshots/wire-1.3.yaml +name: wire-1.4 + +packages: +- archive: https://github.com/wireapp/http-client/archive/wire-2019-11-04.tar.gz + subdirs: + - http-client + - http-client-openssl + - http-client-tls + - http-conduit diff --git a/stack.yaml b/stack.yaml index ee6dff9619a..c90fe32dd0b 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,4 @@ -resolver: snapshots/wire-1.3.yaml +resolver: snapshots/wire-1.4.yaml packages: - libs/api-bot From 13ff5a76a114f3b073e6d2841e9b4535862c31fa Mon Sep 17 00:00:00 2001 From: Serge Bazanski Date: Mon, 4 Nov 2019 18:40:05 +0100 Subject: [PATCH 19/26] snapshots/wire-1.4.yaml: comment --- snapshots/wire-1.4.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/snapshots/wire-1.4.yaml b/snapshots/wire-1.4.yaml index 4e83c24eced..82e4d23959e 100644 --- a/snapshots/wire-1.4.yaml +++ b/snapshots/wire-1.4.yaml @@ -4,6 +4,14 @@ resolver: https://raw.githubusercontent.com/wireapp/wire-server/develop/snapshot name: wire-1.4 packages: + # http-client forked by wire, commit 032b6503ab0c47f8f85bf48e0beb1f895a95bb27 + # Contains patches on top of http-client-openssl-0.2.2.0: + # - 89136497b8e0fa0624c1451883eb011347203532 + # - 916b04313c6864e02ebed4278b43b971189c61cd + # - 64ebec4fe7b48c131b7d6f0f8d7a6c6cacae70e6 + # - 78ebeead1a2efb17c55fb72d2d0041295d1271b8 + # - 032b6503ab0c47f8f85bf48e0beb1f895a95bb27 + # These provide a hacky way to implement TLS ceritifcate pinning. 
- archive: https://github.com/wireapp/http-client/archive/wire-2019-11-04.tar.gz subdirs: - http-client From 2f314e6341188c59644ec7e8ec2e51bd3c8f01cb Mon Sep 17 00:00:00 2001 From: jschaul Date: Tue, 5 Nov 2019 14:06:37 +0100 Subject: [PATCH 20/26] gundeck: cleanup of deprecated code (#894) Follow-up from #549 --- services/gundeck/src/Gundeck/API.hs | 14 -------------- services/gundeck/src/Gundeck/Client.hs | 10 ++-------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/services/gundeck/src/Gundeck/API.hs b/services/gundeck/src/Gundeck/API.hs index 8f9cc5d31a5..78b8b33dc4b 100644 --- a/services/gundeck/src/Gundeck/API.hs +++ b/services/gundeck/src/Gundeck/API.hs @@ -64,11 +64,6 @@ sitemap = do returns (ref Model.pushTokenList) response 200 "Object containing list of push tokens" end - post "/i/push" (continue Push.push) $ - request .&. accept "application" "json" - -- TODO: REFACTOR: this end-point is probably noise, and should be dropped. @/i/push/v2@ does exactly - -- the same thing. - post "/i/push/v2" (continue Push.push) $ request .&. accept "application" "json" @@ -143,15 +138,6 @@ sitemap = do -- User-Client API ------------------------------------------------------- - -- DEPRECATED: this is deprecated as of https://github.com/wireapp/wire-server/pull/549 (can be - -- removed once brig is deployed everywhere and won't trip over this missing any more.) - put "/i/clients/:cid" (continue Client.register) $ - header "Z-User" - .&. param "cid" - .&. request - .&. contentType "application" "json" - .&. accept "application" "json" - delete "/i/clients/:cid" (continue Client.unregister) $ header "Z-User" .&. param "cid" diff --git a/services/gundeck/src/Gundeck/Client.hs b/services/gundeck/src/Gundeck/Client.hs index cd110218c28..8b783d52ff4 100644 --- a/services/gundeck/src/Gundeck/Client.hs +++ b/services/gundeck/src/Gundeck/Client.hs @@ -1,6 +1,5 @@ module Gundeck.Client - ( register - , unregister + ( unregister , removeUser ) where @@ -10,17 +9,12 @@ import Data.Id import Data.Predicate import Gundeck.Monad import Gundeck.Push.Native -import Gundeck.Util -import Network.Wai (Request, Response) +import Network.Wai (Response) import Network.Wai.Utilities import qualified Gundeck.Notification.Data as Notifications import qualified Gundeck.Push.Data as Push --- | DEPRECATED: remove once brig is upgraded everywhere. -register :: UserId ::: ClientId ::: Request ::: JSON ::: JSON -> Gundeck Response -register (_uid ::: _cid ::: _req ::: _) = return empty - unregister :: UserId ::: ClientId -> Gundeck Response unregister (uid ::: cid) = do toks <- filter byClient <$> Push.lookup uid Push.Quorum From c2a79607b7a8bae340e3ff415e22a0e1781e0d80 Mon Sep 17 00:00:00 2001 From: Serge Bazanski Date: Tue, 5 Nov 2019 11:09:38 +0100 Subject: [PATCH 21/26] snapshots/wire-1.4.yaml: typo --- snapshots/wire-1.4.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snapshots/wire-1.4.yaml b/snapshots/wire-1.4.yaml index 82e4d23959e..023c078c9ba 100644 --- a/snapshots/wire-1.4.yaml +++ b/snapshots/wire-1.4.yaml @@ -11,7 +11,7 @@ packages: # - 64ebec4fe7b48c131b7d6f0f8d7a6c6cacae70e6 # - 78ebeead1a2efb17c55fb72d2d0041295d1271b8 # - 032b6503ab0c47f8f85bf48e0beb1f895a95bb27 - # These provide a hacky way to implement TLS ceritifcate pinning. + # These provide a hacky way to implement TLS certificate pinning. 
- archive: https://github.com/wireapp/http-client/archive/wire-2019-11-04.tar.gz subdirs: - http-client From 407a8aef4f3e51423269ab92ba811886273d1336 Mon Sep 17 00:00:00 2001 From: Arian van Putten Date: Wed, 6 Nov 2019 12:06:51 +0100 Subject: [PATCH 22/26] Pin nixpkgs (#898) --- stack-deps.nix | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/stack-deps.nix b/stack-deps.nix index 815b2462449..945c6778e77 100644 --- a/stack-deps.nix +++ b/stack-deps.nix @@ -1,5 +1,22 @@ -{ pkgs ? import {} }: let + # Pin nixpkgs for all dependencies. + # If you want to update. + # 1. go to https://nixos.org/channels/nixos-19.09 + # 2. copy the URL to nixexprs.tar.gz and the sha256 hash + # 3. Uncomment the sha256 = 00000 field + # 4. nix-build + # 5. Make nix complain to you what the correct hash is. + # 6. comment sha256 = 0000 and add sha256 = + # 7. nix-build + # 8. commit + # TODO(arianvp): There are tools that automate this; we should use them + pkgsTar = builtins.fetchTarball { + name = "nixos-1909"; + url = "https://releases.nixos.org/nixos/19.09/nixos-19.09.1019.c5aabb0d603/nixexprs.tar.xz"; + sha256 = "1hjw843g964aj9cd9p6x5473yy4sfmqnqlvavc5c1lbqa8v676zg"; + # sha256 = "0000000000000000000000000000000000000000000000000000"; + }; + pkgs = import pkgsTar {}; cryptobox-c = pkgs.callPackage ({fetchFromGitHub, rustPlatform, pkgconfig, libsodium}: rustPlatform.buildRustPackage rec { name = "cryptobox-c-${version}"; From 80f1adc8c2ffcde81eb6d118ff222b1006cd649a Mon Sep 17 00:00:00 2001 From: jschaul Date: Wed, 6 Nov 2019 12:42:41 +0100 Subject: [PATCH 23/26] throttle team deletion (#900) * throttle galley->gundeck requests on team deletion by sending events slower (configurable, optional, slow/cautious defaults) * throttle user deletions (brig) by waiting in between each user deletion (configurable, optional, default: 100ms) --- services/brig/brig.integration.yaml | 1 + .../brig/src/Brig/InternalEvent/Process.hs | 7 +++++ services/brig/src/Brig/Options.hs | 6 ++++ services/galley/galley.integration.yaml | 3 ++ services/galley/src/Galley/API/Teams.hs | 29 +++++++++++++++---- services/galley/src/Galley/Options.hs | 22 ++++++++++---- 6 files changed, 57 insertions(+), 11 deletions(-) diff --git a/services/brig/brig.integration.yaml b/services/brig/brig.integration.yaml index ca5d4b37808..605a2705cc0 100644 --- a/services/brig/brig.integration.yaml +++ b/services/brig/brig.integration.yaml @@ -150,6 +150,7 @@ optSettings: setEmailVisibility: visible_to_self setPropertyMaxKeyLen: 1024 setPropertyMaxValueLen: 4096 + setDeleteThrottleMillis: 0 logLevel: Warn logNetStrings: false diff --git a/services/brig/src/Brig/InternalEvent/Process.hs b/services/brig/src/Brig/InternalEvent/Process.hs index 16054a20715..3ae069ba699 100644 --- a/services/brig/src/Brig/InternalEvent/Process.hs +++ b/services/brig/src/Brig/InternalEvent/Process.hs @@ -4,7 +4,9 @@ module Brig.InternalEvent.Process import Imports import Brig.App +import Brig.Options (setDeleteThrottleMillis, defDeleteThrottleMillis) import Brig.InternalEvent.Types +import Control.Lens (view) import Control.Monad.Catch import Data.ByteString.Conversion import System.Logger.Class (field, msg, (~~), val) @@ -23,6 +25,11 @@ onEvent n = handleTimeout $ case n of Log.info $ msg (val "Processing user delete event") ~~ field "user" (toByteString uid) API.lookupAccount uid >>= mapM_ API.deleteAccount + -- As user deletions are expensive resource-wise in the context of + -- bulk user deletions (e.g. 
during team deletions), + -- wait 'delay' ms before processing the next event + delay <- fromMaybe defDeleteThrottleMillis . setDeleteThrottleMillis <$> view settings + liftIO $ threadDelay (1000 * delay) DeleteService pid sid -> do Log.info $ msg (val "Processing service delete event") ~~ field "provider" (toByteString pid) diff --git a/services/brig/src/Brig/Options.hs b/services/brig/src/Brig/Options.hs index 49937ca5541..24170d59587 100644 --- a/services/brig/src/Brig/Options.hs +++ b/services/brig/src/Brig/Options.hs @@ -312,6 +312,9 @@ data Settings = Settings , setPropertyMaxKeyLen :: !(Maybe Int64) , setPropertyMaxValueLen :: !(Maybe Int64) + , setDeleteThrottleMillis :: !(Maybe Int) -- ^ How long, in milliseconds, to wait + -- in between processing delete events + -- from the internal delete queue } deriving (Show, Generic) @@ -321,6 +324,9 @@ defMaxKeyLen = 256 defMaxValueLen :: Int64 defMaxValueLen = 512 +defDeleteThrottleMillis :: Int +defDeleteThrottleMillis = 100 + instance FromJSON Timeout where parseJSON (Y.Number n) = let defaultV = 3600 diff --git a/services/galley/galley.integration.yaml b/services/galley/galley.integration.yaml index 5e4b4924c14..236968b3ee4 100644 --- a/services/galley/galley.integration.yaml +++ b/services/galley/galley.integration.yaml @@ -26,6 +26,9 @@ settings: maxConvSize: 16 intraListing: false conversationCodeURI: https://app.wire.com/join/ + concurrentDeletionEvents: 1024 + deleteConvThrottleMillis: 0 + featureFlags: # see #RefConfigOptions in `/docs/reference` sso: disabled-by-default legalhold: disabled-by-default diff --git a/services/galley/src/Galley/API/Teams.hs b/services/galley/src/Galley/API/Teams.hs index 29aace4ca78..cded2143c59 100644 --- a/services/galley/src/Galley/API/Teams.hs +++ b/services/galley/src/Galley/API/Teams.hs @@ -61,6 +61,7 @@ import Network.Wai.Predicate hiding (setStatus, result, or) import Network.Wai.Utilities import UnliftIO (mapConcurrently) +import qualified Data.List.Extra as List import qualified Data.Set as Set import qualified Galley.Data as Data import qualified Galley.Data.LegalHold as LegalHoldData @@ -193,10 +194,9 @@ uncheckedDeleteTeam zusr zcon tid = do membs <- Data.teamMembers tid now <- liftIO getCurrentTime convs <- filter (not . view managedConversation) <$> Data.teamConversations tid - (ue, be) <- foldrM (pushConvDeleteEvents now membs) ([],[]) convs + (ue, be) <- foldrM (createConvDeleteEvents now membs) ([],[]) convs let e = newEvent TeamDelete tid now - let r = list1 (userRecipient zusr) (membersToRecipients (Just zusr) membs) - pushSome ((newPush1 zusr (TeamEvent e) r & pushConn .~ zcon) : ue) + pushDeleteEvents membs e ue void . forkIO $ void $ External.deliver be -- TODO: we don't delete bots here, but we should do that, since -- every bot user can only be in a single conversation. Just @@ -206,13 +206,32 @@ uncheckedDeleteTeam zusr zcon tid = do Journal.teamDelete tid Data.deleteTeam tid where - pushConvDeleteEvents + pushDeleteEvents :: [TeamMember] -> Event -> [Push] -> Galley () + pushDeleteEvents membs e ue = do + o <- view $ options . 
optSettings
+      let r = list1 (userRecipient zusr) (membersToRecipients (Just zusr) membs)
+      -- To avoid DoS on gundeck, send team deletion events in chunks
+      let chunkSize = fromMaybe defConcurrentDeletionEvents (o^.setConcurrentDeletionEvents)
+      let chunks = List.chunksOf chunkSize (toList r)
+      forM_ chunks $ \chunk -> case chunk of
+        [] -> return ()
+        -- push TeamDelete events
+        x:xs -> push1 (newPush1 zusr (TeamEvent e) (list1 x xs) & pushConn .~ zcon)
+
+      -- To avoid DoS on gundeck, send conversation deletion events slowly
+      let delay = 1000 * (fromMaybe defDeleteConvThrottleMillis (o^.setDeleteConvThrottleMillis))
+      forM_ ue $ \event -> do
+        -- push ConversationDelete events
+        push1 event
+        threadDelay delay
+
+    createConvDeleteEvents
       :: UTCTime
       -> [TeamMember]
       -> TeamConversation
       -> ([Push],[(BotMember, Conv.Event)])
       -> Galley ([Push],[(BotMember, Conv.Event)])
-    pushConvDeleteEvents now teamMembs c (pp, ee) = do
+    createConvDeleteEvents now teamMembs c (pp, ee) = do
       (bots, convMembs) <- botsAndUsers <$> Data.members (c ^. conversationId)
       -- Only nonTeamMembers need to get any events, since on team deletion,
       -- all team users are deleted immediately after these events are sent
       -- and will thus never be able to see these events in practice.
       let mm = nonTeamMembers convMembs teamMembs
diff --git a/services/galley/src/Galley/Options.hs b/services/galley/src/Galley/Options.hs
index 33989be8b0d..fa0739f15b1 100644
--- a/services/galley/src/Galley/Options.hs
+++ b/services/galley/src/Galley/Options.hs
@@ -12,21 +12,31 @@ import Galley.Types.Teams (FeatureFlags(..))
 
 data Settings = Settings
     { -- | Number of connections for the HTTP client pool
-      _setHttpPoolSize :: !Int
+      _setHttpPoolSize            :: !Int
       -- | Max number of members in a team. NOTE: This must be in sync with Brig
-    , _setMaxTeamSize :: !Word16
+    , _setMaxTeamSize             :: !Word16
       -- | Max number of members in a conversation. NOTE: This must be in sync with Brig
-    , _setMaxConvSize :: !Word16
+    , _setMaxConvSize             :: !Word16
       -- | Whether to call Brig for device listing
-    , _setIntraListing :: !Bool
+    , _setIntraListing            :: !Bool
       -- | URI prefix for conversations with access mode @code@
-    , _setConversationCodeURI :: !HttpsUrl
-    , _setFeatureFlags :: !FeatureFlags
+    , _setConversationCodeURI     :: !HttpsUrl
+      -- | Throttling: limits to concurrent deletion events
+    , _setConcurrentDeletionEvents :: !(Maybe Int)
+      -- | Throttling: delay between sending events upon team deletion
+    , _setDeleteConvThrottleMillis :: !(Maybe Int)
+    , _setFeatureFlags            :: !FeatureFlags
     } deriving (Show, Generic)
 
 deriveFromJSON toOptionFieldName ''Settings
 makeLenses ''Settings
 
+defConcurrentDeletionEvents :: Int
+defConcurrentDeletionEvents = 128
+
+defDeleteConvThrottleMillis :: Int
+defDeleteConvThrottleMillis = 20
+
 data JournalOpts = JournalOpts
   { _awsQueueName :: !Text  -- ^ SQS queue name to send team events
   , _awsEndpoint  :: !AWSEndpoint  -- ^ AWS endpoint

From 83c30f9fe24b2bbcd7f037f550ab4dcee5a86a23 Mon Sep 17 00:00:00 2001
From: jschaul
Date: Wed, 6 Nov 2019 14:23:53 +0100
Subject: [PATCH 24/26] Feature/gundeck limit parallel connections (#895)

Chunk nativeToken lookups and chunk native pushes to SNS, to avoid
opening an unbounded number of new connections when sending to large
numbers of recipients. 
* configurable (suggestion: 32); default to unbounded --- services/gundeck/gundeck.integration.yaml | 1 + services/gundeck/src/Gundeck/Options.hs | 4 ++++ services/gundeck/src/Gundeck/Push.hs | 25 ++++++++++++--------- services/gundeck/src/Gundeck/Push/Native.hs | 12 +++++++++- services/gundeck/test/unit/MockGundeck.hs | 1 + 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/services/gundeck/gundeck.integration.yaml b/services/gundeck/gundeck.integration.yaml index 9f6d0820a55..6ffd0d2e8bd 100644 --- a/services/gundeck/gundeck.integration.yaml +++ b/services/gundeck/gundeck.integration.yaml @@ -24,6 +24,7 @@ settings: httpPoolSize: 1024 notificationTTL: 24192200 bulkPush: true + perNativePushConcurrency: 32 maxConcurrentNativePushes: hard: 30 # more than this number of threads will not be allowed soft: 10 # more than this number of threads will be warned about diff --git a/services/gundeck/src/Gundeck/Options.hs b/services/gundeck/src/Gundeck/Options.hs index af888da2716..484fcfc3687 100644 --- a/services/gundeck/src/Gundeck/Options.hs +++ b/services/gundeck/src/Gundeck/Options.hs @@ -38,6 +38,10 @@ data Settings = Settings , _setBulkPush :: !Bool -- | Maximum number of concurrent threads calling SNS. , _setMaxConcurrentNativePushes :: !(Maybe MaxConcurrentNativePushes) + -- | Maximum number of parallel requests to SNS and cassandra + -- during native push processing (per incoming push request) + -- defaults to unbounded, if unset. + , _setPerNativePushConcurrency :: !(Maybe Int) } deriving (Show, Generic) data MaxConcurrentNativePushes = MaxConcurrentNativePushes diff --git a/services/gundeck/src/Gundeck/Push.hs b/services/gundeck/src/Gundeck/Push.hs index f3dc5e73d6b..7d4ab86a07f 100644 --- a/services/gundeck/src/Gundeck/Push.hs +++ b/services/gundeck/src/Gundeck/Push.hs @@ -107,10 +107,16 @@ instance MonadNativeTargets Gundeck where mntgtLookupAddresses rcp = Data.lookup rcp Data.One class Monad m => MonadMapAsync m where - mntgtMapAsync :: (a -> m b) -> [a] -> m [Either SomeException b] + mntgtMapAsync :: (a -> m b) -> [a] -> m [Either SomeException b] + mntgtPerPushConcurrency :: m (Maybe Int) instance MonadMapAsync Gundeck where - mntgtMapAsync = mapAsync + mntgtPerPushConcurrency = view (options . optSettings . setPerNativePushConcurrency ) + mntgtMapAsync f l = do + perPushConcurrency <- mntgtPerPushConcurrency + case perPushConcurrency of + Nothing -> mapAsync f l + Just chunkSize -> concat <$> mapM (mapAsync f) (List.chunksOf chunkSize l) -- | Abstract over all effects in 'pushAny' (for unit testing). class (MonadPushAll m, MonadNativeTargets m, MonadMapAsync m) => MonadPushAny m where @@ -189,14 +195,17 @@ pushAll pushes = do resp <- compilePushResps targets <$> mpaBulkPush (compilePushReq <$> targets) -- native push + perPushConcurrency <- mntgtPerPushConcurrency forM_ resp $ \((notif :: Notification, psh :: Push), alreadySent :: [Presence]) -> do let rcps' = nativeTargetsRecipients psh - budget = length rcps' - -- this is a rough budget, since there may be more than one device in a + cost = maybe (length rcps') (min (length rcps')) perPushConcurrency + -- ^ this is a rough budget cost, since there may be more than one device in a -- 'Presence', so one budget token may trigger at most 8 push notifications -- to be sent out. + -- If perPushConcurrency is defined, we take the min with 'perNativePushConcurrency', as native push requests + -- to cassandra and SNS are limited to 'perNativePushConcurrency' in parallel. unless (psh ^. 
pushTransient) - $ mpaRunWithBudget budget () + $ mpaRunWithBudget cost () $ mpaPushNative notif psh =<< nativeTargets psh rcps' alreadySent @@ -280,13 +289,9 @@ nativeTargetsRecipients psh = filter routeNative (toList (fromRange (psh ^. push routeNative u = u ^. recipientRoute /= RouteDirect && (u ^. recipientId /= psh ^. pushOrigin || psh ^. pushNativeIncludeOrigin) --- | TODO: 'nativeTargets' calls cassandra once for each 'Recipient' of the 'Push'. Instead, +-- | FUTUREWORK: 'nativeTargets' calls cassandra once for each 'Recipient' of the 'Push'. Instead, -- it should be called once with @[Push]@ for every call to 'pushAll', and that call should -- only call cassandra once in total, yielding all addresses of all recipients of all pushes. --- --- FUTUREWORK: we may want to turn 'mntgtMapAsync' into an ordinary `mapM`: it's cassandra --- access, so it'll be fast either way given the size of the input, and synchronous calls --- impose a much lower risk of choking when system load peaks. nativeTargets :: forall m. (MonadNativeTargets m, MonadMapAsync m) => Push -> [Recipient] -> [Presence] -> m [Address] diff --git a/services/gundeck/src/Gundeck/Push/Native.hs b/services/gundeck/src/Gundeck/Push/Native.hs index 86add930825..82655ce60f0 100644 --- a/services/gundeck/src/Gundeck/Push/Native.hs +++ b/services/gundeck/src/Gundeck/Push/Native.hs @@ -29,11 +29,21 @@ import qualified Gundeck.Aws as Aws import qualified Gundeck.Notification.Data as Stream import qualified Gundeck.Push.Data as Data import qualified System.Logger.Class as Log +import qualified Data.List.Extra as List push :: NativePush -> [Address] -> Gundeck () push _ [] = pure () push m [a] = push1 m a -push m addrs = void $ mapConcurrently (push1 m) addrs +push m addrs = do + perPushConcurrency <- view (options . optSettings . 
setPerNativePushConcurrency ) + case perPushConcurrency of + -- send all at once + Nothing -> void $ mapConcurrently (push1 m) addrs + -- avoid high amounts of fresh parallel network requests by + -- parallelizing only chunkSize native pushes at a time + Just chunkSize -> do + let chunks = List.chunksOf chunkSize addrs + mapM_ (mapConcurrently (push1 m)) chunks push1 :: NativePush -> Address -> Gundeck () push1 m a = do diff --git a/services/gundeck/test/unit/MockGundeck.hs b/services/gundeck/test/unit/MockGundeck.hs index db02cdbd663..7de66181428 100644 --- a/services/gundeck/test/unit/MockGundeck.hs +++ b/services/gundeck/test/unit/MockGundeck.hs @@ -400,6 +400,7 @@ instance MonadNativeTargets MockGundeck where mntgtLookupAddresses = mockLookupAddresses instance MonadMapAsync MockGundeck where + mntgtPerPushConcurrency = pure Nothing -- (unbounded) mntgtMapAsync f xs = Right <$$> mapM f xs -- (no concurrency) instance MonadPushAny MockGundeck where From 7bc768606601a13497a1e64fa9b26c5b5a5cb74e Mon Sep 17 00:00:00 2001 From: jschaul Date: Wed, 6 Nov 2019 14:25:10 +0100 Subject: [PATCH 25/26] stern add endpoint (#896) --- tools/stern/src/Stern/API.hs | 11 +++++++++++ tools/stern/src/Stern/Types.hs | 31 ++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/tools/stern/src/Stern/API.hs b/tools/stern/src/Stern/API.hs index 920bcadc0cc..010b15c0398 100644 --- a/tools/stern/src/Stern/API.hs +++ b/tools/stern/src/Stern/API.hs @@ -293,6 +293,15 @@ sitemap = do description "Team ID" Doc.response 200 "Team Information" Doc.end + get "/teams/:tid" (continue getTeamAdminInfo) $ + capture "tid" + + document "GET" "getTeamAdminInfo" $ do + summary "Gets information about a team's owners and admins only" + Doc.parameter Doc.Path "tid" Doc.bytes' $ + description "Team ID" + Doc.response 200 "Team Information about Owners and Admins" Doc.end + -- feature flags get "/teams/:tid/features/legalhold" (continue (liftM json . Intra.getLegalholdStatus)) $ @@ -533,6 +542,8 @@ deleteFromBlacklist emailOrPhone = do getTeamInfo :: TeamId -> Handler Response getTeamInfo = liftM json . Intra.getTeamInfo +getTeamAdminInfo :: TeamId -> Handler Response +getTeamAdminInfo = liftM (json . toAdminInfo) . 
Intra.getTeamInfo setLegalholdStatus :: JSON ::: TeamId ::: JsonRequest SetLegalHoldStatus -> Handler Response setLegalholdStatus (_ ::: tid ::: req) = do diff --git a/tools/stern/src/Stern/Types.hs b/tools/stern/src/Stern/Types.hs index a24ca114aae..d184175b41e 100644 --- a/tools/stern/src/Stern/Types.hs +++ b/tools/stern/src/Stern/Types.hs @@ -14,7 +14,6 @@ import Data.Aeson.TH import Data.ByteString.Conversion import Data.Json.Util import Data.Range -import Data.Text import Imports import Galley.Types.Teams import Galley.Types.Teams.Intra @@ -35,12 +34,42 @@ data TeamInfo = TeamInfo , tiMembers :: [TeamMemberInfo] } +data TeamAdminInfo = TeamAdminInfo + { taData :: TeamData + , taOwners :: [TeamMemberInfo] + , taAdmins :: [TeamMemberInfo] + , taMembers :: Int + } + +toAdminInfo :: TeamInfo -> TeamAdminInfo +toAdminInfo (TeamInfo d members) = TeamAdminInfo + { taData = d + , taMembers = length members + , taOwners = filter (\(TeamMemberInfo m) -> isOwner m) members + , taAdmins = filter (\(TeamMemberInfo m) -> isAdmin m) members + } + +-- FUTUREWORK: use the same criteria as in RoleOwner, RoleAdmin +isOwner :: TeamMember -> Bool +isOwner m = hasPermission m SetBilling + +isAdmin :: TeamMember -> Bool +isAdmin m = (hasPermission m AddTeamMember) && not (hasPermission m SetBilling) + instance ToJSON TeamInfo where toJSON (TeamInfo d m) = object [ "info" .= d , "members" .= m ] +instance ToJSON TeamAdminInfo where + toJSON (TeamAdminInfo d o a m) = object + [ "info" .= d + , "owners" .= o + , "admins" .= a + , "total_members" .= m + ] + newtype UserProperties = UserProperties { unUserProperties :: M.HashMap PropertyKey PropertyValue } deriving (Eq, Show, ToJSON) From 4b58cf99d2c53a63aa33ae4a588732890d3faeb8 Mon Sep 17 00:00:00 2001 From: jschaul Date: Wed, 6 Nov 2019 18:10:19 +0100 Subject: [PATCH 26/26] Changelog for release 2019-11-06 - New configuration options available (none mandatory). See #895 #900 #869 - Support HEAD requests for `/sso/initiate-bind` (#878) - Do not send conversation delete events to team members upon team deletion (#897) - Support SNI for bot registrations (by bumping http-client version) (#899) - Make gundeck handle AWS outages better. (#869, #890, #892) - Improve performance by avoiding unbounded intra-service traffic spikes on team deletions (#900) - Add optional native push connection throttling (#895) - New backoffice/stern endpoint (#896) - SAML: Store raw idp metadata with typed details in c* (#872) - documentation/script updates --- CHANGELOG.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58fbbc44fcd..68c480da637 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,27 @@ +# 2019-11-06 #901 + +## Relevant for self-hosters + +- New configuration options available (none mandatory). See #895 #900 #869 + +## Relevant for client developers + +- Support HEAD requests for `/sso/initiate-bind` (#878) + +## Bug fixes + +- Do not send conversation delete events to team members upon team deletion (#897) +- Support SNI for bot registrations (by bumping http-client version) (#899) + +## Internal changes + +- Make gundeck handle AWS outages better. (#869, #890, #892) +- Improve performance by avoiding unbounded intra-service traffic spikes on team deletions (#900) +- Add optional native push connection throttling (#895) +- New backoffice/stern endpoint (#896) +- SAML: Store raw idp metadata with typed details in c* (#872) +- documentation/script updates + # 2019-09-30 #868 ## Relevant for self-hosters
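
The throttling patches in this release (#895, #900) share one small pattern: split a
large list of work items into fixed-size chunks and only run one chunk's worth of
actions concurrently at a time, as in gundeck's
`concat <$> mapM (mapAsync f) (List.chunksOf chunkSize l)` above. A minimal,
self-contained sketch of that pattern follows; it is illustrative only — the name
`chunkedConcurrently` is invented here and this is not the shipped wire-server code.
It assumes the `async` and `extra` packages (the latter provides `Data.List.Extra`,
which the patches above also import).

```haskell
import Control.Concurrent.Async (mapConcurrently)
import Data.List.Extra (chunksOf)

-- Run an action over all inputs with at most 'chunkSize' actions in
-- flight at once: each chunk runs concurrently, and the chunks run one
-- after another.  A 'Nothing' chunk size means "unbounded", mirroring
-- gundeck's optional 'perNativePushConcurrency' setting.
chunkedConcurrently :: Maybe Int -> (a -> IO b) -> [a] -> IO [b]
chunkedConcurrently Nothing          f xs = mapConcurrently f xs
chunkedConcurrently (Just chunkSize) f xs =
  concat <$> mapM (mapConcurrently f) (chunksOf chunkSize xs)
```

For example, given some hypothetical `pushToSNS :: Address -> IO ()`, calling
`chunkedConcurrently (Just 32) pushToSNS addrs` would cap parallel SNS requests at
32 per incoming push, matching the `perNativePushConcurrency: 32` suggestion in
`gundeck.integration.yaml`, while `chunkedConcurrently Nothing` keeps the old
unbounded behaviour.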