diff --git a/common/common.cmake b/common/common.cmake
index a60eb33af851..bf32054daa80 100644
--- a/common/common.cmake
+++ b/common/common.cmake
@@ -11,7 +11,6 @@ function(gpt4all_add_warning_options target)
         -Wextra-semi
         -Wformat=2
         -Wmissing-include-dirs
-        -Wnull-dereference
         -Wstrict-overflow=2
         -Wvla
         # errors
@@ -22,8 +21,6 @@ function(gpt4all_add_warning_options target)
         # disabled warnings
         -Wno-sign-compare
         -Wno-unused-parameter
-        -Wno-unused-function
-        -Wno-unused-variable
     )
     if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         target_compile_options("${target}" PRIVATE
diff --git a/gpt4all-backend/deps/llama.cpp-mainline b/gpt4all-backend/deps/llama.cpp-mainline
index b3b5c0571eda..58a55efc4ae5 160000
--- a/gpt4all-backend/deps/llama.cpp-mainline
+++ b/gpt4all-backend/deps/llama.cpp-mainline
@@ -1 +1 @@
-Subproject commit b3b5c0571eda3065035a7f25f7b84640b159d821
+Subproject commit 58a55efc4ae5dd3bc12887d47981faa7136027af
diff --git a/gpt4all-backend/include/gpt4all-backend/llmodel.h b/gpt4all-backend/include/gpt4all-backend/llmodel.h
index 1b60dae6faa2..ecb4a6ffe244 100644
--- a/gpt4all-backend/include/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/include/gpt4all-backend/llmodel.h
@@ -213,7 +213,7 @@ class LLModel {
 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
     // 'prompt' above calls these functions
-    virtual std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special = false) = 0;
+    virtual std::vector<Token> tokenize(std::string_view str, bool special = false) = 0;
     virtual bool isSpecialToken(Token id) const = 0;
     virtual std::string tokenToString(Token id) const = 0;
     virtual void initSampler(PromptContext &ctx) = 0;
diff --git a/gpt4all-backend/src/llamamodel.cpp b/gpt4all-backend/src/llamamodel.cpp
index cd3ffa578177..d34024f432a5 100644
--- a/gpt4all-backend/src/llamamodel.cpp
+++ b/gpt4all-backend/src/llamamodel.cpp
@@ -511,7 +511,7 @@ size_t LLamaModel::restoreState(std::span<const uint8_t> src)
     return llama_state_set_data(d_ptr->ctx, src.data(), src.size());
 }
 
-std::vector<LLModel::Token> LLamaModel::tokenize(PromptContext &ctx, std::string_view str, bool special)
+std::vector<LLModel::Token> LLamaModel::tokenize(std::string_view str, bool special)
 {
     bool atStart = m_tokenize_last_token == -1;
     bool insertSpace = atStart || isSpecialToken(m_tokenize_last_token);
diff --git a/gpt4all-backend/src/llamamodel_impl.h b/gpt4all-backend/src/llamamodel_impl.h
index 0d0ddf34abc2..f7b3a47e0b6e 100644
--- a/gpt4all-backend/src/llamamodel_impl.h
+++ b/gpt4all-backend/src/llamamodel_impl.h
@@ -54,7 +54,7 @@ class LLamaModel : public LLModel {
     bool m_supportsCompletion = false;
 
 protected:
-    std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override;
+    std::vector<Token> tokenize(std::string_view str, bool special) override;
     bool isSpecialToken(Token id) const override;
     std::string tokenToString(Token id) const override;
     void initSampler(PromptContext &ctx) override;
diff --git a/gpt4all-backend/src/llmodel_shared.cpp b/gpt4all-backend/src/llmodel_shared.cpp
index c1e969d4b940..3868f0d07ad3 100644
--- a/gpt4all-backend/src/llmodel_shared.cpp
+++ b/gpt4all-backend/src/llmodel_shared.cpp
@@ -90,41 +90,33 @@ void LLModel::prompt(const std::string &prompt,
         }
     }
 
-    auto old_n_past = promptCtx.n_past; // prepare to fake n_past for tokenize
-
     // tokenize the user prompt
     std::vector<Token> embd_inp;
     if (placeholders.empty()) {
         // this is unusual, but well-defined
         std::cerr << __func__ << ": prompt template has no placeholder\n";
-        embd_inp = tokenize(promptCtx, promptTemplate, true);
+        embd_inp = tokenize(promptTemplate, true);
     } else {
         // template: beginning of user prompt
         const auto &phUser = placeholders[0];
         std::string userPrefix(phUser.prefix());
-        if (!userPrefix.empty()) {
-            embd_inp = tokenize(promptCtx, userPrefix, true);
-            promptCtx.n_past += embd_inp.size();
-        }
+        if (!userPrefix.empty())
+            embd_inp = tokenize(userPrefix, true);
 
         // user input (shouldn't have special token processing)
-        auto tokens = tokenize(promptCtx, prompt, special);
+        auto tokens = tokenize(prompt, special);
         embd_inp.insert(embd_inp.end(), tokens.begin(), tokens.end());
-        promptCtx.n_past += tokens.size();
 
         // template: end of user prompt + start of assistant prompt
         size_t start = phUser.position() + phUser.length();
         size_t end = placeholders.size() >= 2 ? placeholders[1].position() : promptTemplate.length();
         auto userToAsst = promptTemplate.substr(start, end - start);
         if (!userToAsst.empty()) {
-            tokens = tokenize(promptCtx, userToAsst, true);
+            tokens = tokenize(userToAsst, true);
             embd_inp.insert(embd_inp.end(), tokens.begin(), tokens.end());
-            promptCtx.n_past += tokens.size();
         }
     }
 
-    promptCtx.n_past = old_n_past; // restore n_past so decodePrompt can increment it
-
     // decode the user prompt
     if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp))
         return; // error
@@ -133,7 +125,7 @@ void LLModel::prompt(const std::string &prompt,
     if (!fakeReply) {
         generateResponse(responseCallback, allowContextShift, promptCtx);
     } else {
-        embd_inp = tokenize(promptCtx, *fakeReply, false);
+        embd_inp = tokenize(*fakeReply, false);
         if (!decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp, true))
             return; // error
     }
@@ -148,7 +140,7 @@ void LLModel::prompt(const std::string &prompt,
         asstSuffix = "\n\n"; // default to a blank link, good for e.g. Alpaca
     }
     if (!asstSuffix.empty()) {
-        embd_inp = tokenize(promptCtx, asstSuffix, true);
+        embd_inp = tokenize(asstSuffix, true);
         decodePrompt(promptCallback, responseCallback, allowContextShift, promptCtx, embd_inp);
     }
 }
diff --git a/gpt4all-chat/deps/usearch b/gpt4all-chat/deps/usearch
index 1f0618a86f9d..9e59f1036657 160000
--- a/gpt4all-chat/deps/usearch
+++ b/gpt4all-chat/deps/usearch
@@ -1 +1 @@
-Subproject commit 1f0618a86f9dbb7386237241cee96cc425dd7b55
+Subproject commit 9e59f1036657303b29eaf709945f339e403e5f2f
diff --git a/gpt4all-chat/src/chat.cpp b/gpt4all-chat/src/chat.cpp
index 347a9cd24c12..6b9c8898624f 100644
--- a/gpt4all-chat/src/chat.cpp
+++ b/gpt4all-chat/src/chat.cpp
@@ -31,7 +31,7 @@ Chat::Chat(QObject *parent)
     connectLLM();
 }
 
-Chat::Chat(bool isServer, QObject *parent)
+Chat::Chat(server_tag_t, QObject *parent)
     : QObject(parent)
     , m_id(Network::globalInstance()->generateUniqueId())
     , m_name(tr("Server Chat"))
diff --git a/gpt4all-chat/src/chat.h b/gpt4all-chat/src/chat.h
index 2d98322fef8d..05a6878d3520 100644
--- a/gpt4all-chat/src/chat.h
+++ b/gpt4all-chat/src/chat.h
@@ -45,6 +45,10 @@ class Chat : public QObject
     QML_UNCREATABLE("Only creatable from c++!")
 
 public:
+    // tag type for overload selection: pass Chat::server_tag to pick the server-chat constructor
+    struct server_tag_t { explicit server_tag_t() = default; };
+    static inline constexpr server_tag_t server_tag = server_tag_t();
+
     enum ResponseState {
         ResponseStopped,
         LocalDocsRetrieval,
@@ -56,7 +60,7 @@ class Chat : public QObject
     Q_ENUM(ResponseState)
 
     explicit Chat(QObject *parent = nullptr);
-    explicit Chat(bool isServer, QObject *parent = nullptr);
+    explicit Chat(server_tag_t, QObject *parent = nullptr);
     virtual ~Chat();
     void destroy() { m_llmodel->destroy(); }
     void connectLLM();
diff --git a/gpt4all-chat/src/chatapi.h b/gpt4all-chat/src/chatapi.h
index 463996b2302b..31d05310c6a7 100644
--- a/gpt4all-chat/src/chatapi.h
+++ b/gpt4all-chat/src/chatapi.h
@@ -98,9 +98,8 @@ class ChatAPI : public QObject, public LLModel {
     // them as they are only called from the default implementation of 'prompt' which we override and
     // completely replace
 
-    std::vector<Token> tokenize(PromptContext &ctx, std::string_view str, bool special) override
+    std::vector<Token> tokenize(std::string_view str, bool special) override
     {
-        (void)ctx;
         (void)str;
         (void)special;
         throw std::logic_error("not implemented");
diff --git a/gpt4all-chat/src/chatlistmodel.h b/gpt4all-chat/src/chatlistmodel.h
index 95f8e6f2a523..a30efd5aff32 100644
--- a/gpt4all-chat/src/chatlistmodel.h
+++ b/gpt4all-chat/src/chatlistmodel.h
@@ -147,7 +147,7 @@ class ChatListModel : public QAbstractListModel
         if (m_serverChat)
             return;
 
-        m_serverChat = new Chat(true /*isServer*/, this);
+        m_serverChat = new Chat(Chat::server_tag, this);
         beginInsertRows(QModelIndex(), m_chats.size(), m_chats.size());
         m_chats.append(m_serverChat);
         endInsertRows();
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index 2b133ce760bf..9898ba22f49e 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -179,6 +179,8 @@ void ChatLLM::handleForceMetalChanged(bool forceMetal)
         reloadModel();
         m_reloadingToChangeVariant = false;
     }
+#else
+    Q_UNUSED(forceMetal);
 #endif
 }
 
diff --git a/gpt4all-chat/src/chatllm.h b/gpt4all-chat/src/chatllm.h
index 201c7779516c..98348f5f891a 100644
--- a/gpt4all-chat/src/chatllm.h
+++ b/gpt4all-chat/src/chatllm.h
@@ -51,7 +51,7 @@ enum class LLModelTypeV1 { // since chat version 6 (v2.5.0)
     NONE      = -1, // no state
 };
 
-static LLModelTypeV1 parseLLModelTypeV1(int type)
+inline LLModelTypeV1 parseLLModelTypeV1(int type)
 {
     switch (LLModelTypeV1(type)) {
     case LLModelTypeV1::GPTJ:
@@ -68,7 +68,7 @@ static LLModelTypeV1 parseLLModelTypeV1(int type)
     }
 }
 
-static LLModelTypeV1 parseLLModelTypeV0(int v0)
+inline LLModelTypeV1 parseLLModelTypeV0(int v0)
 {
     switch (LLModelTypeV0(v0)) {
     case LLModelTypeV0::MPT:       return LLModelTypeV1::MPT;
diff --git a/gpt4all-chat/src/chatviewtextprocessor.cpp b/gpt4all-chat/src/chatviewtextprocessor.cpp
index 3dba986cb449..c7c221a3d991 100644
--- a/gpt4all-chat/src/chatviewtextprocessor.cpp
+++ b/gpt4all-chat/src/chatviewtextprocessor.cpp
@@ -967,8 +967,6 @@ void ChatViewTextProcessor::handleCodeBlocks()
         cursor.setPosition(matchesCode[index].capturedEnd(), QTextCursor::KeepAnchor);
         cursor.removeSelectedText();
 
-        int startPos = cursor.position();
-
         QTextFrameFormat frameFormat = frameFormatBase;
         QString capturedText = matchesCode[index].captured(1);
         QString codeLanguage;
@@ -1004,7 +1002,7 @@ void ChatViewTextProcessor::handleCodeBlocks()
         QTextFrame *mainFrame = cursor.currentFrame();
         cursor.setCharFormat(textFormat);
 
-        QTextFrame *frame = cursor.insertFrame(frameFormat);
+        cursor.insertFrame(frameFormat);
         QTextTable *table = cursor.insertTable(codeLanguage.isEmpty() ? 1 : 2, 1, tableFormat);
 
         if (!codeLanguage.isEmpty()) {
@@ -1016,7 +1014,6 @@ void ChatViewTextProcessor::handleCodeBlocks()
             headerCursor.insertText(codeLanguage);
             QTextTableCell copy = headerTable->cellAt(0, 1);
             QTextCursor copyCursor = copy.firstCursorPosition();
-            int startPos = copyCursor.position();
             CodeCopy newCopy;
             newCopy.text = lines.join("\n");
             newCopy.startPos = copyCursor.position();
diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp
index bec2e54810aa..1b86caace5e7 100644
--- a/gpt4all-chat/src/database.cpp
+++ b/gpt4all-chat/src/database.cpp
@@ -290,7 +290,7 @@ static bool selectCountChunks(QSqlQuery &q, int folder_id, int &count)
     return true;
 }
 
-static bool selectChunk(QSqlQuery &q, const QList<int> &chunk_ids, int retrievalSize)
+static bool selectChunk(QSqlQuery &q, const QList<int> &chunk_ids)
 {
     QString chunk_ids_str = QString::number(chunk_ids[0]);
     for (size_t i = 1; i < chunk_ids.size(); ++i)
@@ -307,10 +307,6 @@ static const QString INSERT_COLLECTION_SQL = uR"(
         returning id;
     )"_s;
 
-static const QString DELETE_COLLECTION_SQL = uR"(
-    delete from collections where name = ? and folder_id = ?;
-    )"_s;
-
 static const QString SELECT_FOLDERS_FROM_COLLECTIONS_SQL = uR"(
     select f.id, f.path
     from collections c
@@ -379,15 +375,6 @@ static bool addCollection(QSqlQuery &q, const QString &collection_name, const QD
     return true;
 }
 
-static bool removeCollection(QSqlQuery &q, const QString &collection_name, int folder_id)
-{
-    if (!q.prepare(DELETE_COLLECTION_SQL))
-        return false;
-    q.addBindValue(collection_name);
-    q.addBindValue(folder_id);
-    return q.exec();
-}
-
 static bool selectFoldersFromCollection(QSqlQuery &q, const QString &collection_name, QList<QPair<int, QString>> *folders)
 {
     if (!q.prepare(SELECT_FOLDERS_FROM_COLLECTIONS_SQL))
@@ -520,10 +507,6 @@ static const QString GET_FOLDER_EMBEDDING_MODEL_SQL = uR"(
     where ci.folder_id = ?;
     )"_s;
 
-static const QString SELECT_ALL_FOLDERPATHS_SQL = uR"(
-    select path from folders;
-    )"_s;
-
 static const QString FOLDER_REMOVE_ALL_DOCS_SQL[] = {
     uR"(
         delete from embeddings
@@ -598,17 +581,6 @@ static bool sqlGetFolderEmbeddingModel(QSqlQuery &q, int id, QString &embedding_
     return true;
 }
 
-static bool selectAllFolderPaths(QSqlQuery &q, QList<QString> *folder_paths)
-{
-    if (!q.prepare(SELECT_ALL_FOLDERPATHS_SQL))
-        return false;
-    if (!q.exec())
-        return false;
-    while (q.next())
-        folder_paths->append(q.value(0).toString());
-    return true;
-}
-
 static const QString INSERT_COLLECTION_ITEM_SQL = uR"(
     insert into collection_items(collection_id, folder_id)
     values(?, ?)
@@ -2499,7 +2471,7 @@ void Database::retrieveFromDB(const QList<QString> &collections, const QString &
         return;
 
     QSqlQuery q(m_db);
-    if (!selectChunk(q, searchResults, retrievalSize)) {
+    if (!selectChunk(q, searchResults)) {
         qDebug() << "ERROR: selecting chunks:" << q.lastError();
         return;
     }
diff --git a/gpt4all-chat/src/download.cpp b/gpt4all-chat/src/download.cpp
index e773a41fe385..0f5b7b5dd570 100644
--- a/gpt4all-chat/src/download.cpp
+++ b/gpt4all-chat/src/download.cpp
@@ -58,11 +58,6 @@ Download::Download()
     m_startTime = QDateTime::currentDateTime();
 }
 
-static bool operator==(const ReleaseInfo& lhs, const ReleaseInfo& rhs)
-{
-    return lhs.version == rhs.version;
-}
-
 std::strong_ordering Download::compareAppVersions(const QString &a, const QString &b)
 {
     static QRegularExpression versionRegex(R"(^(\d+(?:\.\d+){0,2})(-.+)?$)");