From bfcabe97f01b11b1b4b0e16b933ac2930c51e1b6 Mon Sep 17 00:00:00 2001 From: Adam Treat Date: Mon, 14 Oct 2024 08:29:10 -0400 Subject: [PATCH] Non-automatic localdocs. Signed-off-by: Adam Treat --- gpt4all-chat/qml/LocalDocsSettings.qml | 31 +++++++++++ gpt4all-chat/qml/LocalDocsView.qml | 14 ++++- gpt4all-chat/src/database.cpp | 71 ++++++++++++++++++++++---- gpt4all-chat/src/database.h | 7 ++- gpt4all-chat/src/localdocs.cpp | 3 +- gpt4all-chat/src/localdocsmodel.cpp | 5 ++ gpt4all-chat/src/localdocsmodel.h | 3 +- gpt4all-chat/src/mysettings.cpp | 4 ++ gpt4all-chat/src/mysettings.h | 4 ++ 9 files changed, 126 insertions(+), 16 deletions(-) diff --git a/gpt4all-chat/qml/LocalDocsSettings.qml b/gpt4all-chat/qml/LocalDocsSettings.qml index db86481f5e0a..43925153568e 100644 --- a/gpt4all-chat/qml/LocalDocsSettings.qml +++ b/gpt4all-chat/qml/LocalDocsSettings.qml @@ -29,6 +29,37 @@ MySettingsTab { text: qsTr("LocalDocs Settings") } + ColumnLayout { + spacing: 10 + Label { + color: theme.styledTextColor + font.pixelSize: theme.fontSizeLarge + font.bold: true + text: qsTr("Behavior") + } + + Rectangle { + Layout.fillWidth: true + height: 1 + color: theme.settingsDivider + } + } + + RowLayout { + MySettingsLabel { + id: automaticUpdateLabel + text: qsTr("Automatic Update") + helpText: qsTr("Whenever a file or folder changes it should automatically be re-indexed/embedded.") + } + MyCheckBox { + id: automaticUpdateBox + checked: MySettings.localDocsAutomaticUpdate + onClicked: { + MySettings.localDocsAutomaticUpdate = !MySettings.localDocsAutomaticUpdate + } + } + } + ColumnLayout { spacing: 10 Label { diff --git a/gpt4all-chat/qml/LocalDocsView.qml b/gpt4all-chat/qml/LocalDocsView.qml index c87586043199..cdd9a4fd0908 100644 --- a/gpt4all-chat/qml/LocalDocsView.qml +++ b/gpt4all-chat/qml/LocalDocsView.qml @@ -210,6 +210,9 @@ Rectangle { if (model.currentEmbeddingsToIndex !== 0) return theme.altProgressBackground + if (model.outOfDate) + return theme.altProgressBackground + 
if (model.forceIndexing) return theme.red200 @@ -247,6 +250,9 @@ Rectangle { if (model.currentEmbeddingsToIndex !== 0) return theme.altProgressText + if (model.outOfDate) + return theme.altProgressText + if (model.forceIndexing) return theme.textErrorColor @@ -267,6 +273,9 @@ Rectangle { if (model.forceIndexing) return qsTr("REQUIRES UPDATE") + if (model.outOfDate) + return qsTr("OUT OF DATE") + if (model.installed) return qsTr("READY") @@ -305,8 +314,11 @@ Rectangle { if (model.forceIndexing) return qsTr("This collection requires an update after version change") + if (model.outOfDate) + return qsTr("This collection is out of date and needs to be rebuilt") + if (model.installed) - return qsTr("Automatically reindexes upon changes to the folder") + return qsTr("This collection is up to date") return qsTr("Installation in progress") } diff --git a/gpt4all-chat/src/database.cpp b/gpt4all-chat/src/database.cpp index 0f271410fb9d..dcf35b760447 100644 --- a/gpt4all-chat/src/database.cpp +++ b/gpt4all-chat/src/database.cpp @@ -1014,8 +1014,9 @@ bool Database::initDb(const QString &modelPath, const QList &old return true; } -Database::Database(int chunkSize, QStringList extensions) +Database::Database(bool automaticUpdate, int chunkSize, QStringList extensions) : QObject(nullptr) + , m_automaticUpdate(automaticUpdate) , m_chunkSize(chunkSize) , m_scannedFileExtensions(std::move(extensions)) , m_scanIntervalTimer(new QTimer(this)) @@ -1100,6 +1101,7 @@ void Database::updateFolderToIndex(int folder_id, size_t countForFolder, bool se sendChunkList(); // send any remaining embedding chunks to llm item.indexing = false; item.installed = true; + item.outOfDate = false; // Set the last update if we are done if (item.startUpdate > item.lastUpdate && item.currentEmbeddingsToIndex == 0) @@ -1599,6 +1601,41 @@ void Database::enqueueDocumentInternal(DocumentInfo &&info, bool prepend) queue.insert(prepend ? 
queue.begin() : queue.end(), std::move(info)); } +bool Database::isOutOfDate(int folder_id, std::list<DocumentInfo> &&infos) const +{ + for (auto &info : infos) { + // Update info + info.file.stat(); + + // If the doc has since been deleted or is no longer readable, then we report the collection as out of date, + // leaving the cleanup for the cleanup handler + if (!info.file.exists() || !info.file.isReadable()) + return true; + + const qint64 document_time = info.file.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch(); + const QString document_path = info.file.canonicalFilePath(); + const bool currentlyProcessing = info.currentlyProcessing; + + // Check and see if we already have this document + QSqlQuery q(m_db); + int existing_id = -1; + qint64 existing_time = -1; + if (!selectDocument(q, document_path, &existing_id, &existing_time)) { + handleDocumentError("ERROR: Cannot select document", existing_id, document_path, q.lastError()); + continue; + } + + // If not, then we are out of date + if (existing_id == -1) + return true; + + // If we have the document and the time does not match, then we are out of date + if (document_time != existing_time) + return true; + } + return false; +} + void Database::enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos) { // enqueue all documents @@ -1766,7 +1803,7 @@ void Database::scanQueue() return updateFolderToIndex(folder_id, countForFolder); } -void Database::scanDocuments(int folder_id, const QString &folder_path) +void Database::scanDocuments(int folder_id, const QString &folder_path, bool forceIndexing) { #if defined(DEBUG) qDebug() << "scanning folder for documents" << folder_path; @@ -1791,9 +1828,14 @@ void Database::scanDocuments(int folder_id, const QString &folder_path) if (!infos.empty()) { CollectionItem item = guiCollectionItem(folder_id); - item.indexing = true; + const bool shouldIndex = m_automaticUpdate || forceIndexing; + if (shouldIndex) + item.indexing = true; + else + item.outOfDate = isOutOfDate(folder_id, 
std::move(infos)); updateGuiForCollectionItem(item); - enqueueDocuments(folder_id, std::move(infos)); + if (shouldIndex) + enqueueDocuments(folder_id, std::move(infos)); } else { updateFolderToIndex(folder_id, 0, false); } @@ -1847,7 +1889,7 @@ void Database::addCurrentFolders() for (const auto &i : collections) { if (!i.forceIndexing) { addFolderToWatch(i.folder_path); - scanDocuments(i.folder_id, i.folder_path); + scanDocuments(i.folder_id, i.folder_path, false /*forceIndexing*/); } } @@ -1982,7 +2024,7 @@ void Database::forceIndexing(const QString &collection, const QString &embedding item.forceIndexing = false; updateGuiForCollectionItem(item); addFolderToWatch(folder.second); - scanDocuments(folder.first, folder.second); + scanDocuments(folder.first, folder.second, true /*forceIndexing*/); } } @@ -2014,7 +2056,7 @@ void Database::forceRebuildFolder(const QString &path) item.currentEmbeddingsToIndex = item.totalEmbeddingsToIndex = 0; updateGuiForCollectionItem(item); - scanDocuments(folder_id, path); + scanDocuments(folder_id, path, true /*forceIndexing*/); } bool Database::addFolder(const QString &collection, const QString &path, const QString &embedding_model) @@ -2058,7 +2100,7 @@ bool Database::addFolder(const QString &collection, const QString &path, const Q // note: this is the existing embedding model if the collection was found if (!item->embeddingModel.isNull()) { addFolderToWatch(path); - scanDocuments(folder_id, path); + scanDocuments(folder_id, path, true /*forceIndexing*/); } } return true; @@ -2665,7 +2707,7 @@ void Database::changeFileExtensions(const QStringList &extensions) for (const auto &i: std::as_const(collections)) { if (!i.forceIndexing) - scanDocuments(i.folder_id, i.folder_path); + scanDocuments(i.folder_id, i.folder_path, false /*forceIndexing*/); } } @@ -2702,6 +2744,13 @@ void Database::directoryChanged(const QString &path) updateCollectionStatistics(); // Rescan the documents associated with the folder - if (folder_id != -1) - 
scanDocuments(folder_id, path); + if (folder_id != -1) { + if (m_automaticUpdate) { + scanDocuments(folder_id, path, false /*forceIndexing*/); + } else { + CollectionItem item = guiCollectionItem(folder_id); + item.outOfDate = true; + updateGuiForCollectionItem(item); + } + } } diff --git a/gpt4all-chat/src/database.h b/gpt4all-chat/src/database.h index 0e90c260057a..d650d9bfcab5 100644 --- a/gpt4all-chat/src/database.h +++ b/gpt4all-chat/src/database.h @@ -134,6 +134,7 @@ struct CollectionItem { bool installed = false; bool indexing = false; bool forceIndexing = false; + bool outOfDate = false; QString error; // progress @@ -189,7 +190,7 @@ class Database : public QObject { Q_OBJECT public: - Database(int chunkSize, QStringList extensions); + Database(bool automaticUpdate, int chunkSize, QStringList extensions); ~Database() override; bool isValid() const { return m_databaseValid; } @@ -198,7 +199,7 @@ public Q_SLOTS: void start(); bool scanQueueInterrupted() const; void scanQueueBatch(); - void scanDocuments(int folder_id, const QString &folder_path); + void scanDocuments(int folder_id, const QString &folder_path, bool forceIndexing); void forceIndexing(const QString &collection, const QString &embedding_model); void forceRebuildFolder(const QString &path); bool addFolder(const QString &collection, const QString &path, const QString &embedding_model); @@ -250,6 +251,7 @@ private Q_SLOTS: DocumentInfo dequeueDocument(); void removeFolderFromDocumentQueue(int folder_id); void enqueueDocumentInternal(DocumentInfo &&info, bool prepend = false); + bool isOutOfDate(int folder_id, std::list<DocumentInfo> &&infos) const; void enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos); void scanQueue(); bool cleanDB(); @@ -287,6 +289,7 @@ private Q_SLOTS: private: QSqlDatabase m_db; + bool m_automaticUpdate; int m_chunkSize; QStringList m_scannedFileExtensions; QTimer *m_scanIntervalTimer; diff --git a/gpt4all-chat/src/localdocs.cpp b/gpt4all-chat/src/localdocs.cpp index 
0b69e83424b5..757b76ab187a 100644 --- a/gpt4all-chat/src/localdocs.cpp +++ b/gpt4all-chat/src/localdocs.cpp @@ -26,7 +26,8 @@ LocalDocs::LocalDocs() connect(MySettings::globalInstance(), &MySettings::localDocsFileExtensionsChanged, this, &LocalDocs::handleFileExtensionsChanged); // Create the DB with the chunk size from settings - m_database = new Database(MySettings::globalInstance()->localDocsChunkSize(), + m_database = new Database(MySettings::globalInstance()->localDocsAutomaticUpdate(), + MySettings::globalInstance()->localDocsChunkSize(), MySettings::globalInstance()->localDocsFileExtensions()); connect(this, &LocalDocs::requestStart, m_database, diff --git a/gpt4all-chat/src/localdocsmodel.cpp b/gpt4all-chat/src/localdocsmodel.cpp index a10773625e0c..8e47779462ec 100644 --- a/gpt4all-chat/src/localdocsmodel.cpp +++ b/gpt4all-chat/src/localdocsmodel.cpp @@ -121,6 +121,8 @@ QVariant LocalDocsModel::data(const QModelIndex &index, int role) const return item.embeddingModel; case UpdatingRole: return item.indexing || item.currentEmbeddingsToIndex != 0; + case OutOfDateRole: + return item.outOfDate; } return QVariant(); @@ -149,6 +151,7 @@ QHash<int, QByteArray> LocalDocsModel::roleNames() const roles[FileCurrentlyProcessingRole] = "fileCurrentlyProcessing"; roles[EmbeddingModelRole] = "embeddingModel"; roles[UpdatingRole] = "updating"; + roles[OutOfDateRole] = "outOfDate"; return roles; } @@ -200,6 +203,8 @@ void LocalDocsModel::updateCollectionItem(const CollectionItem &item) changed.append(FileCurrentlyProcessingRole); if (stored.embeddingModel != item.embeddingModel) changed.append(EmbeddingModelRole); + if (stored.outOfDate != item.outOfDate) + changed.append(OutOfDateRole); // preserve collection name as we ignore it for matching QString collection = stored.collection; diff --git a/gpt4all-chat/src/localdocsmodel.h b/gpt4all-chat/src/localdocsmodel.h index ddce8963997b..2bc45e8c20cf 100644 --- a/gpt4all-chat/src/localdocsmodel.h +++ b/gpt4all-chat/src/localdocsmodel.h @@ 
-70,7 +70,8 @@ class LocalDocsModel : public QAbstractListModel LastUpdateRole, FileCurrentlyProcessingRole, EmbeddingModelRole, - UpdatingRole + UpdatingRole, + OutOfDateRole }; explicit LocalDocsModel(QObject *parent = nullptr); diff --git a/gpt4all-chat/src/mysettings.cpp b/gpt4all-chat/src/mysettings.cpp index 38c8ab6821f5..881ff1a405f5 100644 --- a/gpt4all-chat/src/mysettings.cpp +++ b/gpt4all-chat/src/mysettings.cpp @@ -59,6 +59,7 @@ static const QVariantMap basicDefaults { { "localdocs/useRemoteEmbed", false }, { "localdocs/nomicAPIKey", "" }, { "localdocs/embedDevice", "Auto" }, + { "localdocs/automaticUpdate",false }, { "network/attribution", "" }, }; @@ -224,6 +225,7 @@ void MySettings::restoreLocalDocsDefaults() setLocalDocsUseRemoteEmbed(basicDefaults.value("localdocs/useRemoteEmbed").toBool()); setLocalDocsNomicAPIKey(basicDefaults.value("localdocs/nomicAPIKey").toString()); setLocalDocsEmbedDevice(basicDefaults.value("localdocs/embedDevice").toString()); + setLocalDocsAutomaticUpdate(basicDefaults.value("localdocs/automaticUpdate").toBool()); } void MySettings::eraseModel(const ModelInfo &info) @@ -455,6 +457,7 @@ QStringList MySettings::localDocsFileExtensions() const { return getBasicSetting bool MySettings::localDocsUseRemoteEmbed() const { return getBasicSetting("localdocs/useRemoteEmbed").toBool(); } QString MySettings::localDocsNomicAPIKey() const { return getBasicSetting("localdocs/nomicAPIKey" ).toString(); } QString MySettings::localDocsEmbedDevice() const { return getBasicSetting("localdocs/embedDevice" ).toString(); } +bool MySettings::localDocsAutomaticUpdate() const{ return getBasicSetting("localdocs/automaticUpdate").toBool(); } QString MySettings::networkAttribution() const { return getBasicSetting("network/attribution" ).toString(); } ChatTheme MySettings::chatTheme() const { return ChatTheme (getEnumSetting("chatTheme", chatThemeNames)); } @@ -473,6 +476,7 @@ void MySettings::setLocalDocsFileExtensions(const QStringList &value) { 
setBasic void MySettings::setLocalDocsUseRemoteEmbed(bool value) { setBasicSetting("localdocs/useRemoteEmbed", value, "localDocsUseRemoteEmbed"); } void MySettings::setLocalDocsNomicAPIKey(const QString &value) { setBasicSetting("localdocs/nomicAPIKey", value, "localDocsNomicAPIKey"); } void MySettings::setLocalDocsEmbedDevice(const QString &value) { setBasicSetting("localdocs/embedDevice", value, "localDocsEmbedDevice"); } +void MySettings::setLocalDocsAutomaticUpdate(bool value) { setBasicSetting("localdocs/automaticUpdate",value, "localDocsAutomaticUpdate"); } void MySettings::setNetworkAttribution(const QString &value) { setBasicSetting("network/attribution", value, "networkAttribution"); } void MySettings::setChatTheme(ChatTheme value) { setBasicSetting("chatTheme", chatThemeNames .value(int(value))); } diff --git a/gpt4all-chat/src/mysettings.h b/gpt4all-chat/src/mysettings.h index 85335f0b0696..caa5a278f14b 100644 --- a/gpt4all-chat/src/mysettings.h +++ b/gpt4all-chat/src/mysettings.h @@ -64,6 +64,7 @@ class MySettings : public QObject Q_PROPERTY(bool localDocsUseRemoteEmbed READ localDocsUseRemoteEmbed WRITE setLocalDocsUseRemoteEmbed NOTIFY localDocsUseRemoteEmbedChanged) Q_PROPERTY(QString localDocsNomicAPIKey READ localDocsNomicAPIKey WRITE setLocalDocsNomicAPIKey NOTIFY localDocsNomicAPIKeyChanged) Q_PROPERTY(QString localDocsEmbedDevice READ localDocsEmbedDevice WRITE setLocalDocsEmbedDevice NOTIFY localDocsEmbedDeviceChanged) + Q_PROPERTY(bool localDocsAutomaticUpdate READ localDocsAutomaticUpdate WRITE setLocalDocsAutomaticUpdate NOTIFY localDocsAutomaticUpdateChanged) Q_PROPERTY(QString networkAttribution READ networkAttribution WRITE setNetworkAttribution NOTIFY networkAttributionChanged) Q_PROPERTY(bool networkIsActive READ networkIsActive WRITE setNetworkIsActive NOTIFY networkIsActiveChanged) Q_PROPERTY(bool networkUsageStatsActive READ networkUsageStatsActive WRITE setNetworkUsageStatsActive NOTIFY networkUsageStatsActiveChanged) @@ -185,6 
+186,8 @@ class MySettings : public QObject void setLocalDocsNomicAPIKey(const QString &value); QString localDocsEmbedDevice() const; void setLocalDocsEmbedDevice(const QString &value); + bool localDocsAutomaticUpdate() const; + void setLocalDocsAutomaticUpdate(bool value); // Network settings QString networkAttribution() const; @@ -232,6 +235,7 @@ class MySettings : public QObject void localDocsUseRemoteEmbedChanged(); void localDocsNomicAPIKeyChanged(); void localDocsEmbedDeviceChanged(); + void localDocsAutomaticUpdateChanged(); void networkAttributionChanged(); void networkIsActiveChanged(); void networkPortChanged();