Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Non-automatic localdocs. #3093

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions gpt4all-chat/qml/LocalDocsSettings.qml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,37 @@ MySettingsTab {
text: qsTr("LocalDocs Settings")
}

// Section header for the "Behavior" group of LocalDocs settings:
// a bold, large title followed by a thin horizontal divider.
ColumnLayout {
spacing: 10
// Section title; colors and font size come from the shared theme object
Label {
color: theme.styledTextColor
font.pixelSize: theme.fontSizeLarge
font.bold: true
text: qsTr("Behavior")
}

// 1px-high divider line stretched across the full layout width
Rectangle {
Layout.fillWidth: true
height: 1
color: theme.settingsDivider
}
}

// Settings row for the "Automatic Update" option: a label with help text
// next to a checkbox bound to MySettings.localDocsAutomaticUpdate.
// NOTE(review): the Database object appears to receive this setting only as a
// constructor argument — confirm whether toggling it takes effect before a restart.
RowLayout {
MySettingsLabel {
id: automaticUpdateLabel
text: qsTr("Automatic Update")
helpText: qsTr("Whenever a file or folder changes it should automatically be re-indexed/embedded.")
}
MyCheckBox {
id: automaticUpdateBox
// The displayed state always follows the stored setting
checked: MySettings.localDocsAutomaticUpdate
// Clicking flips the stored setting; the binding above then updates the checkbox
onClicked: {
MySettings.localDocsAutomaticUpdate = !MySettings.localDocsAutomaticUpdate
}
}
}

ColumnLayout {
spacing: 10
Label {
Expand Down
14 changes: 13 additions & 1 deletion gpt4all-chat/qml/LocalDocsView.qml
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ Rectangle {
if (model.currentEmbeddingsToIndex !== 0)
return theme.altProgressBackground

if (model.outOfDate)
return theme.altProgressBackground

if (model.forceIndexing)
return theme.red200

Expand Down Expand Up @@ -247,6 +250,9 @@ Rectangle {
if (model.currentEmbeddingsToIndex !== 0)
return theme.altProgressText

if (model.outOfDate)
return theme.altProgressText

if (model.forceIndexing)
return theme.textErrorColor

Expand All @@ -267,6 +273,9 @@ Rectangle {
if (model.forceIndexing)
return qsTr("REQUIRES UPDATE")

if (model.outOfDate)
return qsTr("OUT OF DATE")

if (model.installed)
return qsTr("READY")

Expand Down Expand Up @@ -305,8 +314,11 @@ Rectangle {
if (model.forceIndexing)
return qsTr("This collection requires an update after version change")

if (model.outOfDate)
return qsTr("This collection is out of date and needs to be rebuilt")

if (model.installed)
return qsTr("Automatically reindexes upon changes to the folder")
return qsTr("This collection is up to date")

return qsTr("Installation in progress")
}
Expand Down
71 changes: 60 additions & 11 deletions gpt4all-chat/src/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1014,8 +1014,9 @@ bool Database::initDb(const QString &modelPath, const QList<CollectionItem> &old
return true;
}

Database::Database(int chunkSize, QStringList extensions)
Database::Database(bool automaticUpdate, int chunkSize, QStringList extensions)
: QObject(nullptr)
, m_automaticUpdate(automaticUpdate)
, m_chunkSize(chunkSize)
, m_scannedFileExtensions(std::move(extensions))
, m_scanIntervalTimer(new QTimer(this))
Expand Down Expand Up @@ -1100,6 +1101,7 @@ void Database::updateFolderToIndex(int folder_id, size_t countForFolder, bool se
sendChunkList(); // send any remaining embedding chunks to llm
item.indexing = false;
item.installed = true;
item.outOfDate = false;

// Set the last update if we are done
if (item.startUpdate > item.lastUpdate && item.currentEmbeddingsToIndex == 0)
Expand Down Expand Up @@ -1599,6 +1601,41 @@ void Database::enqueueDocumentInternal(DocumentInfo &&info, bool prepend)
queue.insert(prepend ? queue.begin() : queue.end(), std::move(info));
}

/// Checks whether any of the given on-disk documents differs from what the
/// database has recorded for it.
///
/// @param folder_id  Id of the folder the documents belong to; currently not consulted.
/// @param infos      Documents found on disk for the folder. Each entry's file
///                   attributes are re-read in place; the list itself is not consumed.
/// @return true as soon as one document is missing, unreadable, absent from the
///         database, or has a modification time that differs from the stored one;
///         false if every document that could be looked up matches.
///
/// NOTE(review): only the documents in `infos` are examined, so a file that was
/// indexed earlier and has since been removed from disk is presumably not detected
/// here — confirm that the caller's scan covers that case.
bool Database::isOutOfDate([[maybe_unused]] int folder_id, std::list<DocumentInfo> &&infos) const
{
    for (auto &info : infos) {
        // Re-read the file attributes so the checks below see the current on-disk state
        info.file.stat();

        // A document that has been deleted or is no longer readable means the
        // on-disk state no longer matches the index
        if (!info.file.exists() || !info.file.isReadable())
            return true;

        const qint64 document_time = info.file.fileTime(QFile::FileModificationTime).toMSecsSinceEpoch();
        const QString document_path = info.file.canonicalFilePath();

        // Check and see if we already have this document
        QSqlQuery q(m_db);
        int existing_id = -1;
        qint64 existing_time = -1;
        if (!selectDocument(q, document_path, &existing_id, &existing_time)) {
            // The lookup itself failed: report it and move on to the next document
            // rather than flagging the whole collection because of a query error
            handleDocumentError("ERROR: Cannot select document", existing_id, document_path, q.lastError());
            continue;
        }

        // If not, then we are out of date
        if (existing_id == -1)
            return true;

        // If we have the document and the time does not match, then we are out of date
        if (document_time != existing_time)
            return true;
    }
    return false;
}

void Database::enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos)
{
// enqueue all documents
Expand Down Expand Up @@ -1766,7 +1803,7 @@ void Database::scanQueue()
return updateFolderToIndex(folder_id, countForFolder);
}

void Database::scanDocuments(int folder_id, const QString &folder_path)
void Database::scanDocuments(int folder_id, const QString &folder_path, bool forceIndexing)
{
#if defined(DEBUG)
qDebug() << "scanning folder for documents" << folder_path;
Expand All @@ -1791,9 +1828,14 @@ void Database::scanDocuments(int folder_id, const QString &folder_path)

if (!infos.empty()) {
CollectionItem item = guiCollectionItem(folder_id);
item.indexing = true;
const bool shouldIndex = m_automaticUpdate || forceIndexing;
if (shouldIndex)
item.indexing = true;
else
item.outOfDate = isOutOfDate(folder_id, std::move(infos));
updateGuiForCollectionItem(item);
enqueueDocuments(folder_id, std::move(infos));
if (shouldIndex)
enqueueDocuments(folder_id, std::move(infos));
} else {
updateFolderToIndex(folder_id, 0, false);
}
Expand Down Expand Up @@ -1847,7 +1889,7 @@ void Database::addCurrentFolders()
for (const auto &i : collections) {
if (!i.forceIndexing) {
addFolderToWatch(i.folder_path);
scanDocuments(i.folder_id, i.folder_path);
scanDocuments(i.folder_id, i.folder_path, false /*forceIndexing*/);
}
}

Expand Down Expand Up @@ -1982,7 +2024,7 @@ void Database::forceIndexing(const QString &collection, const QString &embedding
item.forceIndexing = false;
updateGuiForCollectionItem(item);
addFolderToWatch(folder.second);
scanDocuments(folder.first, folder.second);
scanDocuments(folder.first, folder.second, true /*forceIndexing*/);
}
}

Expand Down Expand Up @@ -2014,7 +2056,7 @@ void Database::forceRebuildFolder(const QString &path)
item.currentEmbeddingsToIndex = item.totalEmbeddingsToIndex = 0;
updateGuiForCollectionItem(item);

scanDocuments(folder_id, path);
scanDocuments(folder_id, path, true /*forceIndexing*/);
}

bool Database::addFolder(const QString &collection, const QString &path, const QString &embedding_model)
Expand Down Expand Up @@ -2058,7 +2100,7 @@ bool Database::addFolder(const QString &collection, const QString &path, const Q
// note: this is the existing embedding model if the collection was found
if (!item->embeddingModel.isNull()) {
addFolderToWatch(path);
scanDocuments(folder_id, path);
scanDocuments(folder_id, path, true /*forceIndexing*/);
}
}
return true;
Expand Down Expand Up @@ -2665,7 +2707,7 @@ void Database::changeFileExtensions(const QStringList &extensions)

for (const auto &i: std::as_const(collections)) {
if (!i.forceIndexing)
scanDocuments(i.folder_id, i.folder_path);
scanDocuments(i.folder_id, i.folder_path, false /*forceIndexing*/);
}
}

Expand Down Expand Up @@ -2702,6 +2744,13 @@ void Database::directoryChanged(const QString &path)
updateCollectionStatistics();

// Rescan the documents associated with the folder
if (folder_id != -1)
scanDocuments(folder_id, path);
if (folder_id != -1) {
if (m_automaticUpdate) {
scanDocuments(folder_id, path, false /*forceIndexing*/);
} else {
CollectionItem item = guiCollectionItem(folder_id);
item.outOfDate = true;
updateGuiForCollectionItem(item);
}
}
}
7 changes: 5 additions & 2 deletions gpt4all-chat/src/database.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ struct CollectionItem {
bool installed = false;
bool indexing = false;
bool forceIndexing = false;
bool outOfDate = false;
QString error;

// progress
Expand Down Expand Up @@ -189,7 +190,7 @@ class Database : public QObject
{
Q_OBJECT
public:
Database(int chunkSize, QStringList extensions);
Database(bool automaticUpdate, int chunkSize, QStringList extensions);
~Database() override;

bool isValid() const { return m_databaseValid; }
Expand All @@ -198,7 +199,7 @@ public Q_SLOTS:
void start();
bool scanQueueInterrupted() const;
void scanQueueBatch();
void scanDocuments(int folder_id, const QString &folder_path);
void scanDocuments(int folder_id, const QString &folder_path, bool forceIndexing);
void forceIndexing(const QString &collection, const QString &embedding_model);
void forceRebuildFolder(const QString &path);
bool addFolder(const QString &collection, const QString &path, const QString &embedding_model);
Expand Down Expand Up @@ -250,6 +251,7 @@ private Q_SLOTS:
DocumentInfo dequeueDocument();
void removeFolderFromDocumentQueue(int folder_id);
void enqueueDocumentInternal(DocumentInfo &&info, bool prepend = false);
bool isOutOfDate(int folder_id, std::list<DocumentInfo> &&infos) const;
void enqueueDocuments(int folder_id, std::list<DocumentInfo> &&infos);
void scanQueue();
bool cleanDB();
Expand Down Expand Up @@ -287,6 +289,7 @@ private Q_SLOTS:

private:
QSqlDatabase m_db;
bool m_automaticUpdate;
int m_chunkSize;
QStringList m_scannedFileExtensions;
QTimer *m_scanIntervalTimer;
Expand Down
3 changes: 2 additions & 1 deletion gpt4all-chat/src/localdocs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ LocalDocs::LocalDocs()
connect(MySettings::globalInstance(), &MySettings::localDocsFileExtensionsChanged, this, &LocalDocs::handleFileExtensionsChanged);

// Create the DB with the chunk size from settings
m_database = new Database(MySettings::globalInstance()->localDocsChunkSize(),
m_database = new Database(MySettings::globalInstance()->localDocsAutomaticUpdate(),
MySettings::globalInstance()->localDocsChunkSize(),
MySettings::globalInstance()->localDocsFileExtensions());

connect(this, &LocalDocs::requestStart, m_database,
Expand Down
5 changes: 5 additions & 0 deletions gpt4all-chat/src/localdocsmodel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ QVariant LocalDocsModel::data(const QModelIndex &index, int role) const
return item.embeddingModel;
case UpdatingRole:
return item.indexing || item.currentEmbeddingsToIndex != 0;
case OutOfDateRole:
return item.outOfDate;
}

return QVariant();
Expand Down Expand Up @@ -149,6 +151,7 @@ QHash<int, QByteArray> LocalDocsModel::roleNames() const
roles[FileCurrentlyProcessingRole] = "fileCurrentlyProcessing";
roles[EmbeddingModelRole] = "embeddingModel";
roles[UpdatingRole] = "updating";
roles[OutOfDateRole] = "outOfDate";
return roles;
}

Expand Down Expand Up @@ -200,6 +203,8 @@ void LocalDocsModel::updateCollectionItem(const CollectionItem &item)
changed.append(FileCurrentlyProcessingRole);
if (stored.embeddingModel != item.embeddingModel)
changed.append(EmbeddingModelRole);
if (stored.outOfDate != item.outOfDate)
changed.append(OutOfDateRole);

// preserve collection name as we ignore it for matching
QString collection = stored.collection;
Expand Down
3 changes: 2 additions & 1 deletion gpt4all-chat/src/localdocsmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ class LocalDocsModel : public QAbstractListModel
LastUpdateRole,
FileCurrentlyProcessingRole,
EmbeddingModelRole,
UpdatingRole
UpdatingRole,
OutOfDateRole
};

explicit LocalDocsModel(QObject *parent = nullptr);
Expand Down
4 changes: 4 additions & 0 deletions gpt4all-chat/src/mysettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ static const QVariantMap basicDefaults {
{ "localdocs/useRemoteEmbed", false },
{ "localdocs/nomicAPIKey", "" },
{ "localdocs/embedDevice", "Auto" },
{ "localdocs/automaticUpdate",false },
{ "network/attribution", "" },
};

Expand Down Expand Up @@ -224,6 +225,7 @@ void MySettings::restoreLocalDocsDefaults()
setLocalDocsUseRemoteEmbed(basicDefaults.value("localdocs/useRemoteEmbed").toBool());
setLocalDocsNomicAPIKey(basicDefaults.value("localdocs/nomicAPIKey").toString());
setLocalDocsEmbedDevice(basicDefaults.value("localdocs/embedDevice").toString());
setLocalDocsAutomaticUpdate(basicDefaults.value("localdocs/automaticUpdate").toBool());
}

void MySettings::eraseModel(const ModelInfo &info)
Expand Down Expand Up @@ -455,6 +457,7 @@ QStringList MySettings::localDocsFileExtensions() const { return getBasicSetting
// Getters for simple settings: each reads the value stored under its key via
// getBasicSetting() and converts it to the declared return type.
bool MySettings::localDocsUseRemoteEmbed() const { return getBasicSetting("localdocs/useRemoteEmbed").toBool(); }
QString MySettings::localDocsNomicAPIKey() const { return getBasicSetting("localdocs/nomicAPIKey" ).toString(); }
QString MySettings::localDocsEmbedDevice() const { return getBasicSetting("localdocs/embedDevice" ).toString(); }
// Whether LocalDocs should re-index automatically when a file or folder changes
bool MySettings::localDocsAutomaticUpdate() const{ return getBasicSetting("localdocs/automaticUpdate").toBool(); }
QString MySettings::networkAttribution() const { return getBasicSetting("network/attribution" ).toString(); }

// Enum-backed setting: read through getEnumSetting() using the chatThemeNames table
ChatTheme MySettings::chatTheme() const { return ChatTheme (getEnumSetting("chatTheme", chatThemeNames)); }
Expand All @@ -473,6 +476,7 @@ void MySettings::setLocalDocsFileExtensions(const QStringList &value) { setBasic
// Setters for simple settings: each forwards the new value to setBasicSetting()
// under its key. The third argument is presumably the property name used for the
// change notification — confirm against setBasicSetting().
void MySettings::setLocalDocsUseRemoteEmbed(bool value) { setBasicSetting("localdocs/useRemoteEmbed", value, "localDocsUseRemoteEmbed"); }
void MySettings::setLocalDocsNomicAPIKey(const QString &value) { setBasicSetting("localdocs/nomicAPIKey", value, "localDocsNomicAPIKey"); }
void MySettings::setLocalDocsEmbedDevice(const QString &value) { setBasicSetting("localdocs/embedDevice", value, "localDocsEmbedDevice"); }
// Stores whether LocalDocs should re-index automatically when a file or folder changes
void MySettings::setLocalDocsAutomaticUpdate(bool value) { setBasicSetting("localdocs/automaticUpdate",value, "localDocsAutomaticUpdate"); }
void MySettings::setNetworkAttribution(const QString &value) { setBasicSetting("network/attribution", value, "networkAttribution"); }

// Enum-backed setting: stores the entry of chatThemeNames looked up by the enum's integer value
void MySettings::setChatTheme(ChatTheme value) { setBasicSetting("chatTheme", chatThemeNames .value(int(value))); }
Expand Down
4 changes: 4 additions & 0 deletions gpt4all-chat/src/mysettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class MySettings : public QObject
Q_PROPERTY(bool localDocsUseRemoteEmbed READ localDocsUseRemoteEmbed WRITE setLocalDocsUseRemoteEmbed NOTIFY localDocsUseRemoteEmbedChanged)
Q_PROPERTY(QString localDocsNomicAPIKey READ localDocsNomicAPIKey WRITE setLocalDocsNomicAPIKey NOTIFY localDocsNomicAPIKeyChanged)
Q_PROPERTY(QString localDocsEmbedDevice READ localDocsEmbedDevice WRITE setLocalDocsEmbedDevice NOTIFY localDocsEmbedDeviceChanged)
Q_PROPERTY(bool localDocsAutomaticUpdate READ localDocsAutomaticUpdate WRITE setLocalDocsAutomaticUpdate NOTIFY localDocsAutomaticUpdateChanged)
Q_PROPERTY(QString networkAttribution READ networkAttribution WRITE setNetworkAttribution NOTIFY networkAttributionChanged)
Q_PROPERTY(bool networkIsActive READ networkIsActive WRITE setNetworkIsActive NOTIFY networkIsActiveChanged)
Q_PROPERTY(bool networkUsageStatsActive READ networkUsageStatsActive WRITE setNetworkUsageStatsActive NOTIFY networkUsageStatsActiveChanged)
Expand Down Expand Up @@ -185,6 +186,8 @@ class MySettings : public QObject
void setLocalDocsNomicAPIKey(const QString &value);
QString localDocsEmbedDevice() const;
void setLocalDocsEmbedDevice(const QString &value);
bool localDocsAutomaticUpdate() const;
void setLocalDocsAutomaticUpdate(bool value);

// Network settings
QString networkAttribution() const;
Expand Down Expand Up @@ -232,6 +235,7 @@ class MySettings : public QObject
void localDocsUseRemoteEmbedChanged();
void localDocsNomicAPIKeyChanged();
void localDocsEmbedDeviceChanged();
void localDocsAutomaticUpdateChanged();
void networkAttributionChanged();
void networkIsActiveChanged();
void networkPortChanged();
Expand Down