Skip to content

Commit

Permalink
langchain[patch]: fix: make GithubRepoLoader robust against .gitmodules files w/o newline (langchain-ai#5045)
Browse files Browse the repository at this point in the history

* fix: make GithubRepoLoader robust against .gitmodules files without a trailing newline

* fixed linting issue: do not reassign the function parameter (ESLint no-param-reassign)
  • Loading branch information
the-powerpointer authored Apr 11, 2024
1 parent a993c92 commit b9f6325
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion langchain/src/document_loaders/web/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,13 +312,18 @@ export class GithubRepoLoader
private async parseGitmodules(
gitmodulesContent: string
): Promise<SubmoduleInfo[]> {
let validGitmodulesContent = gitmodulesContent;
// in case the .gitmodules file does not end with a newline, we add one to make the regex work
if (!validGitmodulesContent.endsWith("\n")) {
validGitmodulesContent += "\n";
}
// catches the initial line of submodule entries
const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
// catches the properties of a submodule
const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;

const submoduleInfos = [];
for (const [, name, propertyLines] of gitmodulesContent.matchAll(
for (const [, name, propertyLines] of validGitmodulesContent.matchAll(
submodulePattern
)) {
if (!name || !propertyLines) {
Expand Down

0 comments on commit b9f6325

Please sign in to comment.