generated from obsidianmd/obsidian-sample-plugin
-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #91 from khaosdoctor/fix/general
General fixes and updates
- Loading branch information
Showing
4 changed files
with
275 additions
and
332 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,133 +1,54 @@ | ||
const electronPkg = require("electron"); | ||
import { request } from "obsidian"; | ||
import { requestUrl } from 'obsidian' | ||
|
||
function blank(text: string): boolean { | ||
return text === undefined || text === null || text === ""; | ||
return text === undefined || text === null || text === '' | ||
} | ||
|
||
function notBlank(text: string): boolean { | ||
return !blank(text); | ||
return !blank(text) | ||
} | ||
|
||
// async wrapper to load a url and settle on load finish or fail | ||
async function load(window: any, url: string): Promise<void> { | ||
return new Promise<void>((resolve, reject) => { | ||
window.webContents.on("did-finish-load", (event: any) => resolve(event)); | ||
window.webContents.on("did-fail-load", (event: any) => reject(event)); | ||
window.loadURL(url); | ||
}); | ||
} | ||
|
||
async function electronGetPageTitle(url: string): Promise<string> { | ||
const { remote } = electronPkg; | ||
const { BrowserWindow } = remote; | ||
|
||
try { | ||
const window = new BrowserWindow({ | ||
width: 1000, | ||
height: 600, | ||
webPreferences: { | ||
webSecurity: false, | ||
nodeIntegration: true, | ||
images: false, | ||
}, | ||
show: false, | ||
}); | ||
window.webContents.setAudioMuted(true); | ||
|
||
await load(window, url); | ||
|
||
try { | ||
const title = window.webContents.getTitle(); | ||
window.destroy(); | ||
|
||
if (notBlank(title)) { | ||
return title; | ||
} else { | ||
return url; | ||
} | ||
} catch (ex) { | ||
window.destroy(); | ||
return url; | ||
} | ||
} catch (ex) { | ||
console.error(ex); | ||
return "Site Unreachable"; | ||
} | ||
} | ||
|
||
async function nonElectronGetPageTitle(url: string): Promise<string> { | ||
async function scrape(url: string): Promise<string> { | ||
try { | ||
const html = await request({ url }); | ||
const response = await requestUrl(url) | ||
if (!response.headers['content-type'].includes('text/html')) return getUrlFinalSegment(url) | ||
const html = response.text | ||
|
||
const doc = new DOMParser().parseFromString(html, "text/html"); | ||
const title = doc.querySelectorAll("title")[0]; | ||
const doc = new DOMParser().parseFromString(html, 'text/html') | ||
const title = doc.querySelector('title') | ||
|
||
if (title == null || blank(title?.innerText)) { | ||
if (blank(title?.innerText)) { | ||
// If site is javascript based and has a no-title attribute when unloaded, use it. | ||
var noTitle = title?.getAttr("no-title"); | ||
var noTitle = title?.getAttr('no-title') | ||
if (notBlank(noTitle)) { | ||
return noTitle; | ||
return noTitle | ||
} | ||
|
||
// Otherwise, if the site has no title or requires JavaScript to render one, fall back to returning the URL itself | ||
return url; | ||
return url | ||
} | ||
|
||
return title.innerText; | ||
return title.innerText | ||
} catch (ex) { | ||
console.error(ex); | ||
|
||
return "Site Unreachable"; | ||
console.error(ex) | ||
return 'Site Unreachable' | ||
} | ||
} | ||
|
||
function getUrlFinalSegment(url: string): string { | ||
try { | ||
const segments = new URL(url).pathname.split('/'); | ||
const last = segments.pop() || segments.pop(); // Handle potential trailing slash | ||
return last; | ||
const segments = new URL(url).pathname.split('/') | ||
const last = segments.pop() || segments.pop() // Handle potential trailing slash | ||
return last | ||
} catch (_) { | ||
return "File" | ||
} | ||
} | ||
|
||
async function tryGetFileType(url: string) { | ||
try { | ||
const response = await fetch(url, { method: "HEAD" }); | ||
|
||
// Ensure site returns an ok status code before scraping | ||
if (!response.ok) { | ||
return "Site Unreachable"; | ||
} | ||
|
||
// Ensure site is an actual HTML page and not a pdf or 3 gigabyte video file. | ||
let contentType = response.headers.get("content-type"); | ||
if (!contentType.includes("text/html")) { | ||
return getUrlFinalSegment(url); | ||
} | ||
return null; | ||
} catch (err) { | ||
return null; | ||
return 'File' | ||
} | ||
} | ||
|
||
export default async function getPageTitle(url: string): Promise<string> { | ||
// If we're on Desktop use the Electron scraper | ||
if (!(url.startsWith("http") || url.startsWith("https"))) { | ||
url = "https://" + url; | ||
export default async function getPageTitle(url: string) { | ||
if (!(url.startsWith('http') || url.startsWith('https'))) { | ||
url = 'https://' + url | ||
} | ||
|
||
// Try to do a HEAD request to see if the site is reachable and if it's an HTML page | ||
// If we error out due to CORS, we'll just try to scrape the page anyway. | ||
let fileType = await tryGetFileType(url); | ||
if (fileType) { | ||
return fileType; | ||
} | ||
|
||
if (electronPkg != null) { | ||
return electronGetPageTitle(url); | ||
} else { | ||
return nonElectronGetPageTitle(url); | ||
} | ||
return scrape(url) | ||
} |
Oops, something went wrong.