Skip to content

Commit

Permalink
Merge pull request #91 from khaosdoctor/fix/general
Browse files Browse the repository at this point in the history
General fixes and updates
  • Loading branch information
zolrath authored Jan 4, 2024
2 parents 48e7aae + b4aa46f commit 0368e49
Show file tree
Hide file tree
Showing 4 changed files with 275 additions and 332 deletions.
34 changes: 14 additions & 20 deletions main.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import { EditorExtensions } from "editor-enhancements";
import { Plugin, MarkdownView, Editor } from "obsidian";
import { CheckIf } from "checkif"
import { EditorExtensions } from "editor-enhancements"
import { Editor, Plugin } from "obsidian"
import getPageTitle from "scraper"
import {
AutoLinkTitleSettings,
AutoLinkTitleSettingTab,
AutoLinkTitleSettings,
DEFAULT_SETTINGS,
} from "./settings";
import { CheckIf } from "checkif";
import getPageTitle from "scraper";
} from "./settings"
import { randomBytes } from 'crypto'

interface PasteFunction {
(this: HTMLElement, ev: ClipboardEvent): void;
Expand Down Expand Up @@ -76,8 +77,8 @@ export default class AutoLinkTitle extends Plugin {
}
// If the cursor is on the URL part of a markdown link, fetch title and replace existing link title
else if (CheckIf.isLinkedUrl(selectedText)) {
var link = this.getUrlFromLink(selectedText);
this.convertUrlToTitledLink(editor, link);
const link = this.getUrlFromLink(selectedText)
this.convertUrlToTitledLink(editor, link)
}
}

Expand All @@ -91,15 +92,15 @@ export default class AutoLinkTitle extends Plugin {

// Simulate standard paste but using editor.replaceSelection with clipboard text since we can't seem to dispatch a paste event.
async manualPasteUrlWithTitle(editor: Editor): Promise<void> {
const clipboardText = await navigator.clipboard.readText()

// Only attempt fetch if online
if (!navigator.onLine) {
editor.replaceSelection(clipboardText);
return;
}

var clipboardText = await navigator.clipboard.readText();
if (clipboardText == null || clipboardText == "") return;
if (clipboardText == null || clipboardText == '') return

// If it's not a URL, we return false to allow the default paste handler to take care of it.
// Similarly, image urls don't have a meaningful <title> attribute so downloading it
Expand Down Expand Up @@ -220,8 +221,8 @@ export default class AutoLinkTitle extends Plugin {
const title = await getPageTitle(url);
return title.replace(/(\r\n|\n|\r)/gm, "").trim();
} catch (error) {
// console.error(error)
return "Site Unreachable";
console.error(error)
return 'Error fetching title'
}
}

Expand All @@ -230,15 +231,8 @@ export default class AutoLinkTitle extends Plugin {
return urlRegex.exec(link)[2];
}

// Custom hashid by @shabegom
private createBlockHash(): string {
let result = "";
var characters = "abcdefghijklmnopqrstuvwxyz0123456789";
var charactersLength = characters.length;
for (var i = 0; i < 4; i++) {
result += characters.charAt(Math.floor(Math.random() * charactersLength));
}
return result;
return randomBytes(6).toString('hex')
}

onunload() {
Expand Down
7 changes: 3 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
"@rollup/plugin-node-resolve": "^11.2.1",
"@rollup/plugin-typescript": "^8.2.1",
"@types/node": "^14.14.37",
"obsidian": "^0.12.17",
"obsidian": "^1.4.11",
"rollup": "^2.32.1",
"tslib": "^2.2.0",
"typescript": "^4.2.4"
},
"dependencies": {}
"typescript": "^5.2.2"
}
}
127 changes: 24 additions & 103 deletions scraper.ts
Original file line number Diff line number Diff line change
@@ -1,133 +1,54 @@
const electronPkg = require("electron");
import { request } from "obsidian";
import { requestUrl } from 'obsidian'

function blank(text: string): boolean {
return text === undefined || text === null || text === "";
return text === undefined || text === null || text === ''
}

function notBlank(text: string): boolean {
return !blank(text);
return !blank(text)
}

/**
 * Starts loading `url` in the given window and settles once the load finishes or fails.
 *
 * @param window - Object exposing `webContents` (an event emitter with `once`) and
 *   `loadURL(url)`; in this file it is an Electron `BrowserWindow`.
 * @param url - Address to load.
 * @returns A promise that resolves (with no value) on `did-finish-load` and rejects
 *   with an `Error` describing the failure on `did-fail-load`.
 */
function load(window: any, url: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    // `once` instead of `on`: each listener is only needed for a single settle,
    // so it should not stay attached to the window afterwards.
    window.webContents.once("did-finish-load", () => resolve());
    window.webContents.once(
      "did-fail-load",
      (_event: unknown, errorCode?: number, errorDescription?: string) => {
        // Reject with a real Error carrying the failure reason rather than the
        // bare event object, so callers that log it see why the load failed.
        reject(
          new Error(
            `Failed to load ${url}: ${errorDescription ?? "unknown error"} (${errorCode ?? "no code"})`
          )
        );
      }
    );

    // loadURL may return a promise that rejects when the load fails (it does in
    // Electron versions where loadURL is promise-based). Route that rejection
    // into this promise so it is never left unhandled; if the promise has
    // already settled this is a no-op.
    Promise.resolve(window.loadURL(url)).catch(reject);
  });
}

/**
 * Fetches a page title by loading `url` in a hidden Electron BrowserWindow and
 * reading the title of the loaded document.
 *
 * @param url - Address of the page to load.
 * @returns The page title when it is non-blank; `url` itself when the title is
 *   blank or cannot be read; `"Site Unreachable"` when the window cannot be
 *   created or the page fails to load.
 */
async function electronGetPageTitle(url: string): Promise<string> {
  // Declared outside the try so the finally block can always release it.
  let window: any = null;

  try {
    // Resolved inside the try: if `remote` is unavailable this falls through to
    // the "Site Unreachable" fallback instead of throwing out of the function.
    const { BrowserWindow } = electronPkg.remote;

    window = new BrowserWindow({
      width: 1000,
      height: 600,
      webPreferences: {
        // NOTE(review): webSecurity is kept disabled as in the original; confirm
        // it is actually required for a top-level navigation.
        webSecurity: false,
        // The window loads arbitrary remote pages, so page scripts must not be
        // given access to Node APIs. Reading the title does not need it.
        nodeIntegration: false,
        images: false,
      },
      show: false,
    });
    window.webContents.setAudioMuted(true);

    await load(window, url);

    try {
      const title = window.webContents.getTitle();
      return notBlank(title) ? title : url;
    } catch (_) {
      // The page loaded but its title could not be read: fall back to the URL.
      return url;
    }
  } catch (ex) {
    console.error(ex);
    return "Site Unreachable";
  } finally {
    // Always dispose of the hidden window, including when the load failed;
    // previously a failed load left the window alive.
    try {
      window?.destroy();
    } catch (ex) {
      console.error(ex);
    }
  }
}

async function nonElectronGetPageTitle(url: string): Promise<string> {
async function scrape(url: string): Promise<string> {
try {
const html = await request({ url });
const response = await requestUrl(url)
if (!response.headers['content-type'].includes('text/html')) return getUrlFinalSegment(url)
const html = response.text

const doc = new DOMParser().parseFromString(html, "text/html");
const title = doc.querySelectorAll("title")[0];
const doc = new DOMParser().parseFromString(html, 'text/html')
const title = doc.querySelector('title')

if (title == null || blank(title?.innerText)) {
if (blank(title?.innerText)) {
// If site is javascript based and has a no-title attribute when unloaded, use it.
var noTitle = title?.getAttr("no-title");
var noTitle = title?.getAttr('no-title')
if (notBlank(noTitle)) {
return noTitle;
return noTitle
}

// Otherwise, if the site has no title or requires JavaScript to render one, fall back to returning the URL itself
return url;
return url
}

return title.innerText;
return title.innerText
} catch (ex) {
console.error(ex);

return "Site Unreachable";
console.error(ex)
return 'Site Unreachable'
}
}

function getUrlFinalSegment(url: string): string {
try {
const segments = new URL(url).pathname.split('/');
const last = segments.pop() || segments.pop(); // Handle potential trailing slash
return last;
const segments = new URL(url).pathname.split('/')
const last = segments.pop() || segments.pop() // Handle potential trailing slash
return last
} catch (_) {
return "File"
}
}

async function tryGetFileType(url: string) {
try {
const response = await fetch(url, { method: "HEAD" });

// Ensure site returns an ok status code before scraping
if (!response.ok) {
return "Site Unreachable";
}

// Ensure site is an actual HTML page and not a pdf or 3 gigabyte video file.
let contentType = response.headers.get("content-type");
if (!contentType.includes("text/html")) {
return getUrlFinalSegment(url);
}
return null;
} catch (err) {
return null;
return 'File'
}
}

export default async function getPageTitle(url: string): Promise<string> {
// If we're on Desktop use the Electron scraper
if (!(url.startsWith("http") || url.startsWith("https"))) {
url = "https://" + url;
export default async function getPageTitle(url: string) {
if (!(url.startsWith('http') || url.startsWith('https'))) {
url = 'https://' + url
}

// Try to do a HEAD request to see if the site is reachable and if it's an HTML page
// If we error out due to CORS, we'll just try to scrape the page anyway.
let fileType = await tryGetFileType(url);
if (fileType) {
return fileType;
}

if (electronPkg != null) {
return electronGetPageTitle(url);
} else {
return nonElectronGetPageTitle(url);
}
return scrape(url)
}
Loading

0 comments on commit 0368e49

Please sign in to comment.