Skip to content

Commit

Permalink
feat: getCoreContentText for any websites
Browse files Browse the repository at this point in the history
  • Loading branch information
josStorer committed Mar 25, 2023
1 parent ca32f87 commit 727ff2f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
45 changes: 45 additions & 0 deletions src/utils/get-core-content-text.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
 * Computes the on-screen area of an element from its bounding client rect.
 *
 * @param {Element} e - element to measure; must provide `getBoundingClientRect()`.
 * @returns {number} rect width multiplied by rect height.
 */
function getArea(e) {
  const { width, height } = e.getBoundingClientRect()
  return width * height
}

/**
 * Finds the element with the largest area inside the subtree rooted at `e`
 * (root included), considering only elements whose area is strictly below
 * 80% of the root's own area.
 *
 * Nodes are visited in document (pre-order) order and the comparison is
 * strict, so when several candidates share the same area the first one
 * encountered is kept.
 *
 * @param {Element} e - root of the subtree to search.
 * @returns {Element|null} the best candidate, or `null` when none qualifies.
 */
function findLargestElement(e) {
  // Upper bound that keeps near-root-sized wrappers out of the running.
  const areaCeiling = 0.8 * getArea(e)
  let bestArea = 0
  let bestNode = null

  // Explicit stack instead of recursion; children are pushed in reverse so
  // that they are popped (and measured) in their original order.
  const pending = [e]
  while (pending.length > 0) {
    const current = pending.pop()
    if (current.nodeType !== Node.ELEMENT_NODE) continue

    const size = getArea(current)
    if (size < areaCeiling && size > bestArea) {
      bestArea = size
      bestNode = current
    }

    const kids = Array.from(current.children)
    for (let i = kids.length - 1; i >= 0; i -= 1) {
      pending.push(kids[i])
    }
  }

  return bestNode
}

/**
 * Extracts the text of what is heuristically the page's core content area.
 *
 * The largest qualifying element under `document.body` is selected with
 * `findLargestElement`. The same search is then repeated inside that element;
 * if the nested winner covers more than half of the outer element's area it
 * is preferred, otherwise the outer element is used.
 *
 * @returns {string|undefined} the cleaned-up `textContent` of the chosen
 *   element, or `undefined` when no candidate element is found.
 */
export function getCoreContentText() {
  const largestElement = findLargestElement(document.body)
  // Bail out before searching deeper: findLargestElement measures its
  // argument, so handing it null would throw instead of returning nothing.
  if (!largestElement) return

  const secondLargestElement = findLargestElement(largestElement)
  const useSecond =
    secondLargestElement && getArea(secondLargestElement) > 0.5 * getArea(largestElement)
  const ret = (useSecond ? secondLargestElement : largestElement).textContent

  // NOTE(review): as written, the first replaceAll strips every single space;
  // confirm this is intended (a two-space pattern may have been meant).
  return ret.trim().replaceAll(' ', '').replaceAll('\n\n', '').replaceAll(',,', '')
}
1 change: 1 addition & 0 deletions src/utils/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export * from './ends-with-question-mark'
export * from './fetch-sse'
export * from './get-client-position'
export * from './get-conversation-pairs'
export * from './get-core-content-text'
export * from './get-possible-element-by-query-selector'
export * from './init-session'
export * from './is-firefox.mjs'
Expand Down

0 comments on commit 727ff2f

Please sign in to comment.