-
-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Christian Kellner authored and Christian Kellner committed Jun 12, 2024
1 parent b075e09, commit 06c4ebb
Showing 2 changed files with 35 additions and 34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,47 @@ | ||
import utils from '../utils.js'; | ||
|
||
let appliedBlackList = []; | ||
|
||
function normalize(o) { | ||
const id = o.id.substring(o.id.indexOf('-') + 1, o.id.length); | ||
const size = o.size || 'N/A m²'; | ||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €'); | ||
const address = o.address || 'No address available'; | ||
const title = o.title || 'No title available'; | ||
const link = `https://immo.swp.de/immobilien/${id}`; | ||
const description = o.description; | ||
return Object.assign(o, { id, address, price, size, title, link, description }); | ||
const id = o.id.substring(o.id.indexOf('-') + 1, o.id.length); | ||
const size = o.size || 'N/A m²'; | ||
const price = (o.price || '--- €').replace('Preis auf Anfrage', '--- €'); | ||
const title = o.title || 'No title available'; | ||
const link = `https://immo.swp.de/immobilien/${id}`; | ||
const description = o.description; | ||
return Object.assign(o, {id, price, size, title, link, description}); | ||
} | ||
|
||
function applyBlacklist(o) { | ||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList); | ||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList); | ||
return titleNotBlacklisted && descNotBlacklisted; | ||
const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList); | ||
const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList); | ||
return titleNotBlacklisted && descNotBlacklisted; | ||
} | ||
|
||
const config = { | ||
url: null, | ||
crawlContainer: '.js-serp-item', | ||
sortByDateParam: 's=most_recently_updated_first', | ||
crawlFields: { | ||
id: '@id', | ||
price: 'div.item__spec.item-spec-price | trim', | ||
size: 'div.item__spec.item-spec-area | trim', | ||
title: 'a.js-item-title-link@title', | ||
address: 'div.item__locality | removeNewline | trim', | ||
description: 'div.item__main-info-points.clearfix p small | removeNewline | trim', | ||
}, | ||
paginate: 'li.page-item.pagination__item a.page-link@href', | ||
normalize: normalize, | ||
filter: applyBlacklist, | ||
url: null, | ||
crawlContainer: '.js-serp-item', | ||
sortByDateParam: 's=most_recently_updated_first', | ||
crawlFields: { | ||
id: '.js-bookmark-btn@data-id', | ||
price: 'div.align-items-start div:first-child | trim', | ||
size: 'div.align-items-start div:nth-child(3) | trim', | ||
title: '.card-title h2 | trim', | ||
link: '.ci-search-result__link@href', | ||
description: '.js-show-more-item-sm | removeNewline | trim', | ||
}, | ||
paginate: 'li.page-item.pagination__item a.page-link@href', | ||
normalize: normalize, | ||
filter: applyBlacklist, | ||
}; | ||
export const init = (sourceConfig, blacklist) => { | ||
config.enabled = sourceConfig.enabled; | ||
config.url = sourceConfig.url; | ||
appliedBlackList = blacklist || []; | ||
config.enabled = sourceConfig.enabled; | ||
config.url = sourceConfig.url; | ||
appliedBlackList = blacklist || []; | ||
}; | ||
export const metaInformation = { | ||
name: 'Immo Südwest Presse', | ||
baseUrl: 'https://immo.swp.de/', | ||
id: 'immoswp', | ||
name: 'Immo Südwest Presse', | ||
baseUrl: 'https://immo.swp.de/', | ||
id: 'immoswp', | ||
}; | ||
export { config }; | ||
export {config}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters