diff --git a/lib/ad.ts b/lib/ad.ts index 284264e..b8d4bf0 100644 --- a/lib/ad.ts +++ b/lib/ad.ts @@ -26,7 +26,7 @@ export class Ad extends AdInfo { * (e.g., storing ad URLs entered by a user for delayed scraping). * * `Ad.isScraped()` returns `false` for `Ad` objects constructed in this - * way unless `scraped` is passed as `true`or they are subsequently scraped + * way unless `scraped` is passed as `true` or they are subsequently scraped * by calling `Ad.scrape()`, which causes the scraper to replace the ad's * information with what is found at its URL. * diff --git a/lib/backends/html-searcher.ts b/lib/backends/html-searcher.ts index 2bf936c..84a97ae 100644 --- a/lib/backends/html-searcher.ts +++ b/lib/backends/html-searcher.ts @@ -10,100 +10,55 @@ import { Ad } from "../ad"; import { BANNED, HTML_REQUEST_HEADERS, POSSIBLE_BAD_MARKUP } from "../constants"; import { AdInfo } from "../scraper"; import { PageResults, ResolvedSearchParameters } from "../search"; +import { getLargeImageURL } from "../helpers"; const KIJIJI_BASE_URL = "https://www.kijiji.ca"; const KIJIJI_SEARCH_URL = KIJIJI_BASE_URL + "/b-search.html"; -const IMG_REGEX = /\/s\-l\d+\.jpg$/; const LOCATION_REGEX = /(.+)(\/.*)$/; -/* Converts a date from a Kijiji ad result into a date object - (e.g., "< x hours ago", "yesterday", "dd/mm/yyyy") */ - function dateFromRelativeDateString(dateString: string): Date { - if (dateString) { - dateString = dateString.toLowerCase().replace(/\//g, " "); - - const split = dateString.split(" "); - const d = new Date(); - - if (split.length === 3) { - // dd/mm/yyyy format - d.setHours(0, 0, 0, 0); - d.setDate(parseInt(split[0])); - d.setMonth(parseInt(split[1]) - 1); - d.setFullYear(parseInt(split[2])); - return d; - } else if (split.length === 4) { - // "< x hours/minutes ago" format - const num = parseInt(split[1]); - const timeUnit = split[2]; - - if (timeUnit === "minutes") { - d.setMinutes(d.getMinutes() - num); - d.setSeconds(0, 0); - } else if (timeUnit === 
"hours") { - d.setHours(d.getHours() - num, 0, 0, 0); - } - return d; - } else if (dateString == "yesterday") { - d.setDate(d.getDate() - 1); - d.setHours(0, 0, 0, 0); - return d; - } - } - return new Date(NaN); -} - /* Extracts ad information from the HTML of a Kijiji ad results page */ function parseResultsHTML(html: string): Ad[] { const adResults: Ad[] = []; const $ = cheerio.load(html); - // Get info for each ad - const allAdElements = $(".regular-ad"); - const filteredAdElements = allAdElements.not(".third-party"); + if (html.trim().length === 0) { + return adResults; + } - filteredAdElements.each((_i, item) => { - const path = $(item).find("a.title").attr("href"); - const url = KIJIJI_BASE_URL + path; - const info: Partial = { - id: $(item).data("listing-id")?.toString() || "", - - title: $(item).find("a.title").text().trim(), - - image: ( - // `data-src` contains the URL of the image to lazy load - // - // `src` starts off with a placeholder image and will - // remain if the ad has no image - $(item).find(".image img").data("src") || $(item).find(".image img").attr("src") || "" - ).replace(IMG_REGEX, "/s-l2000.jpg"), - - date: dateFromRelativeDateString( - // For some reason, some categories (like anything under - // SERVICES) use different markup than usual - // - // The string split is needed to handle: - // - // - // Some date - //
- // Some location - // - // - // AKA "Some date\nSome location" - ($(item).find(".date-posted").text() || $(item).find(".posted").text()).trim().split("\n")[0] - ), - - // Pick a format, Kijiji - description: ($(item).find(".description > p").text() || $(item).find(".description").text()).trim() - }; + // Kijiji is nice and gives us an object containing ad info + const resultJson = $("script#__NEXT_DATA__").text().trim(); + if (!resultJson) { + throw new Error(`Kijiji result JSON not present. ${POSSIBLE_BAD_MARKUP}`); + } - if (!path) { - throw new Error(`Result ad has no URL. ${POSSIBLE_BAD_MARKUP}`); + const allAds: any[] | undefined = JSON.parse(resultJson) + .props + ?.pageProps + ?.listings; + if (allAds === undefined) { + throw new Error(`Result JSON could not be parsed. ${POSSIBLE_BAD_MARKUP}`); + } + + // All non-sponsored ads + const filteredAds = allAds.filter(ad => ad.adSource === "ORGANIC"); + + for (const ad of filteredAds) { + if (!ad.seoUrl || !ad.id || !ad.title || !ad.activationDate) { + throw new Error(`Result ad could not be parsed. 
${POSSIBLE_BAD_MARKUP}`); } + const url = KIJIJI_BASE_URL + ad.seoUrl; + const info: Partial = { + id: ad.id, + title: ad.title.trim(), + image: getLargeImageURL((ad.imageUrls || [])[0] || ""), + date: new Date(ad.activationDate), + description: (ad.description || "").trim() + }; + adResults.push(new Ad(url, info)); - }); + } + return adResults; } @@ -154,7 +109,7 @@ export class HTMLSearcher { }) .then(body => ({ pageResults: parseResultsHTML(body), - isLastPage: body.indexOf('"isLastPage":true') !== -1 + isLastPage: body.indexOf("pagination-next-link") === -1 })); } } \ No newline at end of file diff --git a/lib/backends/test/api-scraper.spec.ts b/lib/backends/test/api-scraper.spec.ts index ecdfa10..d6447cc 100644 --- a/lib/backends/test/api-scraper.spec.ts +++ b/lib/backends/test/api-scraper.spec.ts @@ -133,8 +133,8 @@ describe("Ad API scraper", () => { it.each` test | xml ${"Bad markup"} | ${"Bad markup"} - ${"Missing id"} | ${createAdXML({})} - ${"Missing title"} | ${createAdXML({ id: "123" })} + ${"Missing ID"} | ${createAdXML({ title: "My ad title", date: new Date() })} + ${"Missing title"} | ${createAdXML({ id: "123", date: new Date() })} ${"Missing date"} | ${createAdXML({ id: "123", title: "My ad title" })} `("should fail to scrape invalid XML ($test)", async ({ xml }) => { mockResponse(xml); diff --git a/lib/backends/test/api-searcher.spec.ts b/lib/backends/test/api-searcher.spec.ts index 545a790..c981549 100644 --- a/lib/backends/test/api-searcher.spec.ts +++ b/lib/backends/test/api-searcher.spec.ts @@ -16,13 +16,13 @@ describe("Search result API scraper", () => { }); type MockAdInfo = { - url?: string; - id?: string; - title?: string; - date?: Date; + url: string; + id: string; + title: string; + date: Date; }; - const createAdXML = (info: MockAdInfo) => { + const createAdXML = (info: Partial) => { return ` ${info.url ? 
`` : ""} diff --git a/lib/backends/test/html-scraper.spec.ts b/lib/backends/test/html-scraper.spec.ts index c0f695e..ed76ea5 100644 --- a/lib/backends/test/html-scraper.spec.ts +++ b/lib/backends/test/html-scraper.spec.ts @@ -67,8 +67,8 @@ describe("Ad HTML scraper", () => { ${"Missing config property"} | ${createAdHTML({ abc: 123 })} ${"Missing adInfo property"} | ${createAdHTML({ config: {} })} ${"Missing VIP property"} | ${createAdHTML({ config: { adInfo: {} } })} - ${"Missing ID"} | ${createAdHTML({ config: { adInfo: {}, VIP: {} } })} - ${"Missing title"} | ${createAdHTML({ config: { adInfo: {}, VIP: { adId: 1234 } } })} + ${"Missing ID"} | ${createAdHTML({ config: { adInfo: { title: "Test" }, VIP: { sortingDate: 0 } } })} + ${"Missing title"} | ${createAdHTML({ config: { adInfo: {}, VIP: { adId: 1234, sortingDate: 0 } } })} ${"Missing date"} | ${createAdHTML({ config: { adInfo: { title: "Test" }, VIP: { adId: 1234 } } })} `("should fail to scrape invalid HTML ($test)", async ({ html }) => { mockResponse(html); diff --git a/lib/backends/test/html-searcher.spec.ts b/lib/backends/test/html-searcher.spec.ts index 5d4a5bf..50021ab 100644 --- a/lib/backends/test/html-searcher.spec.ts +++ b/lib/backends/test/html-searcher.spec.ts @@ -4,102 +4,41 @@ import fetch from "node-fetch"; import qs from "querystring"; import { ResolvedSearchParameters, SearchParameters } from "../../search"; import { HTMLSearcher } from "../html-searcher"; +import * as helpers from "../../helpers"; -type ResultInfo = { - isFeatured: boolean; - isThirdParty: boolean; +type MockListing = { + seoUrl: string; + id: string; title: string; - path: string; description: string; - imageAttributes: string; - datePosted: string; - id: string; -}; - -const defaultResultInfo: ResultInfo = { - isFeatured: false, - isThirdParty: false, - title: "", - path: "/someAd", - description: "", - imageAttributes: "", - datePosted: "", - id: "" -}; - -// Result pages in most categories use this markup -const 
createStandardResultHTML = (info: Partial): string => { - info = { ...defaultResultInfo, ...info }; - - return ` -
-
-
-
- -
- -
-
- - -
- Some location - ${info.datePosted} -
- -
${info.description}
-
-
-
-
-
- `; + imageUrls: string[]; + activationDate: string; + adSource: string; +} + +const createResultInfo = (listings: Partial[] = []) => { + return { + props: { + pageProps: { + listings + } + } + }; }; -// For some reason, some categories (like anything under -// SERVICES) use different markup classes than usual -const createServiceResultHTML = (info: Partial): string => { - info = { ...defaultResultInfo, ...info }; - +const createResultHTML = (resultInfo: any = createResultInfo()) => { return ` - - - - - - - - - - -
- ${info.title} -

${info.description}

-
-
- -
-
- ${info.datePosted}
- Some location -
+ + + + + `; }; -describe.each` - markup | createResultHTML - ${"standard result page markup"} | ${createStandardResultHTML} - ${"service result page markup"} | ${createServiceResultHTML} -`("Search result HTML scraper ($markup)", ({ createResultHTML }) => { +describe("Search result HTML scraper", () => { const fetchSpy = fetch as any as jest.Mock; afterEach(() => { @@ -161,7 +100,7 @@ describe.each` validateRequestHeaders(); } }); - + describe("search parameters", () => { it("should pass all defined params in search URL", async () => { const params = { @@ -173,7 +112,7 @@ describe.each` }; fetchSpy.mockResolvedValueOnce({ status: 200, url: "http://example.com/search/results" }); - fetchSpy.mockResolvedValueOnce({ text: () => "" }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML() }); await search(params); @@ -207,8 +146,8 @@ describe.each` it("should be used for pagination", async () => { fetchSpy.mockResolvedValueOnce({ status: 200, url: "http://example.com/search/results" }); - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({}) }); - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({}) }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML() }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML() }); const searcher = new HTMLSearcher(); await searcher.getPageResults({ locationId: 0, categoryId: 0 }, 1); @@ -226,60 +165,24 @@ describe.each` }); describe("result page scraping", () => { - // Helpers for date tests - const nanDataValidator = (date: Date) => { - expect(Number.isNaN(date.getTime())).toBe(true); - }; - const makeSpecificDateValidator = (month: number, day: number, year: number) => { - return (date: Date) => { - const d = new Date(); - d.setMonth(month - 1); - d.setDate(day); - d.setFullYear(year); - d.setHours(0, 0, 0, 0); - - expect(date).toEqual(d); - } - }; - const makeMinutesAgoValidator = (minutes: number) => { - return (date: Date) => { - const minutesAgo = new Date(); - 
minutesAgo.setMinutes(minutesAgo.getMinutes() - minutes, 0, 0); - - expect(date).toEqual(minutesAgo); - } - }; - const makeHoursAgoValidator = (hours: number) => { - return (date: Date) => { - const hoursAgo = new Date(); - hoursAgo.setHours(hoursAgo.getHours() - hours, 0, 0, 0); - - expect(date).toEqual(hoursAgo); - } - }; - const makeDaysAgoValidator = (days: number) => { - return (date: Date) => { - const daysAgo = new Date(); - daysAgo.setDate(daysAgo.getDate() - days); - daysAgo.setHours(0, 0, 0, 0); - - expect(date).toEqual(daysAgo); - } - }; - const nowIshValidator = (date: Date) => { - const nowIsh = new Date(); - nowIsh.setSeconds(date.getSeconds()); - nowIsh.setMilliseconds(date.getMilliseconds()); - - expect(date).toEqual(nowIsh); - }; - beforeEach(() => { fetchSpy.mockResolvedValueOnce({ status: 200, url: "http://example.com/search/results" }); }); - it("should throw error if results page is invalid", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({ path: "" }) }); + it.each` + test | expectedError | html + ${"Bad markup"} | ${"Kijiji result JSON not present"} | ${"Bad markup"} + ${"Missing __NEXT_DATA__"} | ${"Kijiji result JSON not present"} | ${""} + ${"Empty __NEXT_DATA__"} | ${"Result JSON could not be parsed"} | ${createResultHTML({})} + ${"Missing props property"} | ${"Result JSON could not be parsed"} | ${createResultHTML({ abc: 123 })} + ${"Missing pageProps property"} | ${"Result JSON could not be parsed"} | ${createResultHTML({ props: {} })} + ${"Missing listings property"} | ${"Result JSON could not be parsed"} | ${createResultHTML({ props: { pageProps: {} } })} + ${"Missing URL"} | ${"Result ad could not be parsed"} | ${createResultHTML(createResultInfo([{ id: "123", title: "abc", activationDate: "2023-09-06T23:57:42.565Z", adSource: "ORGANIC" }]))} + ${"Missing ID"} | ${"Result ad could not be parsed"} | ${createResultHTML(createResultInfo([{ seoUrl: "/some-path", title: "abc", activationDate: 
"2023-09-06T23:57:42.565Z", adSource: "ORGANIC" }]))} + ${"Missing title"} | ${"Result ad could not be parsed"} | ${createResultHTML(createResultInfo([{ seoUrl: "/some-path", id: "123", activationDate: "2023-09-06T23:57:42.565Z", adSource: "ORGANIC" }]))} + ${"Missing date"} | ${"Result ad could not be parsed"} | ${createResultHTML(createResultInfo([{ seoUrl: "/some-path", id: "123", title: "abc", adSource: "ORGANIC" }]))} + `("should throw error if results page is invalid ($test)", async ({ expectedError, html }) => { + fetchSpy.mockResolvedValueOnce({ text: () => html }); try { await search(); @@ -287,16 +190,22 @@ describe.each` } catch (err) { expect(err).toBeInstanceOf(Error); expect((err as Error).message).toBe( - "Result ad has no URL. It is possible that Kijiji changed their " + - "markup. If you believe this to be the case, please open an issue " + - "at: https://github.com/mwpenny/kijiji-scraper/issues" + `${expectedError}. It is possible that Kijiji changed their ` + + "markup. 
If you believe this to be the case, please open an " + + "issue at: https://github.com/mwpenny/kijiji-scraper/issues" ); validateRequestHeaders(); } }); it("should scrape ID", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({ id: "123" }) }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + activationDate: (new Date()).toISOString(), + adSource: "ORGANIC" + }]))}); const { pageResults } = await search(); validateRequestHeaders(); @@ -306,89 +215,170 @@ describe.each` }); it("should scrape title", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({ title: "My title" }) }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: (new Date()).toISOString(), + adSource: "ORGANIC" + }]))}); const { pageResults } = await search(); validateRequestHeaders(); expect(pageResults).toEqual([expect.objectContaining({ - title: "My title" + title: "My ad title" })]); }); it.each` - test | imageAttributes | expectedValue - ${"with data-src"} | ${'data-src="/image" src="blah"'} | ${"/image"} - ${"with src"} | ${'data-src="" src="/image"'} | ${"/image"} - ${"with no attributes"} | ${""} | ${""} - ${"upsize"} | ${'src="/image/s-l123.jpg"'} | ${"/image/s-l2000.jpg"} - `("should scrape image ($test)", async ({ imageAttributes, expectedValue }) => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({ imageAttributes }) }); + test | urls | expectedURL + ${"no images"} | ${undefined} | ${""} + ${"empty images"} | ${[]} | ${""} + ${"one image"} | ${["image1"]} | ${"image1_large"} + ${"multiple images"} | ${["image1", "image2"]} | ${"image1_large"} + `("should scrape image ($test)", async ({ urls, expectedURL }) => { + const getLargeImageURLSpy = jest.spyOn(helpers, 
"getLargeImageURL"); + getLargeImageURLSpy.mockImplementation(url => url ? url + "_large" : url); + + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: (new Date()).toISOString(), + imageUrls: urls, + adSource: "ORGANIC" + }]))}); const { pageResults } = await search(); validateRequestHeaders(); expect(pageResults).toEqual([expect.objectContaining({ - image: expectedValue + image: expectedURL })]); expect(pageResults[0].isScraped()).toBe(false); + + getLargeImageURLSpy.mockRestore(); }); - it.each` - test | datePosted | validator - ${"no date"} | ${""} | ${nanDataValidator} - ${"invalid"} | ${"invalid"} | ${nanDataValidator} - ${"dd/mm/yyyy"} | ${"7/9/2020"} | ${makeSpecificDateValidator(9, 7, 2020)} - ${"minutes ago"} | ${"< 5 minutes ago"} | ${makeMinutesAgoValidator(5)} - ${"hours ago"} | ${"< 2 hours ago"} | ${makeHoursAgoValidator(2)} - ${"invalid ago"} | ${"< 1 parsec ago"} | ${nowIshValidator} - ${"yesterday"} | ${"yesterday"} | ${makeDaysAgoValidator(1)} - `("should scrape date ($test)", async ({ datePosted, validator }) => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({ datePosted }) }); + it("should scrape date", async () => { + const date = new Date(); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: date.toISOString(), + adSource: "ORGANIC" + }]))}); const { pageResults } = await search(); validateRequestHeaders(); + expect(pageResults.length).toBe(1); - validator(pageResults[0].date); - expect(pageResults[0].isScraped()).toBe(false); + + const result = pageResults[0]; + expect(result.date).toEqual(date); + expect(result.isScraped()).toBe(false); }); it("should scrape description", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => 
createResultHTML({ description: "My desc" }) }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: (new Date()).toISOString(), + adSource: "ORGANIC" + }]))}); const { pageResults } = await search(); validateRequestHeaders(); expect(pageResults).toEqual([expect.objectContaining({ - description: "My desc" + description: "My ad description" })]); expect(pageResults[0].isScraped()).toBe(false); }); it("should scrape url", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({ path: "/myad" }) }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: (new Date()).toISOString(), + adSource: "ORGANIC" + }]))}); const { pageResults } = await search(); validateRequestHeaders(); expect(pageResults).toEqual([expect.objectContaining({ - url: "https://www.kijiji.ca/myad" + url: "https://www.kijiji.ca/some-path" })]); expect(pageResults[0].isScraped()).toBe(false); }); it("should exclude featured ads", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({}) + createResultHTML({ isFeatured: true }) }); + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([ + { + seoUrl: "/some-path-1", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: (new Date()).toISOString(), + adSource: "ORGANIC" + }, + { + seoUrl: "/some-path-2", + id: "456", + title: "Non-organic ad", + description: "My ad description", + activationDate: (new Date()).toISOString(), + adSource: "MONSANTO" + } + ]))}); const { pageResults } = await search(); validateRequestHeaders(); expect(pageResults.length).toBe(1); + expect(pageResults[0].id).toBe("123"); expect(pageResults[0].isScraped()).toBe(false); }); - 
it("should exclude third-party ads", async () => { - fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML({}) + createResultHTML({ isThirdParty: true }) }); + it("should scrape each result ad", async () => { + fetchSpy.mockResolvedValueOnce({ text: () => createResultHTML(createResultInfo([ + { + seoUrl: "/some-path-1", + id: "1", + title: "Ad 1", + activationDate: (new Date(123)).toISOString(), + adSource: "ORGANIC" + }, + { + seoUrl: "/some-path-2", + id: "2", + title: "Ad 2", + activationDate: (new Date(123)).toISOString(), + adSource: "ORGANIC" + } + ]))}); const { pageResults } = await search(); validateRequestHeaders(); - expect(pageResults.length).toBe(1); + + expect(pageResults).toEqual([ + expect.objectContaining({ + id: "1", + title: "Ad 1" + }), + expect.objectContaining({ + id: "2", + title: "Ad 2" + }) + ]); + expect(pageResults[0].isScraped()).toBe(false); + expect(pageResults[1].isScraped()).toBe(false); }); it.each` @@ -396,9 +386,17 @@ describe.each` ${true} ${false} `("should detect last page (isLastPage=$isLastPage)", async ({ isLastPage }) => { - let mockResponse = createResultHTML({}); - if (isLastPage) { - mockResponse += '"isLastPage":true'; + let mockResponse = createResultHTML(createResultInfo([{ + seoUrl: "/some-path", + id: "123", + title: "My ad title", + description: "My ad description", + activationDate: (new Date()).toISOString(), + adSource: "ORGANIC" + }])); + + if (!isLastPage) { + mockResponse += "pagination-next-link"; } fetchSpy.mockResolvedValueOnce({ text: () => mockResponse }); diff --git a/lib/helpers.ts b/lib/helpers.ts index 3747495..8be979a 100644 --- a/lib/helpers.ts +++ b/lib/helpers.ts @@ -3,7 +3,7 @@ import cheerio from "cheerio"; -const IMG_REGEX = /\/\$_\d+\.(?:JPG|PNG)$/; +const IMG_REGEX = /\?rule=kijijica-\d+-/; /** * Kijiji scraping method @@ -42,10 +42,10 @@ export function isNumber(value: string): boolean { }; export function getLargeImageURL(url: string): string { - // Kijiji/eBay image URLs typically 
end with "$_dd.JPG", where "dd" is a - // number between 0 and 140 indicating the desired image size and - // quality. "57" is up to 1024x1024, the largest I've found. - return url.replace(IMG_REGEX, "/$_57.JPG"); + // Kijiji image URLs typically end with "?rule=kijijica-<width>-<format>", + // where "<width>" is a number indicating the width. 960px is the largest + // I've found to work. + return url.replace(IMG_REGEX, "?rule=kijijica-960-"); } export function cleanAdDescription(text: string): string { diff --git a/lib/search.ts b/lib/search.ts index 446c417..1b8fa2f 100644 --- a/lib/search.ts +++ b/lib/search.ts @@ -279,8 +279,9 @@ export function search(params: SearchParameters, options: SearchOptions & Scrape } else { await Promise.all(results.map(ad => { if (!ad.isScraped()) { - ad.scrape(); + return ad.scrape(); } + return Promise.resolve(); })); } } diff --git a/lib/test/helpers.spec.ts b/lib/test/helpers.spec.ts index 796bb8c..b3ab55a 100644 --- a/lib/test/helpers.spec.ts +++ b/lib/test/helpers.spec.ts @@ -56,10 +56,10 @@ describe("Helpers", () => { }); it.each` - test | url | expectedURL - ${"regular URL"} | ${"http://example.com"} | ${"http://example.com"} - ${"upsize JPG"} | ${"http://example.com/images/$_12.JPG"} | ${"http://example.com/images/$_57.JPG"} - ${"upsize PNG"} | ${"http://example.com/images/$_34.PNG"} | ${"http://example.com/images/$_57.JPG"} + test | url | expectedURL + ${"regular URL"} | ${"http://example.com"} | ${"http://example.com"} + ${"upsize JPG"} | ${"http://example.com/image?rule=kijijica-100-jpg"} | ${"http://example.com/image?rule=kijijica-960-jpg"} + ${"upsize WEBP"} | ${"http://example.com/image?rule=kijijica-640-webp"} | ${"http://example.com/image?rule=kijijica-960-webp"} `("getLargeImageURL should upsize image URLs ($test)", ({ url, expectedURL }) => { expect(getLargeImageURL(url)).toBe(expectedURL); }); diff --git a/package.json b/package.json index 61bcc92..abf7d86 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": 
"kijiji-scraper", - "version": "6.3.2", + "version": "6.3.3", "description": "A scraper for Kijiji ads", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -26,7 +26,7 @@ "prepare": "npm run build", "test": "jest --config jest.config.js", "test-integration": "jest ./tests/integrationTests.spec.js", - "prepublishOnly": "npm run test" + "prepack": "npm run test" }, "repository": { "type": "git",