Skip to content

Commit

Permalink
Revert encoding fix #482 (#495)
Browse files Browse the repository at this point in the history
  • Loading branch information
s0ph1e authored Jun 11, 2022
1 parent c6f60b8 commit 3bd2555
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 25 deletions.
3 changes: 2 additions & 1 deletion lib/config/defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ const config = {
],
request: {
throwHttpErrors: false,
responseType: 'buffer',
encoding: 'binary',
//cookieJar: true,
decompress: true,
headers: {
'user-agent': defaultRequestUserAgent
Expand Down
3 changes: 1 addition & 2 deletions lib/plugins/save-resource-to-fs-plugin.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ class SaveResourceToFileSystemPlugin {
registerAction('saveResource', async ({resource}) => {
const filename = path.join(absoluteDirectoryPath, resource.getFilename());
const text = resource.getText();
const encoding = typeof text === 'string' ? 'utf-8' : 'binary';
await fs.outputFile(filename, text, { encoding });
await fs.outputFile(filename, text, { encoding: 'binary' });
loadedResources.push(resource);
});

Expand Down
23 changes: 6 additions & 17 deletions lib/request.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
import got from 'got';
import logger from './logger.js';
import types from './config/resource-types.js';
import { extend, isPlainObject, getTypeByMime } from './utils/index.js';

const TEXT_RESOURCE_TYPES = [types.html, types.css];
import { extend, isPlainObject } from './utils/index.js';

/**
 * Extract the bare media type from a Content-Type header value.
 *
 * Everything from the first ';' onward (parameters such as charset) is
 * dropped, and surrounding whitespace is removed so that a header like
 * 'text/html ; charset=utf-8' yields 'text/html' rather than 'text/html '.
 *
 * @param {string|undefined|null} contentType - raw Content-Type header value
 * @returns {string|null} the media type, or null when the header is missing or empty
 */
function getMimeType (contentType) {
	if (!contentType) {
		return null;
	}
	// Optional whitespace is permitted around the ';' separator, so trim it off
	return contentType.split(';')[0].trim();
}

function defaultResponseHandler ({response, type}) {
if (TEXT_RESOURCE_TYPES.includes(type)) {
return response.body.toString();
}
return response.body;
function defaultResponseHandler ({response}) {
return Promise.resolve(response.body);
}

function transformResult (result) {
switch (true) {
case typeof result === 'string' || Buffer.isBuffer(result):
case typeof result === 'string':
return {
body: result,
metadata: null
Expand Down Expand Up @@ -47,19 +41,14 @@ async function getRequest ({url, referer, options = {}, afterResponse = defaultR

const response = await got(requestOptions);
logger.debug(`[request] received response for ${response.url}, statusCode ${response.statusCode}`);

const mimeType = getMimeType(response.headers['content-type']);
const resourceType = getTypeByMime(mimeType);

const responseHandlerResult = transformResult(await afterResponse({ response, type: resourceType }));
const responseHandlerResult = transformResult(await afterResponse({response}));

if (!responseHandlerResult) {
return null;
}
return {
url: response.url,
type: resourceType,
mimeType,
mimeType: getMimeType(response.headers['content-type']),
body: responseHandlerResult.body,
metadata: responseHandlerResult.metadata
};
Expand Down
4 changes: 2 additions & 2 deletions lib/scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
} from './plugins/index.js';

import * as utils from './utils/index.js';
const { extend, union, urlsEqual, getTypeByFilename, series } = utils;
const { extend, union, urlsEqual, getTypeByMime, getTypeByFilename, series } = utils;
import NormalizedUrlMap from './utils/normalized-url-map.js';

const actionNames = [
Expand Down Expand Up @@ -170,7 +170,7 @@ class Scraper {
self.requestedResourcePromises.set(responseData.url, requestPromise);
}

resource.setType(responseData.type);
resource.setType(getTypeByMime(responseData.mimeType));

const { filename } = await self.runActions('generateFilename', { resource, responseData });
resource.setFilename(filename);
Expand Down
3 changes: 2 additions & 1 deletion test/functional/encoding/hieroglyphs.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import scrape from 'website-scraper';
const testDirname = './test/functional/encoding/.tmp';
const mockDirname = './test/functional/encoding/mocks';

describe('Functional: Korean characters are properly encoded/decoded', function() {
// TODO: enable test when encoding issue is fixed
xdescribe('Functional: Korean characters are properly encoded/decoded', function() {
const options = {
urls: [
'http://example.com/',
Expand Down
4 changes: 2 additions & 2 deletions test/unit/scraper-init-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ describe('Scraper initialization', function () {

s.options.request.should.containEql({
throwHttpErrors: false,
responseType: 'buffer',
encoding: 'binary',
decompress: true,
https: {
rejectUnauthorized: false
Expand All @@ -143,7 +143,7 @@ describe('Scraper initialization', function () {

s.options.request.should.eql({
throwHttpErrors: true,
responseType: 'buffer',
encoding: 'binary',
decompress: true,
https: {
rejectUnauthorized: false
Expand Down

0 comments on commit 3bd2555

Please sign in to comment.