Some support for non-admin users
Signed-off-by: Joshua Castle <26531652+Kas-tle@users.noreply.github.com>
Kas-tle committed Apr 26, 2023
1 parent d61b7b8 commit 3faad9d
Showing 13 changed files with 304 additions and 96 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -106,6 +106,8 @@ dist/
# Output
target/
config.json
config2.json
config3.json
credentials.json
package-lock.json
*.sqlite
32 changes: 23 additions & 9 deletions index.ts
@@ -4,7 +4,7 @@ import packageJson from './package.json';
import { getConfig } from './src/util/config';
import { deleteFiles, ensureDirectory } from './src/util/files';
import { databaseConnection, initializeTables, insertRow, isModuleScraped, queryModuleIDs } from './src/util/database';
import { authenticateAPI, authenticateSite, getSiteID } from './src/scrapers/authenticate';
import { authenticateAPI, authenticateSite, getSiteID, isSiteAdmin } from './src/scrapers/authenticate';
import { getForums } from './src/scrapers/forums';
import { getNews } from './src/scrapers/news';
import { getAllTickets } from './src/scrapers/tickets';
@@ -36,6 +36,15 @@ async function main(): Promise<void> {
// Login to site and get PHPSESSID and csrf_token
const siteAuth = config.siteAuth ? config.siteAuth : await authenticateSite(config.domain, config.email, config.password);

// Check if we are admin
let adminMode: boolean = true;
if (siteAuth !== null && config.adminMode !== false) {
adminMode = await isSiteAdmin(config.domain, siteAuth);
} else {
adminMode = false;
}
statusMessage(MessageType.Info, `Admin Mode: ${adminMode}`);

// Get site ID
const siteID = await getSiteID(config.domain);
statusMessage(MessageType.Info, `Site ID: ${siteID}`);
@@ -48,15 +57,15 @@ async function main(): Promise<void> {
await initializeTables(database);

// Notifier Mode
if (config.notifier && config.notifier.enabled === true) {
if (siteAuth != null && config.notifier && config.notifier.enabled === true && config.apiKey) {
await startNotifier(database, config.domain, config.apiKey, siteAuth, config.notifier.messageSubject, config.notifier.messageBody);
deleteFiles(['./target/recovery/notifier_progress.json']);
}

// Get site data
if (await isModuleScraped(database, 'site_data')) {
statusMessage(MessageType.Critical, 'Site data already scraped, moving on...');
} else {
} else if (siteAuth != null && adminMode) {
await getSiteData(config.domain, siteAuth, database, siteID);
await insertRow(database, 'scrapers', 'site_data', true);
}
@@ -70,17 +79,20 @@ async function main(): Promise<void> {
statusMessage(MessageType.Critical, 'HTML module disabled, skipping forum scraping...');
} else if (await isModuleScraped(database, 'html')) {
statusMessage(MessageType.Critical, 'HTML already scraped, skipping forum scraping...');
} else {
} else if (siteAuth != null && adminMode) {
statusMessage(MessageType.Info, 'Scraping HTML modules...');
await getHTMLModules(config.domain, siteAuth, database, htmlModuleIDs);
await insertRow(database, 'scrapers', 'html', true);
deleteFiles(['./target/recovery/html_progress.json']);
statusMessage(MessageType.Completion, 'Finished HTML module scraping');
} else {
statusMessage(MessageType.Info, 'Cannot scrape HTML modules without admin credentials, skipping...');
}

// Get forums
let forumModuleIDs = await queryModuleIDs(database, 'forum');
config.excludeForumModuleIDs ? forumModuleIDs.filter(id => !config.excludeForumModuleIDs?.includes(id)) : {};
config.manualForumModuleIDs && config.manualForumModuleIDs.length > 0 ? forumModuleIDs.push(...config.manualForumModuleIDs) : {};
if (forumModuleIDs.length === 0) {
statusMessage(MessageType.Critical, 'No forum module IDs for site, skipping forum scraping...');
} else if (config.disabledModules?.forums) {
@@ -110,6 +122,7 @@ async function main(): Promise<void> {
// Get wikis
let wikiModuleIDs = await queryModuleIDs(database, 'wiki');
config.excludedWikiModuleIDs ? wikiModuleIDs.filter(id => !config.excludedWikiModuleIDs?.includes(id)) : {};
config.manualWikiModuleIDs && config.manualWikiModuleIDs.length > 0 ? wikiModuleIDs.push(...config.manualWikiModuleIDs) : {};
if (wikiModuleIDs.length === 0) {
statusMessage(MessageType.Critical, 'No wiki module IDs for site, skipping wiki scraping...');
} else if (config.disabledModules?.wikis) {
@@ -127,6 +140,7 @@ async function main(): Promise<void> {
// Get news
let newsModuleIDs = await queryModuleIDs(database, 'news');
config.excludeNewsModuleIDs ? newsModuleIDs.filter(id => !config.excludeNewsModuleIDs?.includes(id)) : {};
config.manualNewsModuleIDs && config.manualNewsModuleIDs.length > 0 ? newsModuleIDs.push(...config.manualNewsModuleIDs) : {};
if (newsModuleIDs.length === 0) {
statusMessage(MessageType.Critical, 'No news module IDs for site, skipping news scraping...');
} else if (config.disabledModules?.news) {
@@ -149,7 +163,7 @@ async function main(): Promise<void> {
statusMessage(MessageType.Info, 'Scraping application responses...');
await getApplicationResponses(database, config.domain, sessionID, siteAuth, siteID);
statusMessage(MessageType.Info, 'Scraping applications...');
await getApplications(database, config.domain, siteAuth);
siteAuth ? await getApplications(database, config.domain, siteAuth) : {};
await insertRow(database, 'scrapers', 'applications', true);
deleteFiles(['./target/recovery/remaining_applications.json', './target/recovery/application_ids.json']);
statusMessage(MessageType.Completion, 'Finished application scraping');
@@ -162,7 +176,7 @@ async function main(): Promise<void> {
statusMessage(MessageType.Critical, 'Comments already scraped, skipping comment scraping...');
} else {
statusMessage(MessageType.Info, 'Scraping comments...');
await getComments(database, config.domain, siteAuth)
siteAuth ? await getComments(database, config.domain, siteAuth) : {};
await insertRow(database, 'scrapers', 'comments', true);
deleteFiles(['./target/recovery/comments.json']);
statusMessage(MessageType.Completion, 'Finished comment scraping');
@@ -175,7 +189,7 @@ async function main(): Promise<void> {
statusMessage(MessageType.Critical, 'Tickets already scraped, skipping ticket scraping...');
} else {
statusMessage(MessageType.Info, 'Scraping tickets...');
await getAllTickets(database, config.domain, config.apiKey, sessionID, siteAuth, config.excludeTicketModuleIDs ?? null);
await getAllTickets(database, config.domain, config.apiKey, sessionID, siteAuth, adminMode, config.excludeTicketModuleIDs ?? null, config.manualTicketModuleIDs ?? null);
await insertRow(database, 'scrapers', 'tickets', true);
deleteFiles(['./target/recovery/module_tickets.json']);
statusMessage(MessageType.Completion, 'Finished ticket scraping');
@@ -190,7 +204,7 @@ async function main(): Promise<void> {
statusMessage(MessageType.Info, 'Scraping users...');
await isModuleScraped(database, 'users') ? {} : await getUsers(database, config.domain, config.apiKey, config.disabledModules.users);
await insertRow(database, 'scrapers', 'users', true);
await isModuleScraped(database, 'user_data') ? {} : await getAdditionalUserData(config.domain, sessionID, siteAuth, database, config.disabledModules.users);
await isModuleScraped(database, 'user_data') ? {} : await getAdditionalUserData(config.domain, sessionID, siteAuth, database, config.disabledModules.users, adminMode);
await insertRow(database, 'scrapers', 'user_data', true);
deleteFiles(['./target/recovery/user_tags.json', './target/recovery/user_data.json']);
statusMessage(MessageType.Completion, 'Finished user scraping');
@@ -212,7 +226,7 @@ async function main(): Promise<void> {
statusMessage(MessageType.Info, 'Scraping files...');
const disabledFileModules = config.disabledModules?.files;
if (!await isModuleScraped(database, 's3_files') && ((typeof disabledFileModules === 'object') ? !(disabledFileModules.s3) : true)) {
await getS3Files(config.domain, database, siteAuth, siteID);
await getS3Files(config.domain, database, siteAuth, siteID, adminMode);
await insertRow(database, 'scrapers', 's3_files', true);
}
if (!await isModuleScraped(database, 'wiki_files') && ((typeof disabledFileModules === 'object') ? !(disabledFileModules.wiki) : true)) {
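For orientation, a hedged sketch of the config.json options this change reads in index.ts. Only adminMode and the manual*ModuleIDs keys come from the diff above; the ID values are placeholders, and whether module IDs are strings or numbers is an assumption.

// Sketch of a non-admin config fragment (key names from this commit; values are hypothetical).
// IDs are shown as strings on the assumption that queryModuleIDs returns string module IDs.
const exampleConfig = {
    adminMode: false,                    // force non-admin behaviour even if the site login succeeds
    manualForumModuleIDs: ['1000001'],   // merged into the module ID lists queried from the database
    manualWikiModuleIDs: ['1000002'],
    manualNewsModuleIDs: ['1000003'],
    manualTicketModuleIDs: ['1000004'],  // passed through to getAllTickets
};

With adminMode false, or when the site login fails, the site data and HTML module scrapers above are skipped and the manual IDs supplement whatever the forum, wiki, news, and ticket scrapers can reach without admin access.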
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "enjinscraper",
"version": "1.5.4",
"version": "1.6.0",
"description": "Scrapes an Enjin site via the Enjin API",
"repository": "https://github.com/Kas-tle/EnjinScraper.git",
"author": "Joshua Castle <packages@kastle.dev",
2 changes: 1 addition & 1 deletion src/interfaces/applications.ts
@@ -16,7 +16,7 @@ export namespace Applications {
}
}

interface UserData {
export interface UserData {
[key: string]: string | number | boolean | string[] | number[] | boolean[]
}

4 changes: 2 additions & 2 deletions src/scrapers/applications.ts
@@ -249,7 +249,7 @@ export async function getApplications(database: Database, domain: string, siteAu
}
}

export async function getApplicationResponses(database: Database, domain: string, sessionID: string, siteAuth: SiteAuth, siteID: string) {
export async function getApplicationResponses(database: Database, domain: string, sessionID: string, siteAuth: SiteAuth | null, siteID: string) {
const applicationTypes = await getApplicationTypes(domain);
statusMessage(MessageType.Info, `Found ${applicationTypes.length} application types: ${applicationTypes.join(', ')}`);

@@ -329,7 +329,7 @@ export async function getApplicationResponses(database: Database, domain: string
null
]

if (result.comments > 0) {
if (result.comments > 0 && siteAuth !== null) {
const commentCid = await getApplicationCommentsCid(domain, siteAuth, result.application_id);
values[values.length - 2] = commentCid.comments_cid;
values[values.length - 1] = commentCid.admin_comments_cid;
96 changes: 70 additions & 26 deletions src/scrapers/authenticate.ts
@@ -1,11 +1,13 @@
import fs from 'fs';
import path from 'path';
import * as readline from 'readline';
import * as cheerio from 'cheerio';
import { Site } from "../interfaces/site";
import { User } from "../interfaces/user";
import { enjinRequest, getRequest, postRequest } from '../util/request';
import { SiteAuth } from '../interfaces/generic';
import { MessageType, statusMessage } from '../util/console';
import { getErrorMessage } from '../util/error';

export async function authenticateAPI(domain: string, email: string, password: string): Promise<string> {
const params = {
@@ -27,40 +29,72 @@ export async function authenticateAPI(domain: string, email: string, password: s
return data.result.session_id;
}

export async function authenticateSite(domain: string, email: string, password: string): Promise<SiteAuth> {
const loginResponse = await getRequest(domain, '/login', {}, '/authenticateSite/loginResponse');
const setCookie = loginResponse.headers['set-cookie'];
const cf_bm_token = setCookie!.find((cookie: string) => cookie.includes('__cf_bm'))!.split(';')[0];
const lastviewed = setCookie!.find((cookie: string) => cookie.includes('lastviewed'))!.split(';')[0];

const $ = cheerio.load(loginResponse.data);
const formName = $('div.input input[type="password"]').attr('name');

const formData = new URLSearchParams({
m: '0',
do: '',
username: email,
[formName!]: password
function booleanPromptUser(question: string): Promise<void> {
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
});
return new Promise(resolve => {
rl.question(`${question} (y/n) `, (answer) => {
if (answer.toLowerCase() === 'y') {
statusMessage(MessageType.Plain, 'Continuing...')
resolve();
} else if (answer.toLowerCase() === 'n') {
statusMessage(MessageType.Plain, 'Exiting...')
rl.close();
process.exit(0);
} else {
statusMessage(MessageType.Plain, 'Invalid input. Please enter y or n.');
booleanPromptUser(question).then(resolve);
}
});
});
}

const postLoginResponse = await postRequest(domain, '/login', formData, {
Cookie: `${lastviewed}; enjin_browsertype=web; ${cf_bm_token}`,
}, '/authenticateSite');
export async function authenticateSite(domain: string, email: string, password: string): Promise<SiteAuth | null> {
try {
const loginResponse = await getRequest(domain, '/login', {}, '/authenticateSite/loginResponse');
const setCookie = loginResponse.headers['set-cookie'];
const cf_bm_token = setCookie!.find((cookie: string) => cookie.includes('__cf_bm'))!.split(';')[0];
const lastviewed = setCookie!.find((cookie: string) => cookie.includes('lastviewed'))!.split(';')[0];

const phpSessID = postLoginResponse.headers['set-cookie']!.find((cookie: string) => cookie.includes('PHPSESSID'))!.split(';')[0];
const $ = cheerio.load(loginResponse.data);
const formName = $('div.input input[type="password"]').attr('name');

const homeResponse = await getRequest(domain, '/', {
Cookie: `${lastviewed}; ${phpSessID}; enjin_browsertype=web; ${cf_bm_token}; login_temp=1`,
}, '/authenticateSite/homeResponse');
const formData = new URLSearchParams({
m: '0',
do: '',
username: email,
[formName!]: password
});

const csrfToken = homeResponse.headers['set-cookie']!.find((cookie: string) => cookie.includes('csrf_token'))!.split(';')[0];
const postLoginResponse = await postRequest(domain, '/login', formData, {
Cookie: `${lastviewed}; enjin_browsertype=web; ${cf_bm_token}`,
}, '/authenticateSite');

const config = JSON.parse(fs.readFileSync(path.join(process.cwd(), './config.json')).toString());
config.siteAuth = { phpSessID, csrfToken };
fs.writeFileSync(path.join(process.cwd(), './config.json'), JSON.stringify(config, null, 4));
const phpSessID = postLoginResponse.headers['set-cookie']!.find((cookie: string) => cookie.includes('PHPSESSID'))!.split(';')[0];

const homeResponse = await getRequest(domain, '/', {
Cookie: `${lastviewed}; ${phpSessID}; enjin_browsertype=web; ${cf_bm_token}; login_temp=1`,
}, '/authenticateSite/homeResponse');

const csrfToken = homeResponse.headers['set-cookie']!.find((cookie: string) => cookie.includes('csrf_token'))!.split(';')[0];

statusMessage(MessageType.Completion, `Authenticated with PHPSESSID and CSRF token`);
return { phpSessID, csrfToken };
const config = JSON.parse(fs.readFileSync(path.join(process.cwd(), './config.json')).toString());
config.siteAuth = { phpSessID, csrfToken };
fs.writeFileSync(path.join(process.cwd(), './config.json'), JSON.stringify(config, null, 4));

statusMessage(MessageType.Completion, `Authenticated with PHPSESSID and CSRF token`);
return { phpSessID, csrfToken };
} catch (error) {
statusMessage(MessageType.Error, `Error authenticating: ${getErrorMessage(error)}`);
statusMessage(MessageType.Info, 'This will seriously limit the info we can scrape from the site.');
statusMessage(MessageType.Info, 'If you have 2FA enabled, you should disable it and try again later.');
await booleanPromptUser('Do you still want to continue?')

return null;
}
}

export async function getSiteID(domain: string): Promise<string> {
@@ -73,4 +107,14 @@ export async function getSiteID(domain: string): Promise<string> {
const { result } = data;

return result.latest_user.site_id;
}

export async function isSiteAdmin(domain: string, siteAuth: SiteAuth): Promise<boolean> {
const adminResponse = await getRequest(domain, '/admin', {
Cookie: `${siteAuth.phpSessID}; enjin_browsertype=web; ${siteAuth.csrfToken}`,
}, '/isSiteAdmin');

const $ = cheerio.load(adminResponse.data);

return !($('.header_text_text').text() === 'Error');
}
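A minimal usage sketch of the revised authentication surface, assuming the exports shown above (authenticateSite and isSiteAdmin); the hypothetical resolveAdminMode wrapper, domain, and credentials are illustrative only.

// authenticateSite can now resolve to null (failed login, e.g. with 2FA enabled), so callers
// must branch before handing siteAuth to admin-only scrapers; isSiteAdmin then picks the mode.
async function resolveAdminMode(domain: string, email: string, password: string): Promise<boolean> {
    const siteAuth = await authenticateSite(domain, email, password);
    if (siteAuth === null) {
        return false; // continue in the limited, unauthenticated mode
    }
    return isSiteAdmin(domain, siteAuth);
}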
9 changes: 9 additions & 0 deletions src/scrapers/comments.ts
@@ -39,6 +39,15 @@ export async function getComments(database: Database, domain: string, siteAuth:
const commentResponse = await getRequest(domain, `/ajax.php?s=comments&op=load&start=0&comment_cid=${commentCids[i]}&pageSize=-1&subPageSize=-1`, {
Cookie: `${siteAuth.phpSessID}; ${siteAuth.csrfToken}`,
}, '/getComments');

if (commentResponse.data.error) {
statusMessage(MessageType.Error, `Error getting comments for comment cid ${commentCids[i]}: ${commentResponse.data.error.code} ${commentResponse.data.error.message}`);
if (commentResponse.data.error === "The action you requested has expired, please reload the page and try again.") {
statusMessage(MessageType.Critical, 'Invalid siteAuth; please delete the siteAuth object from the config.json file and try again');
process.kill(process.pid, 'SIGINT');
}
}

const response: CommentResponse = commentResponse.data;

const commentsDB: CommentsDB[] = [];
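For reference, a sketch of the guard the comments scraper now applies before parsing; the error payload shape is inferred from the checks above and is an assumption rather than a documented Enjin response format.

// Hypothetical helper mirroring the new check: a present error is logged, and an
// "action has expired" error means the cached siteAuth in config.json is stale and should be deleted.
function isStaleSiteAuthError(error: unknown): boolean {
    if (typeof error === 'string') return error.includes('has expired');
    if (error && typeof error === 'object' && 'message' in error) {
        return String((error as { message: unknown }).message).includes('has expired');
    }
    return false;
}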