From bddfeb700796fd79f72b1d03b51122ad13d9c0a5 Mon Sep 17 00:00:00 2001 From: Amir Latypov Date: Wed, 12 Feb 2025 18:38:07 +0300 Subject: [PATCH] [vakifbank-tr] Additional logs --- src/plugins/vakifbank-tr/api.ts | 14 +++++++++-- src/plugins/vakifbank-tr/converters.ts | 26 ++++++++++++++++---- src/plugins/vakifbank-tr/index.ts | 10 +++++++- src/plugins/vakifbank-tr/pdfToStr.js | 33 ++++++++++++++++++++++---- 4 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/plugins/vakifbank-tr/api.ts b/src/plugins/vakifbank-tr/api.ts index 57dcdcf2f..f099d8df5 100644 --- a/src/plugins/vakifbank-tr/api.ts +++ b/src/plugins/vakifbank-tr/api.ts @@ -6,21 +6,26 @@ import { parseDateAndTimeFromPdfText, parseDateFromPdfText, parseFormattedNumber export async function parsePdfVakifStatement (): Promise> { const blob = await getPdfDocuments() + console.log('Validate documents') validateDocuments(blob) const pdfStrings = await parsePdfFromBlob({ blob }) + console.log('Start parsing pdfStrings') return parsePdfStatements(pdfStrings) } export function parsePdfStatements (pdfStrings: string[]): Array<{ account: VakifStatementAccount, transactions: VakifStatementTransaction[] }> { const result = [] - for (const textItem of pdfStrings) { + for (const [index, textItem] of pdfStrings.entries()) { + console.log(`Processing statement ${index + 1} of ${pdfStrings.length}`) + if (!isVakifBankStatement(textItem)) { throw new TemporaryError('Похоже, это не выписка VakifBank') } + try { result.push(parseSinglePdfString(textItem)) } catch (e) { - console.error(e) + console.error(`Error processing statement ${index + 1}:`, e) throw e } } @@ -51,6 +56,7 @@ async function getPdfDocuments (): Promise { export function parseSinglePdfString (text: string, statementUid?: string): { account: VakifStatementAccount, transactions: VakifStatementTransaction[] } { const balanceAmount = extractBalance(text) + const rawAccount: VakifStatementAccount = { balance: balanceAmount, id: extractAccountId(text), @@ -58,7 +64,11 @@ export function parseSinglePdfString (text: string, statementUid?: string): { ac title: 'Vakifbank *' + extractAccountId(text).slice(-4), date: extractStatementDate(text) } + console.log('Formatted account info:', rawAccount) + const rawTransactions = extractTransactions(text) + console.log(`Extracted ${rawTransactions.length} transactions`) + const parsedContent = { account: rawAccount, transactions: rawTransactions diff --git a/src/plugins/vakifbank-tr/converters.ts b/src/plugins/vakifbank-tr/converters.ts index 9bcccf924..ea6e332e2 100644 --- a/src/plugins/vakifbank-tr/converters.ts +++ b/src/plugins/vakifbank-tr/converters.ts @@ -31,16 +31,32 @@ export function parseDateAndTimeFromPdfText (dateStr: string, timeStr: string): } export function convertVakifPdfStatementTransaction (accountId: string, rawTransaction: VakifStatementTransaction[]): TransactionWithId[] { - const result: TransactionWithId[] = [] + console.log(`Converting ${rawTransaction.length} raw transactions for account ID: ${accountId}`) + const result: TransactionWithId[] = [] const chunks = chunksByStatementUid(rawTransaction) - for (const transactions of chunks) { - if (transactions.length !== 1 && transactions.length !== 2) continue + console.log(`Identified ${chunks.length} transaction chunks`) + + for (const [index, transactions] of chunks.entries()) { + console.log(`Processing chunk ${index + 1}/${chunks.length}: ${transactions.length} transactions`) - const transaction = buildTransaction(accountId, transactions) - result.push(transaction) + if (transactions.length !== 1 && transactions.length !== 2) { + console.log(`Skipping chunk ${index + 1} due to unexpected length: ${transactions.length}`) + continue + } + + try { + console.log('Start converting transaction', transactions[0].originString.slice(0, 100), '...') + const transaction = buildTransaction(accountId, transactions) + result.push(transaction) + console.log(`Successfully built transaction ${transaction.statementUid}`) + } catch (error) { + // better ignore 1 transaction rather than fail completely. + console.error(`Error processing chunk ${index + 1} for account ${accountId}:`, error) + } } + console.log(`Finished conversion: ${result.length} transactions created for account ${accountId}`) return result } diff --git a/src/plugins/vakifbank-tr/index.ts b/src/plugins/vakifbank-tr/index.ts index 1df6fa2cb..89406dc44 100644 --- a/src/plugins/vakifbank-tr/index.ts +++ b/src/plugins/vakifbank-tr/index.ts @@ -18,14 +18,22 @@ export const scrape: ScrapeFunc = async ({ fromDate, isFirstRun }) ZenMoney.setData('auth', auth) ZenMoney.saveData() const rawAccountsAndTransactions: null | Array<{ account: VakifStatementAccount, transactions: VakifStatementTransaction[] }> = await parsePdfVakifStatement() + console.log(`Parsed ${rawAccountsAndTransactions ? rawAccountsAndTransactions.length : 0} statements from PDF`) const transactions: TransactionWithId[] = [] const accounts: Account[] = [] if (rawAccountsAndTransactions !== null) { - for (const { account: rawAccount, transactions: rawTransactions } of rawAccountsAndTransactions) { + for (const [index, { account: rawAccount, transactions: rawTransactions }] of rawAccountsAndTransactions.entries()) { + console.log(`Processing account ${index + 1}: ID ${rawAccount.id}`) + const account = convertPdfStatementAccount(rawAccount) + console.log('Converted account:', account) + const currentTransactions = convertVakifPdfStatementTransaction(rawAccount.id, rawTransactions).filter(x => x.transaction.date.getTime() - fromDate.getTime() >= 0) + + console.log(`Converted to ${currentTransactions.length} transactions`) + accounts.push(account) transactions.push(...currentTransactions) } diff --git a/src/plugins/vakifbank-tr/pdfToStr.js b/src/plugins/vakifbank-tr/pdfToStr.js index 1e5ce7f24..93b90d5fa 100644 --- a/src/plugins/vakifbank-tr/pdfToStr.js +++ b/src/plugins/vakifbank-tr/pdfToStr.js @@ -1,9 +1,34 @@ import pdf from 'pdf-extraction/lib/pdf-extraction' export async function parsePdfFromBlob ({ blob }) { - const sources = await Promise.all(blob.map(async dataPart => { - const data = await pdf(await dataPart.arrayBuffer()) - return data.text - })) + const sources = await Promise.all( + blob.map(async (dataPart, index) => { + try { + console.log(`Processing data part ${index + 1} of ${blob.length}`) + + console.log(`dataPart type: ${dataPart.constructor.name}`) + console.log('dataPart content:', dataPart) + + console.log('Converting Blob to ArrayBuffer...') + const arrayBuffer = await dataPart.arrayBuffer() + console.log('ArrayBuffer successfully created:', arrayBuffer.length) + + console.log('Parsing PDF from ArrayBuffer...') + const data = await pdf(arrayBuffer) + console.log('PDF parsed successfully:', data) + + console.log('Extracting text from parsed PDF...') + const text = data.text + console.log('Extracted text:', text) + + console.log(`Successfully processed data part ${index + 1}`) + return text + } catch (error) { + console.error(`Failed to process data part ${index + 1}:`, error) + throw error + } + }) + ) + return sources }