Skip to content

Commit

Permalink
Trade Republic German language parser (#310)
Browse files Browse the repository at this point in the history
* add basic parser for the German language; still needs some adjustments

* update tests, fix logic for fee records

* swap string replacement with proper culture usage

* add testdata, update tests

* add more test files and test cases

---------

Co-authored-by: mbks <mbks@mbks.mbks>
Co-authored-by: VibeNL <30174292+VibeNL@users.noreply.github.com>
  • Loading branch information
3 people authored Nov 11, 2024
1 parent f2dfa51 commit d150200
Show file tree
Hide file tree
Showing 12 changed files with 313 additions and 23 deletions.
18 changes: 18 additions & 0 deletions Parsers.UnitTests/Parsers.UnitTests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,24 @@
<None Update="TestFiles\TradeRepublic\EN\CashTransactions\single_repay_bond.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\DE\BuyOrders\single_buy_stock_fraction.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\DE\BuyOrders\single_buy_stock_full.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\DE\BuyOrders\single_buy_savingsplan_etf.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\DE\BuyOrders\single_limit_buy_stock.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\DE\SellOrders\single_sell_stock.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\DE\CashTransactions\single_dividend_stock.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="TestFiles\TradeRepublic\NL\montly_statement.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
209 changes: 209 additions & 0 deletions Parsers.UnitTests/TradeRepublic/TradeRepublicInvoiceParserDETests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
using AutoFixture;
using FluentAssertions;
using GhostfolioSidekick.Model;
using GhostfolioSidekick.Model.Accounts;
using GhostfolioSidekick.Model.Activities;
using GhostfolioSidekick.Parsers.PDFParser.PdfToWords;
using GhostfolioSidekick.Parsers.TradeRepublic;

namespace GhostfolioSidekick.Parsers.UnitTests.TradeRepublic
{
    /// <summary>
    /// Tests for <see cref="TradeRepublicInvoiceParserDE"/> that run the real
    /// <see cref="PdfToWordsParser"/> against the German sample documents stored under
    /// <c>TestFiles/TradeRepublic/DE</c>.
    /// </summary>
    public class TradeRepublicInvoiceParserDETests
    {
        private readonly Account account;
        private readonly TestHoldingsCollection holdingsAndAccountsCollection;

        /// <summary>
        /// Creates a fixture-generated account with a zero EUR balance and the collection
        /// that records the partial activities produced by the parser.
        /// </summary>
        public TradeRepublicInvoiceParserDETests()
        {
            var fixture = new Fixture();
            account = fixture
                .Build<Account>()
                .With(x => x.Balance, new Balance(DateTime.Now, new Money(Currency.EUR, 0)))
                .Create();
            holdingsAndAccountsCollection = new TestHoldingsCollection(account);
        }

        /// <summary>
        /// Every PDF below the German test-file directory must be accepted by
        /// <c>CanParse</c>.
        /// </summary>
        [Fact]
        public async Task CanParseActivities_TestFiles_True()
        {
            // Arrange, use the real parser to test the real files
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());
            var files = Directory.GetFiles("./TestFiles/TradeRepublic/DE", "*.pdf", SearchOption.AllDirectories);

            // Guard against a vacuous pass: without this check the loop below would
            // silently succeed if no sample PDF was copied to the output directory.
            files.Should().NotBeEmpty("the German sample PDFs must be present in the test output directory");

            foreach (var file in files)
            {
                // Act
                var canParse = await parser.CanParse(file);

                // Assert
                canParse.Should().BeTrue($"File {file} cannot be parsed");
            }
        }

        // BuyOrders

        /// <summary>
        /// A buy of one whole share yields a buy activity plus a 1 EUR fee activity.
        /// </summary>
        [Fact]
        public async Task ConvertActivitiesForAccount_TestFileSingleBuyStockFull_Converted()
        {
            // Arrange
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());

            // Act
            await parser.ParseActivities("./TestFiles/TradeRepublic/DE/BuyOrders/single_buy_stock_full.pdf", holdingsAndAccountsCollection, account.Name);

            // Assert
            holdingsAndAccountsCollection.PartialActivities.Should().BeEquivalentTo(
                [PartialActivity.CreateBuy(
                    Currency.EUR,
                    new DateTime(2024, 08, 01, 0, 0, 0, DateTimeKind.Utc),
                    [PartialSymbolIdentifier.CreateStockBondAndETF("US67066G1040")],
                    1m,
                    101.50m,
                    new Money(Currency.EUR, 101.50m),
                    "Trade_Republic_US67066G1040_2024-08-01"),
                PartialActivity.CreateFee(
                    Currency.EUR,
                    new DateTime(2024, 08, 01, 0, 0, 0, DateTimeKind.Utc),
                    1m,
                    new Money(Currency.EUR, 1m),
                    "Trade_Republic_US67066G1040_2024-08-01")
                ]);
        }

        /// <summary>
        /// A buy of a fractional share yields a buy activity plus a 1 EUR fee activity.
        /// </summary>
        [Fact]
        public async Task ConvertActivitiesForAccount_TestFileSingleBuyStockFraction_Converted()
        {
            // Arrange
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());

            // Act
            await parser.ParseActivities("./TestFiles/TradeRepublic/DE/BuyOrders/single_buy_stock_fraction.pdf", holdingsAndAccountsCollection, account.Name);

            // Assert
            holdingsAndAccountsCollection.PartialActivities.Should().BeEquivalentTo(
                [PartialActivity.CreateBuy(
                    Currency.EUR,
                    new DateTime(2024, 08, 01, 0, 0, 0, DateTimeKind.Utc),
                    [PartialSymbolIdentifier.CreateStockBondAndETF("US0079031078")],
                    0.410846m,
                    121.70m,
                    new Money(Currency.EUR, 50.00m),
                    "Trade_Republic_US0079031078_2024-08-01"),
                PartialActivity.CreateFee(
                    Currency.EUR,
                    new DateTime(2024, 08, 01, 0, 0, 0, DateTimeKind.Utc),
                    1m,
                    new Money(Currency.EUR, 1m),
                    "Trade_Republic_US0079031078_2024-08-01")
                ]);
        }

        /// <summary>
        /// A savings-plan ETF purchase yields a single buy activity and no fee activity.
        /// </summary>
        [Fact]
        public async Task ConvertActivitiesForAccount_TestFileSingleBuySavingsplan_Converted()
        {
            // Arrange
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());

            // Act
            await parser.ParseActivities("./TestFiles/TradeRepublic/DE/BuyOrders/single_buy_savingsplan_etf.pdf", holdingsAndAccountsCollection, account.Name);

            // Assert
            holdingsAndAccountsCollection.PartialActivities.Should().BeEquivalentTo(
                [PartialActivity.CreateBuy(
                    Currency.EUR,
                    new DateTime(2024, 09, 02, 0, 0, 0, DateTimeKind.Utc),
                    [PartialSymbolIdentifier.CreateStockBondAndETF("IE00B52VJ196")],
                    0.694251m,
                    72.02m,
                    new Money(Currency.EUR, 50.00m),
                    "Trade_Republic_IE00B52VJ196_2024-09-02")
                ]);
        }

        /// <summary>
        /// A limit buy of one share yields a buy activity plus a 1 EUR fee activity.
        /// </summary>
        [Fact]
        public async Task ConvertActivitiesForAccount_TestFileSingleLimitBuyStock_Converted()
        {
            // Arrange
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());

            // Act
            await parser.ParseActivities("./TestFiles/TradeRepublic/DE/BuyOrders/single_limit_buy_stock.pdf", holdingsAndAccountsCollection, account.Name);

            // Assert
            holdingsAndAccountsCollection.PartialActivities.Should().BeEquivalentTo(
                [PartialActivity.CreateBuy(
                    Currency.EUR,
                    new DateTime(2024, 08, 02, 0, 0, 0, DateTimeKind.Utc),
                    [PartialSymbolIdentifier.CreateStockBondAndETF("JP3756600007")],
                    1m,
                    48.95m,
                    new Money(Currency.EUR, 48.95m),
                    "Trade_Republic_JP3756600007_2024-08-02"),
                PartialActivity.CreateFee(
                    Currency.EUR,
                    new DateTime(2024, 08, 02, 0, 0, 0, DateTimeKind.Utc),
                    1m,
                    new Money(Currency.EUR, 1m),
                    "Trade_Republic_JP3756600007_2024-08-02")
                ]);
        }

        // SellOrders
        // TODO: sell orders do not seem to be implemented in the parser yet; enable this
        // test once they are.
        /*[Fact]
        public async Task ConvertActivitiesForAccount_TestFileSingleSellStock_Converted()
        {
            // Arrange
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());
            // Act
            await parser.ParseActivities("./TestFiles/TradeRepublic/DE/SellOrders/single_sell_stock.pdf", holdingsAndAccountsCollection, account.Name);
            // Assert
            holdingsAndAccountsCollection.PartialActivities.Should().BeEquivalentTo(
                [PartialActivity.CreateSell(
                    Currency.EUR,
                    new DateTime(2024, 08, 13, 0, 0, 0, DateTimeKind.Utc),
                    [PartialSymbolIdentifier.CreateStockBondAndETF("US0079031078")],
                    1m,
                    127.88m,
                    new Money(Currency.EUR, 127.88m),
                    "Trade_Republic_US0079031078_2024-08-13"),
                PartialActivity.CreateFee(
                    Currency.EUR,
                    new DateTime(2024, 08, 13, 0, 0, 0, DateTimeKind.Utc),
                    1m,
                    new Money(Currency.EUR, 1m),
                    "Trade_Republic_US0079031078_2024-08-13")
                ]);
        }*/

        // CashTransactions
        // TODO: the German Trade Republic dividend document uses dots (.) as the decimal
        // separator for amounts, whereas all other documents use commas.
        // The sample document also contains a currency exchange, which leads to the
        // wrong value being read.
        /*[Fact]
        public async Task ConvertActivitiesForAccount_TestFileSingleDividend_Converted()
        {
            // Arrange
            var parser = new TradeRepublicInvoiceParserDE(new PdfToWordsParser());
            // Act
            await parser.ParseActivities("./TestFiles/TradeRepublic/DE/CashTransactions/single_dividend_stock.pdf", holdingsAndAccountsCollection, account.Name);
            // Assert
            holdingsAndAccountsCollection.PartialActivities.Should().BeEquivalentTo(
                [PartialActivity.CreateDividend(
                    Currency.USD,
                    new DateTime(2024, 10, 03, 0, 0, 0, DateTimeKind.Utc),
                    [PartialSymbolIdentifier.CreateStockBondAndETF("US67066G1040")],
                    0.02m,
                    new Money(Currency.USD, 0.2m),
                    "Trade_Republic_US67066G1040_2024-10-03"),
                PartialActivity.CreateFee(
                    Currency.EUR,
                    new DateTime(2024, 10, 03, 0, 0, 0, DateTimeKind.Utc),
                    0.01m,
                    new Money(Currency.EUR, 0.01m),
                    "Trade_Republic_US67066G1040_2024-10-03")
                ]);
        }*/
    }
}
54 changes: 31 additions & 23 deletions Parsers/TradeRepublic/TradeRepublicInvoiceParserBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ public abstract class TradeRepublicInvoiceParserBase : PdfBaseParser
protected abstract string Keyword_Booking { get; }
protected abstract string Keyword_Security { get; }
protected abstract string Keyword_Number { get; }
protected abstract string SECURITIES_SETTLEMENT { get; }
protected abstract string DIVIDEND { get; }
protected abstract string INTEREST_PAYMENT { get; }
protected abstract string REPAYMENT { get; }
protected abstract string ACCRUED_INTEREST { get; }
protected abstract string EXTERNAL_COST_SURCHARGE { get; }
protected abstract string WITHHOLDING_TAX { get; }
protected abstract string DATE { get; }
protected abstract CultureInfo CULTURE { get; }

private List<string> TableKeyWords
{
Expand Down Expand Up @@ -66,10 +74,10 @@ protected override bool CanParseRecords(List<SingleWordToken> words)
}

if (
IsCheckWords("SECURITIES SETTLEMENT", words, i) ||
IsCheckWords("DIVIDEND", words, i) ||
IsCheckWords("INTEREST PAYMENT", words, i) ||
IsCheckWords("REPAYMENT", words, i))
IsCheckWords(SECURITIES_SETTLEMENT, words, i) ||
IsCheckWords(DIVIDEND, words, i) ||
IsCheckWords(INTEREST_PAYMENT, words, i) ||
IsCheckWords(REPAYMENT, words, i))
{
foundSecurities = true;
}
Expand Down Expand Up @@ -166,27 +174,27 @@ protected override List<PartialActivity> ParseRecords(List<SingleWordToken> word
return activities;
}

private static int ParseFeeRecords(List<SingleWordToken> words, int i, DateTime dateTime, List<PartialActivity> activities)
private int ParseFeeRecords(List<SingleWordToken> words, int i, DateTime dateTime, List<PartialActivity> activities)
{
int skip;
if (IsCheckWords("Accrued interest", words, i))
if (IsCheckWords(ACCRUED_INTEREST, words, i))
{
skip = 2;
skip = ACCRUED_INTEREST.Split(" ").Length;
}
else if (IsCheckWords("External cost surcharge", words, i))
else if (IsCheckWords(EXTERNAL_COST_SURCHARGE, words, i))
{
skip = 3;
skip = EXTERNAL_COST_SURCHARGE.Split(" ").Length;
}
else if (IsCheckWords("Withholding tax for US issuer", words, i))
else if (IsCheckWords(WITHHOLDING_TAX, words, i))
{
skip = 5;
skip = WITHHOLDING_TAX.Split(" ").Length;
}
else
{
return i;
}

var price = Math.Abs(decimal.Parse(words[i + skip].Text, CultureInfo.InvariantCulture));
var price = Math.Abs(decimal.Parse(words[i + skip].Text, CULTURE));
var currencySymbol = words[i + skip + 1].Text;
var currency = new Currency(currencySymbol);

Expand All @@ -212,9 +220,9 @@ private int ParseSecurityRecord(List<SingleWordToken> words, int i, DateTime dat
new Currency(words[i + 4].Text),
dateTime,
[PartialSymbolIdentifier.CreateStockBondAndETF(isin)],
decimal.Parse(words[i + 1].Text, CultureInfo.InvariantCulture),
decimal.Parse(words[i + 3].Text, CultureInfo.InvariantCulture),
new Money(new Currency(words[i + 4].Text), decimal.Parse(words[i + 5].Text, CultureInfo.InvariantCulture)),
decimal.Parse(words[i + 1].Text, CULTURE),
decimal.Parse(words[i + 3].Text, CULTURE),
new Money(new Currency(words[i + 4].Text), decimal.Parse(words[i + 5].Text, CULTURE)),
id));

return i + 6;
Expand All @@ -229,9 +237,9 @@ private int ParseSecurityRecord(List<SingleWordToken> words, int i, DateTime dat
new Currency(words[i + 6].Text),
dateTime,
[PartialSymbolIdentifier.CreateStockBondAndETF(isin)],
decimal.Parse(words[i + 1].Text, CultureInfo.InvariantCulture),
decimal.Parse(words[i + 3].Text, CultureInfo.InvariantCulture) / 100,
new Money(new Currency(words[i + 6].Text), decimal.Parse(words[i + 5].Text, CultureInfo.InvariantCulture)),
decimal.Parse(words[i + 1].Text, CULTURE),
decimal.Parse(words[i + 3].Text, CULTURE) / 100,
new Money(new Currency(words[i + 6].Text), decimal.Parse(words[i + 5].Text, CULTURE)),
id));

return i + 6;
Expand All @@ -246,8 +254,8 @@ private int ParseSecurityRecord(List<SingleWordToken> words, int i, DateTime dat
new Currency(words[i + 6].Text),
dateTime,
[PartialSymbolIdentifier.CreateStockBondAndETF(isin)],
decimal.Parse(words[i + 5].Text, CultureInfo.InvariantCulture),
new Money(new Currency(words[i + 6].Text), decimal.Parse(words[i + 5].Text, CultureInfo.InvariantCulture)),
decimal.Parse(words[i + 5].Text, CULTURE),
new Money(new Currency(words[i + 6].Text), decimal.Parse(words[i + 5].Text, CULTURE)),
id));

return i + 6;
Expand All @@ -263,8 +271,8 @@ private int ParseSecurityRecord(List<SingleWordToken> words, int i, DateTime dat
new Currency(words[i + 2].Text),
dateTime,
[PartialSymbolIdentifier.CreateStockBondAndETF(isin)],
decimal.Parse(words[i + 1].Text, CultureInfo.InvariantCulture),
new Money(new Currency(words[i + 2].Text), decimal.Parse(words[i + 1].Text, CultureInfo.InvariantCulture)),
decimal.Parse(words[i + 1].Text, CULTURE),
new Money(new Currency(words[i + 2].Text), decimal.Parse(words[i + 1].Text, CULTURE)),
id));

return i + 2;
Expand Down
35 changes: 35 additions & 0 deletions Parsers/TradeRepublic/TradeRepublicInvoiceParserDE.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using GhostfolioSidekick.Parsers.PDFParser.PdfToWords;
using System.Globalization;

namespace GhostfolioSidekick.Parsers.TradeRepublic
{
    /// <summary>
    /// Trade Republic invoice parser configured with the keywords and number format of
    /// German-language documents. All parsing logic lives in
    /// <see cref="TradeRepublicInvoiceParserBase"/>; this class only supplies the
    /// language-specific values.
    /// </summary>
    /// <remarks>
    /// Several keywords are <see cref="string.Empty"/> because no German equivalent is
    /// configured yet. NOTE(review): how the base class treats an empty keyword is not
    /// visible from this file; confirm it can never match a document token.
    /// </remarks>
    public class TradeRepublicInvoiceParserDE : TradeRepublicInvoiceParserBase
    {
        // Cached, read-only culture. The base class reads CULTURE for every parsed number,
        // so constructing a new CultureInfo on each access is wasted work. GetCultureInfo
        // also returns an instance that ignores user overrides of the number format.
        private static readonly CultureInfo GermanCulture = CultureInfo.GetCultureInfo("de");

        // DE
        protected override string Keyword_Position => "POSITION";
        protected override string Keyword_Quantity => "ANZAHL";
        protected override string Keyword_Price => "PREIS";
        protected override string Keyword_Amount => "BETRAG";
        protected override string Keyword_Nominal => string.Empty;
        protected override string Keyword_Income => "ERTRAG";
        protected override string Keyword_Coupon => string.Empty;
        protected override string Keyword_Total => "GESAMT";
        protected override string Keyword_AverageRate => "DURCHSCHNITTSKURS";
        protected override string Keyword_Booking => "BUCHUNG";
        protected override string Keyword_Security => string.Empty;
        protected override string Keyword_Number => string.Empty;
        protected override string SECURITIES_SETTLEMENT => "WERTPAPIERABRECHNUNG";
        protected override string DIVIDEND => "DIVIDENDE";
        protected override string INTEREST_PAYMENT => string.Empty;
        protected override string REPAYMENT => string.Empty;
        protected override string ACCRUED_INTEREST => string.Empty;
        protected override string EXTERNAL_COST_SURCHARGE => "Fremdkostenzuschlag";
        protected override string WITHHOLDING_TAX => "Kapitalertragssteuer";
        protected override string DATE => "DATUM";

        /// <summary>
        /// Culture used to parse numbers in German documents (decimal comma).
        /// </summary>
        protected override CultureInfo CULTURE => GermanCulture;

        /// <summary>
        /// Creates the German parser on top of the given PDF-to-words converter.
        /// </summary>
        /// <param name="parsePDfToWords">Converter that turns a PDF file into word tokens.</param>
        public TradeRepublicInvoiceParserDE(IPdfToWordsParser parsePDfToWords) : base(parsePDfToWords)
        {
        }
    }
}
Loading

0 comments on commit d150200

Please sign in to comment.