From def6b7d78e945780f29f9bad16bb88db073f8a3a Mon Sep 17 00:00:00 2001 From: Sinthoras39 Date: Sun, 7 Jan 2024 15:38:38 +0100 Subject: [PATCH 1/4] Add PDF import support for books I took the PR for the epub support as a template. I used pypdf as the library; the license should be fine: https://pypi.org/project/pypdf/ --- lute/book/forms.py | 4 ++-- lute/book/routes.py | 4 +++- lute/book/service.py | 16 ++++++++++++++++ lute/templates/book/create_new.html | 2 +- requirements.txt | 7 +++++-- tests/acceptance/book.feature | 12 ++++++++++++ tests/acceptance/sample_files/Hola.pdf | Bin 0 -> 22442 bytes tests/acceptance/sample_files/invalid.pdf | Bin 0 -> 16 bytes 8 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 tests/acceptance/sample_files/Hola.pdf create mode 100644 tests/acceptance/sample_files/invalid.pdf diff --git a/lute/book/forms.py b/lute/book/forms.py index f6f9e6fe..ceb137e6 100644 --- a/lute/book/forms.py +++ b/lute/book/forms.py @@ -27,8 +27,8 @@ class NewBookForm(FlaskForm): "Text file", validators=[ FileAllowed( - ["txt", "epub"], - "Please upload a valid .txt or .epub file.", + ["txt", "epub", "pdf"], + "Please upload a valid '.txt', '.epub' or '.pdf' file.", ) ], ) diff --git a/lute/book/routes.py b/lute/book/routes.py index e467681a..5586a326 100644 --- a/lute/book/routes.py +++ b/lute/book/routes.py @@ -60,8 +60,10 @@ def _get_file_content(filefielddata): ext = (ext or "").lower() if ext == ".txt": return service.get_textfile_content(filefielddata) - if ext == ".epub": + elif ext == ".epub": return service.get_epub_content(filefielddata) + elif ext == ".pdf": + return service.get_pdf_content_from_form(filefielddata) raise ValueError(f'Unknown file extension "{ext}"') diff --git a/lute/book/service.py b/lute/book/service.py index 7be21c70..e9e0dff0 100644 --- a/lute/book/service.py +++ b/lute/book/service.py @@ -13,6 +13,7 @@ from openepub import Epub, EpubError from werkzeug.utils import secure_filename from lute.book.model import Book 
+from pypdf import PdfReader class BookImportException(Exception): @@ -82,6 +83,21 @@ def get_epub_content(epub_file_field_data): return content +def get_pdf_content_from_form(pdf_file_field_data): + "Get content as a single string from a PDF file using PyPDF2." + content = "" + try: + pdf_reader = PdfReader(pdf_file_field_data) + + for page in pdf_reader.pages: + content += page.extract_text() + + return content + except Exception as e: + msg = f"Could not parse {pdf_file_field_data.filename} (error: {str(e)})" + raise BookImportException(message=msg, cause=e) from e + + def book_from_url(url): "Parse the url and load a new Book." s = None diff --git a/lute/templates/book/create_new.html b/lute/templates/book/create_new.html index 8ceca8f1..6022f47a 100644 --- a/lute/templates/book/create_new.html +++ b/lute/templates/book/create_new.html @@ -33,7 +33,7 @@ - {{ form.textfile.label }} (.txt, .epub) + {{ form.textfile.label }} (.txt, .epub, .pdf) {{ form.textfile() }} diff --git a/requirements.txt b/requirements.txt index 5a15205b..059fe709 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -astroid==2.15.6 +astroid==2.15.6 attrs==23.1.0 beautifulsoup4==4.12.2 black==23.10.1 @@ -8,6 +8,7 @@ cffi==1.16.0 cfgv==3.4.0 charset-normalizer==3.3.1 click==8.1.7 +colorama==0.4.6 coverage==7.3.1 dill==0.3.7 distlib==0.3.7 @@ -37,6 +38,7 @@ mccabe==0.7.0 mypy-extensions==1.0.0 natto-py==1.0.1 nodeenv==1.8.0 +openepub==0.0.6 outcome==1.3.0.post0 packaging==23.1 parse==1.19.1 @@ -50,6 +52,7 @@ pre-commit==3.5.0 pycparser==2.21 pyee==11.0.1 pylint==2.17.5 +pypdf==3.17.4 PySocks==1.7.1 pytest==7.4.2 pytest-base-url==2.0.0 @@ -81,5 +84,5 @@ Werkzeug==2.3.7 wrapt==1.15.0 wsproto==1.2.0 WTForms==3.0.1 +xmltodict==0.13.0 zipp==3.17.0 -openepub==0.0.6 diff --git a/tests/acceptance/book.feature b/tests/acceptance/book.feature index 1ccc5c0b..1e7f55e1 100644 --- a/tests/acceptance/book.feature +++ b/tests/acceptance/book.feature @@ -43,6 +43,18 @@ Feature: Books and 
stats are available Given a Spanish book "Hola" from file invalid.epub Then the page contains "Could not parse invalid.epub" + Scenario: I can import a PDF file. + Given I visit "/" + Given a Spanish book "Hola" from file Hola.pdf + Then the page title is Reading "Hola" + And the reading pane shows: + Tengo/ /un/ /amigo/. + + Scenario: Invalid PDF files are rejected. + Given I visit "/" + Given a Spanish book "Hola" from file invalid.pdf + Then the page contains "Could not parse invalid.pdf" + Scenario: Books and stats are shown on the first page. Given I visit "/" Given a Spanish book "Hola" with content: diff --git a/tests/acceptance/sample_files/Hola.pdf b/tests/acceptance/sample_files/Hola.pdf new file mode 100644 index 0000000000000000000000000000000000000000..aad41d9c58e85928d906e75aaff279d8dbfacb31 GIT binary patch literal 22442 zcmeIa1yt1A_cyGRgoGlEFf>wAbV_%3=g{3HD2=3)lpshbjUXM;9nvY%A>ESi;Pra_ z-S|KES?_w@^{jXOhFLS;?`H3P_SyUFJ#fx^HjRRa7&DlK6`f{babOr73;+S_j4aW4 zd4Ym1&gOQG^s2U2wsvl|kAT9CFhgexJ6jP$XBfQ*Hv|M>1%W{vU{(+-l!FljqJuww zaKXil4XrJV9AN-<76^m|^avkPAXb^<_erlACsg_&3w3fZ}9!|y-#l@)$mve+m0Khj4N`zaBGBqdlUZcO;Q)AA#Y>}Grkd)fEk*=9L4PH zoMDbo5CH!Ct{bjh+1b&>*!ef3oB*Id=6f^zkGKfQ0_E%+Z49k}#)bg!A1pWa{9yU+ zk{e5bLWWMT?-zpKAB-v%HZUhSn46NFjiIfAvaAYF1mW!Xya3MMM)?^tuABS+yF&Jtfc)a^n@RjS!|#&=3fVcrBm3KW1L@xQ)){CB z28sb#;cLs-89sLo76==J3(N}O0CTc{S;0^MCxnF!#Ks8$12~}UEMPV;6s-Hx(J~gc zR)2)}#%KR-P!TQ>C5VhL%-U1R(b-K(-NJ#(#a{654FQ2zI61)_;9o6)8^_57=79W@ zB{$LfVTqW9HGCZd#jN4EA_BWv4nP^0t(mhq0K&=%x82DZUN~&f-BSyrWvyC3cY}7$ z@VK!TB-9qLQJ+;vdu|w}7_KT!HET#uyOg90@Mx}0)F_%W-;HrUI*pIVpL>`^m~^IA z;}>tX^N@wT7W1hNGYn4997H_Hwt3}nX2c-$j$T@NdM;&ZV1 z@t_S`uWBp3~N0VgCErY6HIKAW%b73&6n+Us4cOE;jfT*Kef`0AG%VU~Rxn z;Z*uF6C1oTg2C+Y<)!<}kbkbR!iLU<)^=v!S2f(i-)pS1p$YsK00rIU;n&bzz{YzjvtE$41jQOg8pOuY#4A$)T1~S>Nphl$NDB_f@tN0{8Ri1apl<$ z+#d^}F=toFT73Bss4tzFE_4^sdHT&|VkIxapYcwS z*MXX~)v>gO&*{c9t6=JMZzKRD+yiEA-!o=%OU=+nV8Bj~&-v{}QL&K^>BnO>oB*6j z4DM)`xe-<4u%-w2$1TJg)Z9`>esUviGl6m>_wf<|E2=GOI+YS*w3@xHGw`Yk_u>`nh$ 
zsWkDsOyR;Lyhss)$Zjnz&*iJO2%MDkXHN)4(=YL1UgoCuB7TZvk? z#@F?rq|YKWz6!=jw#VkiGzOACM^ZDE*7uvXZ#4$VnW4(oTviz&+Y{+vA!?k7y1&YL zAtfk{PrFLJBfQbFdrx&e3K^{)Jh(P?6$M2L4VZ_vEi5_ZkFz4cZJ zW|4UH1y<4LmR(L#s`#uJ(;*q1LK*rXGNx(7J0;h`_A%wL^h9(6sCS(4VqcheLa4rJ zd%vU^45OO4GhjfzDuKLjfGyNeqS}jFxslgD7XZ~ko(qcIg_3qmS8bEG=u4mlxvfPw zn>kwSzgY6(#3!l);o>(#2m`k4F}(@qHp+HMFG9VgFgw4!Kc$*IswwNLwYR}Az>2KL zttK#@uH9pf5JYIn=ajyqXpax=S~j_-DSaI6CE-^mr26W|;VC8!}RgNe207F=c!V_szPFmzb;Ykq$YFgy;i_-wJ=z^dBm}-N6xH2;F>| zVt}+Q;!%xp++xhRlRNKmN>hhSzCdzop#@yr@wr&+-L&X-t3A0#DzC7QNE+Qvw7kaa zZ-mdynIi^ir=-S*bP2a}=+^TOpHPTvJlcE^1fBts;^`3xK4{A-LT}-nwViD_Bgm$- zNGlieSuYDcpjmyhI+qsEwb`{Cdm_UR{WiT6v*f~g1&C|9OS)dj*?q2nf^FdbfM$>W zf>6&-f3MU>g#J@TYi%fbcE*D_t$<#d127W+1AJ(pA(dcA2w?2wzRS}E`gXrMX-23) zsG+}w!`}8zr@)IhglT{d@I_22Wg`W7bQC>07*8SDzdSl&*fZ4v_mwoyI^|ufbwChB zO_FT~$03MMBC8P<}+hYjVXiH)jC8oJzkOcAsrhgraF7p%m44`WEG!)(!%!t1dC8U;K8>ZIZ z-5?HNv%f>#WqkP1GxAEU7chJ1mL+k);NgdbE~_%Y{pnJu7sqa_9{Z`6G-Ca;@L_T^ z4h#-D-&>e9=UtCxFAvRLhvZrirwSD|#D5Np_mLVOo*iTyTjLKrs@lHJs>L7oVy};g zLoE(DY4P$)ze-A_Ie9(GDLB=7t3GBdwqtN1jm`rk$liqWjoW-lavxvv7DW^)b}W`< zdnH~jzEbe?7qljEy?fy}Fk@rloRX`7Wjc~RWHu$t=k1R;<6r$4P5Z5&Q z+Rq2kl{&bKq5K?U&__ukI{$TQ;-Hr4!i4LB&wS=LbBy!S@y6GezPAf~c)lU(2tODp zaG<}h+4ax}Lo0`HYHpTLO^|%00n=?gYR%{ND{9=wDzrP|k9={9TKQIWHw8$VIU8WR zPc@#uQ5%HVH51RJr3RnB38M0Tp-^?ZgO}u}PC7L>;UsNIm9yojO5Ej3+9_{={AUc= z(`|ejJGt$kt0lxPMgwLhIex!mH!sJ#mh)=5aV($S8DUOMtadq7kzX=SM!0&B)jd0= zmzdeT9pu}-H!#MBMzv5p=qeWDM(vA6R7Li!N{G>(fDcip}J);1Zt_X9FbU(xxESmw)(mK}b&29YgL3C0(aFqm?Qt{vxt`E(HB&nY- z@3s5Y-o@g#j)SOG@_}pLC=6lRGi}>LBVP9)X0fO@Nb}L2 zm#IC2VM+cz@{FFi57*3Gj5NYC?hI%s@_Oh$yHX?pZiUnh7N zR&g*q$F_z&8$vb-N^@xlAR8(q6x(I>qDwp95gb_4J4`=bB-#@^93vA4uHvLI-M7E% z6JnNOc3QnB?ml#i=4lW77`7Hxk0j7`usSeL;C}3mc!l?*taAQ=r+V%uf%Aw8=kYP- z6b?*GpZSdlk|M1}+pXkL#xu_v9^g1-84zQ@@@=}NI!WHUqPzn7bVaqiga-t8_(+IX z-suZ{J$-v7X4O#Lbql%RSn@p((c&sjwT-3f0QdwL7Y8fxrFFiy&=3jk z895+J7QWnXjVClyW?3GIcIs|6MSDT>%9P!&3dfpA18+xbVtQzE?^`A;+hM4DsF4!u 
zipwFTTl00O8}2wq8_nm4_i247u8eQ87pdH+H%lmUlGVSdW9ob&AwbD2zz%+X$=(4i z7FfLE4U6DO@wBV28leg=uNr4$;(<8k-=AwNIayF54|}6%O(o}FSo%V3^At;``M3@J zSkz6_Yte+j^=*kq30Jmh<BZKCv^tN#~Bo!GI*1JT@1>Z2&uol?y4qGgfVs5 z-P!SUXHH;mK0EH&qv!nQ*Mu4|l3mDxE&J=vI;x8_G%}B~XuaQXwomesGceAky>_^c z))frsxuaWEf%qxNQO7$ys{ZpV8m5{7Eq`4JN4&a0pxNDWM>gVIuo{G4H|A4|R8X!o zi%Qft#Yk+#TKm3`9%KT3_FBe5n67YY)fIMIzVh-`*yR?+%OeJ6s?v8KY<3!`Uoxb7 zWLPtZDj`>WaR7%fvdu+hR+?)9slJF4SRYjuMD+=)wt(A*lRc5KBXX2RsCIJ{e2S|E zAw7r5n%YJR1IxauLNSU@xdQ169lWgd7$S+EG+Y~Hi|jMyw|!b2%~^S1rvU65sG;`l z+U=!6z|vrPy*1k-CKmoO?n7oNF_Fa)VRz}}N*E`X zV34?(MVfJ4wy1JyOJ|pP3xPSOW4o)O@PxkQ0$=*TYctKAjLD~42ZUTJCBc)@q^)hq zANw$d5p1Q^7f>otz{>N9D}II7vC()aWY{DCb|f39oI|Nb)%T>zQr-}B|OgWHLug;VO#NjD~H%91{31m zJYF{{x&vbD5{jM&i$5X5@yzLN_wAg;w}*_E66V5wL$%F|VMhMe`b37)7u~lDXz_K{ z9$g_H#n39AN^-c$`df4F&9l(ps~Wv!GBE@)*N1T{NuH@Ed{8nOHl=?-O9rSR#k1MS|#ZfjJ9OoMNd|QkSSw zuH+2a8YPaBckwhz84J;UM-j<-=P7&5s_EI|j98I~sK!>){Z2VYwqQ@R8bVBdiC60>pXLrOlJbJB7l2pxqp{PBz**_Y@v2UkNFO3z2x9s zE#c1BqC%G_?+5G$A7j?`s_X8kZKYdJ73AUKB)u*8$Sh}v;FeXB2_Y_Ugk^ob{Dz#7 ztc9?wwHQtm5izjuE>_)s#)Sb`bNM=`Pri7WWwo8?)p%a6ja0dkGyN$dXRMv|eHyFZ zv&~>{9Cxkua&nSWjEPQ9dkW-E@#Odk>ygKy{eYBrw>r8SDbBt7EIv%Jiob2H7_Jzv zn4_I~4~ZJS#f4~lNP_deE9ckHH)Q;2r{UfOdGUCI;_OErSh%lD zK(qn+Q8uiJ4vvzT6(^ZsTb9^`f|o+SE*L^m}adi!4$w?o-`IRjH0*Z z=WqTIw_Ktp0rRY!b;=mDh+t@4YLJ?~zMtn2x>RKfV*?DNji*-%$oKNdg?)^_P=HQu zm3PGVCF)L`QNUEmy~guAi;CZ!5H8VlbjVPQeN609dy}V#N3d6ymuIL}MD!RMh?5iM z(=vYIt)Uuv>2ZD#kB5YRPYv#R=HjJo&mQ&FR-9YN3z~iVX2y_!q=S7-Fj_8&?5f+b zR5tr8s78KZ_6su_id9@+=45ik9=czC6az?ri(-a_wAE=I)@*x>FZO8eV_Zmk4%w%P zZkT%(k>I3BCZAw~ryxlEjX-8ns@#R^tB9l4{nUGR<`n=&8YW&U`McbzB zsj8`paGHjWmZI_4-HErFJ$Q=X@3u;DwTl!(9Tw; zys6+Gf5xITdV#1I?d#{BesRV@FGlwEDpiiS&Kc<#7*$5!U?w|?<-K!qV0K{md^X=n z74$}|=^eM-Q+R2&96Q0#vknSCWZ!P6u`|*Szx5sjVn(368z_K<4-Pi@m*_ z!ib-lzcZ(VpKJRA?-=%HNJR9FpU=XPH1n4|y1z2g^l24K=nj=oPtZ83QR?1*k!;HG zgaXFQZu7Z-C6SXOV`?BjQBiTBExVUPM_|XjF z{a&QAOx)9=iLZGD<~4FDB+2oM>w)@Rxo6Su^lppWYGH8>TkA<< 
zb)5_hjjn!_sTQNiAdZE0qzM;%BV=1^;rjSsfJ`m)`8}VJn3$Lp+nP{o+7tua$#NnE zx(-UYAp)8w4Ax>%=k_%H4`}ue6JNb~Gj_&}yqYG_AYf{`R&ELzt;JJ*LMwz7du**- ze@`E#d#^>TRjd`IHONx|_#~54e&AlEb}w|CI_o6Lnl*)Se>0K0?%tf4gHQ$OXAQ&q zEohUV3Pls1pl*a%Go56|3a_>7k}K5kjkhW6%qQPFx zqQFN_pzPtpE@-n61=e~rBWXjY>w~VM!@HpALE@+JRE!+A$HLK_oABO#tT8atU#025 z&dsJG7{&^2dG-hd|MwP+x>kfnt=F=w^5xftA%V-oEkP4M z?SO`c2DEad4ebs0Z=nt|@yh$T%cgf*d_w#`2E8s7@G{HJX`g7ovls%3+?v%4G4)T1WD% zsOqS!dLjMFdP4#6Yo;tNV^_^GWuh(^=a;G4ooDK0vA<+5?{=J3zXQ!Oe2*pCP-?|1|KG~4`>dIouXqK+KG5d zy5f0k*sws>rq-tx@9V4%4F|)ND0bp40_(*2X)s(hV;yK6b@-XTE{hiuQO@))Nzsp{ z;;Hpb!SFs>Rx+r~7sP8!O5CkV7|W^4akU*(VKv9-w?m))q;o`({5T(GRy1QutF&Qy zr-8DZ)St+lYj7g)6H=yR{_6BeZ&Jy-DApsgF~kgK<+v2?WBYT*yVRR~OT`>kup@k= zq-u20BPH~8qWvt#G?ht}{={w~WiG@43pL>1J^;;fl`txsF zEq$E>>aT+@>O=7G|AsBUQ_P%`-X$p?=-h9EvG(Sk9;zUu3)M!SNYOfUnWCDK)BJo zl2UAg`z7|64I`megZRvwTCM~9leEFk%@pn9a^LEMuSxoDbK^-kQR`S{WUd1G2j1tW z`>{RpK!Mts*g_MPY2oz`pO zGND6#_kunJf<=#!u~E{o(3_B|uO*7PH>vndMw!CLs|VZ$Ps-(YWk=@;^G~DIopx-f zY>1VnS*bt7L%c@D;^l^LMaG6uVDFXN0}xYQ2%nuNODGGy9qHAyY2RlWif2m|E%SNo zi{%MI#Q=nhiq*$`h-PA9{=j^iq412I0ejI?1E6BA=Y>pEFri#pEcDhMnxbK7nv+MV zA|On-O*?CCi+@-R+G{rS;+C)LtAA!&;X=f0o>1fenNq2hcB{L$Xi`~(J1ppVdzc)4 zsHj?dsOTydv;z~@cQCa#iE%22saT_kc`nMTNV`98C9#*Sr_gKLf~IgL%DhlTr*(zc zu|6C^YGypJx%utfu+g&(MsEwy_(i?9Q zN;g3oc+`_XPB-osMJR6PmcKzKGo*MuSo&DvWFB&ihj9%5|h2D#KF+mT80aq z0QV`Y>V_v4!qR}a*pzkJ8b1S7mC#Pj zYo;P6FOhEE6hWUyUvddqkCPOkU54y+x63&ALwhkOqj1zbQZOZ`%`*mX#fGxJR!tzLMW ze8Fc3=#b8AMx*&!#!sx#b?L41Dlo{ zTtZ5|5nA$)DK)lnXO{x=cWD>W9-=LIOi+ z!x$UG51&(`bNj>0Kjp-`F&E~!)cQN!Wvq|ayN*8`+Zvy1bnajIdcvG?Fey7)-}Le*KYe%{=t)%wnN5#Bzo?!|S~G6`bh=NQIMPqBCf>|-WWIHowC_m+~ZZB@b@|V!PBp}w}dqx&?#Gz zM``i|PfzFGkBE2|kf5rbyYuM~>G1q0&ISGT>3%jVd%b))soJms>M3(vHd9=+4F0xg zY0|4@H4NNzQd8vceI!PwTEwe3p6s;865o_Lk1ORw!&X~v#quPKUt?oVn3egEESrp?2`=3Bx7uUX2RdgAEYrEkf7 znk(h?objGdz02ZOShv}5g!#&Jjb$lQ%uuh8 zIqtaZxTU*%oiV;Dy)&tcKz@wG!<*5SJkeW>I%qUMvPn>6pp+*&$VZXl5G8=(S-p>j zd53~)Xy}`bU^3W68>cJH;LPW2>mw&v*mSyd)u1u-$Z1tet*By=gs 
zrtWu@^jzg$#AkioHbI_`iP4O0IOvW!OMMLBnGr^5(91KKL=dJI#=7+cd3#G*^c!WA z05q?(57@nP7xGZ2>LK4lykQ=6TK+GUh~b%&iEFBFQ>6S(9Jc35s0n?@P+YXXVfnDJ)*yhJCts~& zoAmUFye^g%;Q_h1Ub~jK2N%p{gv*@->vNF*!w1SX^myTS4hC|j6r?mr5`BfCV6{NG zc;B2P4W8$W`E_|PGG%#~ty@&Pd$*T_*?_@7KUs!ZsE=(nkA`JTb2c{`M@grM2bUC= z)-dkz!Q!Umy15kgm2u3Y4yWVQ36{N7-R08`o#0UnZqoLZQkU4Q*cz_{iS6!ctfJ>w zq9ziGM=}R0a>`$cJ&Ine67$%Tk=s8}iPO``ZM3&pMr#iYjdHip)wR1*1`D467mg&J zVgR?+LXPFu-!BSTcf4@=_~=&pL8wH)*1}sP2Wc<2o#DY`Z*nc1?t2U|JLFxiX4SLX zN$==3J~>)u#eNc~37DsM5HWVwQ(`u;i=%(9l)5T6IDoY}j<>36Bm`Y{d$8JP(*u8n z(IR~{Y}86se}Tgkd>PT^QJ6SrdhUfz!0Nhs#M;y5f>D09KuMJKWrjw2G7K#tIf7Oa zxRF&|7>~CtW6TPLPH&^0GCY7}wc_PbOGuQB;tePuLK!zY6?E$&$H2)}O+)RE#e>}h z#}T;e40aIEF*Y&Q5LofWhjrO7jOsrj>AH5F49dCr6-;I4y-YVIlXrji>LEYK;YtQQ z5NM}4+iQzAi94T;T%~qzy9{IoJHuCBPPq3f;qc<=q$%1N27;+RR_WA!VH0WQeOK*F zh~4comG|$kZNg1;C_kvgn^ikICB^ZVmJaca6OIJpzTX=Z!$qgD{bp{Lmp>}wgDYd- zQUsg!VlW-QpY*7!J5bI{tI&)%A;9U@nzP!M==kHtahx^8@pED z6IuaN?&kLG!-z%Y=VJzgu%w{GT6By&8?xN`7G^f)4w`DP^Q8g_Rdx=GK=zoTA#C+Y zbSbqX{pru2hf9nd&+oS_V|k`SCU4$jv37WU>tiUA+78LZ7q(Z(tdYYo(~UTlzP*of`Qmviy4hc*6U7iQtqx#5=$VP` znt9#{e-_=!XLsqbHXUnXvl-DDCdgx1i68*E5-ja^SJ!3ed={}SO<_~ChQ)^^b(eCt z$t)9E?Uh?Fnc{!yw0t0pg{cBMFn&97*SbzMfgHUNg-3{AC^{a=`WeHc+BioH6+PSZ z#M52jb8Uv&+Whni0lQMGv9N;3S4j`?!P-tu6FQpP3v_rs_-~gi_@8#GoL4;Ut@KHn zCPOatLlsH$ z1reJXBGwiwp6cp@!dIR?M5FAzJ2I{hx}|gZYMV(Ah*B2NOPV=c>oPgq9aS?|i6-!r z45e0Lm-JnXwOI12=)Pn_s`&jadLtS(lEjeo~|}(pQ}?aq`S#QU{teHyguCMJha7F37@ME5YM5)aRLlXL^Mkoirz! 
zytVoDu2V(lVFcd$fQ0rCN;6!j|NAbofDy7@m`^wJ)rHWfkKIK2?D?C9j)}}UPLE_c zrju}zx0sCat2>%vG84_%b6HTn<#Zt3cjNnvR*PosfMpgPtShc7Ksi8?*7IccwcFL0 z#0rs?jx^Nu1<&2eX}uE(y+Q`+eT=3jdi#svt7Y$2XbzUwKXHoE(lex;a4UL?I>xl= z8p)8}KY8w;2=p;GCBRSiNKZ-z&IFdGJaMhfuILEEuk5JBXCYx*?IS;Wen7zd_44*P z3!(^;onxv^g>1dNV1)uQWfq2NW7&1TlOp~}#@a}L!mg!c5!9UdbyF&Wv4Z}Wl5ECk z>GyaG5QA=JLxw&h^$_uo^eu7k9w%T?Z6Eviqfe)Qm-SBF$@&?bmV3qaD7DU^<@lvrgZJeWiJlhe6v%AuC5o+%fI6r9b>W@H)5+au zvvZAm#pA4E+jD!sSi7BT{nhP|d454f`~eI%;pk&M>d+#g2KmkaZF?i7=Q-YO13BJZ zb&vG-!V}t`bXoT&ZY}w|gD#uywa!(kcC>Q&%s8P$zFF@V8Mbk`b41&=F}9kZC40Y? zkGDCz&N^Vfas>BOZEz(+`)Gbd61dDB+1SR{YEiAnkX@vTJZY{ZV`~LS3@yw_X1$k` zq-kWbAX`;{OQUU4ZN1HJN>WY639EiQpQH6D_o5AfbW40jj3-y#jj4%GD7UQSi%}E~ z70;Zq25W!7BO?LBRTPpZhCXIwl!3h(jDe-i8O`n)8pe4C?DPk92M=^BQpb{3Sq{&+ z5Bvgss-sz@JRHl6&{w+AhBD_v9})I;FC3H-%ZkNz<(AleEWn{_6$|R*--p~oD{B`t zQ)A!$q-)_dyap26W4%4JX@O4LJ}j5BK5T?Mu$_8k%e0)G!7Qdsx}yIM8XWG+)6UV4 z5zIY;UQ5@p(pd^yeL6c|y1H_Uqn%?mK&>i@Xuk9*Pb`nVVC84?_bZufWYgxdl1xsG ziGy-aE@R&v1ShWHKd`4_4D^n4_9+ajZjLS)T5qYswi)t?|eL6Dt4Y%Gnab{d&nWE`nj(9t7>-EsG3v1`~u^fWCo7n2UkM^lC zm-LFS>BX#@Q#$N)R9i3XNzKv4!(+zfz(h9ttC2ayr%Nf;&$2grFUHfSi|Z61mm)_; zn;aw8ZrI4D8_rrt#~gUqcDxI!>|UQxSviPt;(59^qp_7LVAZQ8CM+)*EU1~Uyv zXyid@;Jpsa4GM0##(ideJ>X~W#;|t$^|;5K7CMuJZkoR=t9`ZFKh-4&U0V*1&v=TL{woon)RF)C*a->kV{on3Ta9NLnH#UOCbz!7Z#HZ~f~ zX9?pVdlaMQ9fR~;vQ#0QPuG0@yJR9bL7-P6gESq1Zdf16RV>h2H||DuJb#`aS?X81 znVLtAx-;035p5IJ{0{sw_+=d*yIH?VrmHD~!`rNa4V5>F?j%%OS)`nBcUaI>_=mESI}T#~AnPw&FpW)xpa-rI7i zpB3jjRk~gXtBK>W+6t~yfFk!K1BOw}?uigSAPahh>V<QWIW4bpWE}ePWzO&V@^0V;5e2|FH@2{2Yjn>^+|U>k(~PXe356!Y01MXVFN#pr zDGwKG4i{e+ndG1ve@uNiPn{!AHD=f!;UByyVR7wzbYVvFF^;^U%n*!=R%d zDnK*9*`6idu(N_OK_%2q=Vj_-eZd#v7UJ`X|p%kRiFF{VSd<5to|LI__1>br;cPVE!X*b^q(XM zoHP0EYc?#FS?_DI9F%%;y~W2*?l;B_>*C|JU37S6y07K4`GM(3Yojhf45K>TU*D*w-qTjW9me4 zAJwR8onABW4G|;Sk>W)yec>!~#EGr_ImF#I&#_12QT}Kzw?uRf*q0_#h`l>EguKSx z?K4;)-Gt@T1DET|s{irhXk*6Gy(Voyy0J&EFK$Jq04eX{W#w+H^};T=G-7t=$;qSC zx%<6W$-U%Wz2{}d*46EGz?ZZVyKhf&ujuLp(5X9Pz&q|1LhXLJ(ztP?dyxy29WP$S 
zy!q^(ZB;*aaxjGG!_=l;57b$?Oz=%QX#lKr)N#b~w*}o+Ukra~;85;dQAu@?Sjm*ZX(0D;EZeetROEx>p+YUYCSBK zT?qe*UTC>OGL5PDtnKZX2Q{XdS#*|=i`^w3$hJ#vcyiapRzb#;G*8P4nE%I+T;GfM?pi-W|p_*P;KS0Lr>u*1}4a@ zq(OB)$qa!Ev0$GwkAneO-s9@LcXuX^;?3jAJ37>b&e3VEx{Ojo9p>Lx^$T=h7H!>g zYvr0&oSV9E%bZa0{xm+d9oD$0He2nqa8eH3eu%+$X1<+jUiUmVR#K+2WA#0`|3mn9 zO%0?Ez}&40rDD273V(-fU%*3t4<4i#AaLHo&bL%hl9 z=|-Ljy=F4>;scI3AKq0UOdwZ`w&A@0#EZIHfxsG7nqiPdT&6pdNE8ymVG2dGV+saG zp!jzx5t}f*aS=2j#2njO=->w)-%)M}6^cH=rB?imO_6@^AlqGmFvtCEIPb>wQkiGJ zuYVFLvAff#)NyQGVk3-Ah^fKroSWmkItWs;(f4j{1Fu!BK&NB(Xj+X z*QF6 z^=rm)qw-Aub8@36AIci0H1)A9u=g6K2>K|tbnD+hdPCWDUo;25?FzBHcW|2Nq;s4x zsUvRsZHB`$GVJO3OUa1#KKNIxFc26SteC7!MB$&ho!vgpT&hbA4haKJsk{(X6Y=*g z0(VsGD+}xzb|>G%dd8TUrYdCH+`qDO(AkPEA2HiTRlc7K30#~{sKU33+&^bC*&MYz zzr1X^%Dh5cc>HWT3>BpnHJh6TO}Dh+P)FkkOvI{XC*VLbwq{Dfw`;X+$I}QNc1g8aeLaat{ErDM}%tgi!F#HO8Vope}6J4C$V2`>7QwHYKUigA5sZ zD~Dn`Q!5D8@D!u=A|V# znt9rw2mkT9P=HKoO`z&@`|hDX@&yCI6$fztOWNOnO8yT1b%X74w6J%!a|8RM{uB)nFSoM<3TS7M;gK&!J%08 z_SP_)8}GpZc|f=Z6+3ZB5m`g~pU60%h&tTZ8x#KUH~^0Ex|#CtkU6;A4Z{s`4gyq% zL&R>tU>q!*PzVUB3x^lN5kC+%I7*QX4&(vD5m`{UGj#!QOb!PGz{(0`;bi4tX9sYw zY6I9nH-JIN@51owo1b5lal)ZooHuO0qIrIS#N)Hc@Cs67_*^D8psWYNG@1w zJ*yZ+d{23tAF$9>9Fvi7y=PApu2d`0+{VnoQh1 z-4Msj=W7%SV%@wL2b-_H?T}WMmL(?BEY)vY=GkFSA~a@G<6Av>Jg+VU(WaGJ+*<54 zVIQyXy(I~ZD*Bt``yCDV17D^4I}Q?Z6PX{&^q+8>aNyrhp8r?CO~`l5*#91I6Y?E~ z`A1%TM{)g{SF*6*@Lj)S`dHaHIDf8b4Oy8#mJ6?dyS<^Y70el61kWH_KFXciMoNH% z2_L08n=DAyUKnO>A>ru=Q}&coG4`}F<}#ra;78|m=XSTTw}FoWaJR9xb>epCqcprp zJZ|{)4H-zuj}8FvI+~brD~X8xV1nQAQJOnD+j9efZf8NC)5`?}L4Xhl zGn|9j$-~y!(4E=ViR!zKA6kA-Q8<+Ohc*-ApUUiA9Id~rGBF0ie#?3%AiQJ(DFHVY z7#i8RIP+2dPzP6NV$2PP##+N}Z2ax<-BLko=RYVv#Bx<&Sy7LFE4s)L#U?8~UGm zf0O!Uk^ZLq`y79v{+=y1_CMFItrN@l%waLMvjMupKc3(poA7n}Bdu)gEleE#f!F5u zk1|sWIFi@F1&%HMP4;isJUm~ye=iF+n1247RshT(Ff)il1Tk3vvK=`Tg$2~L5$j;8{w;0*jGyk;u z_dxw%Q_-SNxlc|8Lxd{>PRPyfqRZrQ6Mh&|lgmL4Mh}{JB5&`+vg! 
z|J&LAzKi$k4&48JJKNtQ@^5#xA@Ggvo2}?y_M*RUI{ujA|F9PgVf$lq(LGf+y2r8iqW)CHpjsS(1;j$ZR zau_LiKpOa@vHdgxs^lm2ftiyOHWU`h0i{~=d&NyxBR?&fDA3V?f3H8g*mrp1i+yN2 zB}uXZTf7QJUztjN^o+oh+@McBME%=miZiN53Ir!tY1J2vUi7sP>9qF&_o>Bq15Wtb zaG>!6*n%l`e1Bc{-`fKIx)6Re5Cr~q4s)}!{I@L)|Hh+Y@P2_esRQpW_Mv2@-Y+AtQMO4EeB*n}TXX4AfZAlkhgalzR>sNkw6L`OUOO(B6y9 zu@`}ISjG_Zo3j8kaL?c0P>k9vLWb64l7TKnPSiI%+SfmX5Qyt2< zF6&gPM$zh&Cl>5><`+B~e-p3Y_wls>(C=N=ex3q+zx`jYcks>m*3YvSS%vB62;jR< z2Go~4n&ens$zf2g6}?o?{&JA{2C(*}>Utmt`kcKSzT-`oU5`km8NND?KALmMS8<+38`!TPt4XvX##SW0ii zdesx}TR>a9Z_^S4#udVXsI)r7Gn1JIm@$$@_G1|*yjoB$PqbTjWwcGtOPDVZACHKW z{?%>njxbYncoR%0I_UQY8-SgS4ax>E1^lGlG^M-M1Az&!$FTQtnG=!gDJKprel@!tDWfKvGLd00XLZV`v ztgJ#@Y(h{$P7w|+Q7(2URG1aQ|G(zY1>PJze=oZ5hO_VY4xm literal 0 HcmV?d00001 diff --git a/tests/acceptance/sample_files/invalid.pdf b/tests/acceptance/sample_files/invalid.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b733da8e1f82343c4862c36b735c902d19a3795e GIT binary patch literal 16 XcmWGZ%}dW$D9uwy%*{;C*W&^JF%Jc1 literal 0 HcmV?d00001 From 15dceb42d6a5974fc6be8ef1f6419f9b032af9f9 Mon Sep 17 00:00:00 2001 From: Sinthoras39 Date: Mon, 8 Jan 2024 14:05:32 +0100 Subject: [PATCH 2/4] fix failed test --- tests/acceptance/sample_files/Hola.pdf | Bin 22442 -> 1433 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/acceptance/sample_files/Hola.pdf b/tests/acceptance/sample_files/Hola.pdf index aad41d9c58e85928d906e75aaff279d8dbfacb31..ada400c6da3381bc5a2e9d640ef28d20b6f1feeb 100644 GIT binary patch literal 1433 zcmb7E-FDhI5Wd$_%-J3`^c0Yd?I0SKoP?i*5>iS?v)TL#TY$4hVr98WdeIkX?|R>7 z?TA3iNp?@p`ohwTW=1ogzL{LV*)F;zkL138`}zY0Vy?6~;3L=*QfO{O1ZV+~3bUDz z2ByZ$-%@&edt1^A+k}rwp_&mF98jYhQmYZ#cA<>ofYj?m$jGX#ygJ_Hsc0(F;-uz z@JJX8MJ+^hDDJSN&WumQ?s9D`*A84@fwAbV_%3=g{3HD2=3)lpshbjUXM;9nvY%A>ESi;Pra_ z-S|KES?_w@^{jXOhFLS;?`H3P_SyUFJ#fx^HjRRa7&DlK6`f{babOr73;+S_j4aW4 zd4Ym1&gOQG^s2U2wsvl|kAT9CFhgexJ6jP$XBfQ*Hv|M>1%W{vU{(+-l!FljqJuww 
zaKXil4XrJV9AN-<76^m|^avkPAXb^<_erlACsg_&3w3fZ}9!|y-#l@)$mve+m0Khj4N`zaBGBqdlUZcO;Q)AA#Y>}Grkd)fEk*=9L4PH zoMDbo5CH!Ct{bjh+1b&>*!ef3oB*Id=6f^zkGKfQ0_E%+Z49k}#)bg!A1pWa{9yU+ zk{e5bLWWMT?-zpKAB-v%HZUhSn46NFjiIfAvaAYF1mW!Xya3MMM)?^tuABS+yF&Jtfc)a^n@RjS!|#&=3fVcrBm3KW1L@xQ)){CB z28sb#;cLs-89sLo76==J3(N}O0CTc{S;0^MCxnF!#Ks8$12~}UEMPV;6s-Hx(J~gc zR)2)}#%KR-P!TQ>C5VhL%-U1R(b-K(-NJ#(#a{654FQ2zI61)_;9o6)8^_57=79W@ zB{$LfVTqW9HGCZd#jN4EA_BWv4nP^0t(mhq0K&=%x82DZUN~&f-BSyrWvyC3cY}7$ z@VK!TB-9qLQJ+;vdu|w}7_KT!HET#uyOg90@Mx}0)F_%W-;HrUI*pIVpL>`^m~^IA z;}>tX^N@wT7W1hNGYn4997H_Hwt3}nX2c-$j$T@NdM;&ZV1 z@t_S`uWBp3~N0VgCErY6HIKAW%b73&6n+Us4cOE;jfT*Kef`0AG%VU~Rxn z;Z*uF6C1oTg2C+Y<)!<}kbkbR!iLU<)^=v!S2f(i-)pS1p$YsK00rIU;n&bzz{YzjvtE$41jQOg8pOuY#4A$)T1~S>Nphl$NDB_f@tN0{8Ri1apl<$ z+#d^}F=toFT73Bss4tzFE_4^sdHT&|VkIxapYcwS z*MXX~)v>gO&*{c9t6=JMZzKRD+yiEA-!o=%OU=+nV8Bj~&-v{}QL&K^>BnO>oB*6j z4DM)`xe-<4u%-w2$1TJg)Z9`>esUviGl6m>_wf<|E2=GOI+YS*w3@xHGw`Yk_u>`nh$ zsWkDsOyR;Lyhss)$Zjnz&*iJO2%MDkXHN)4(=YL1UgoCuB7TZvk? z#@F?rq|YKWz6!=jw#VkiGzOACM^ZDE*7uvXZ#4$VnW4(oTviz&+Y{+vA!?k7y1&YL zAtfk{PrFLJBfQbFdrx&e3K^{)Jh(P?6$M2L4VZ_vEi5_ZkFz4cZJ zW|4UH1y<4LmR(L#s`#uJ(;*q1LK*rXGNx(7J0;h`_A%wL^h9(6sCS(4VqcheLa4rJ zd%vU^45OO4GhjfzDuKLjfGyNeqS}jFxslgD7XZ~ko(qcIg_3qmS8bEG=u4mlxvfPw zn>kwSzgY6(#3!l);o>(#2m`k4F}(@qHp+HMFG9VgFgw4!Kc$*IswwNLwYR}Az>2KL zttK#@uH9pf5JYIn=ajyqXpax=S~j_-DSaI6CE-^mr26W|;VC8!}RgNe207F=c!V_szPFmzb;Ykq$YFgy;i_-wJ=z^dBm}-N6xH2;F>| zVt}+Q;!%xp++xhRlRNKmN>hhSzCdzop#@yr@wr&+-L&X-t3A0#DzC7QNE+Qvw7kaa zZ-mdynIi^ir=-S*bP2a}=+^TOpHPTvJlcE^1fBts;^`3xK4{A-LT}-nwViD_Bgm$- zNGlieSuYDcpjmyhI+qsEwb`{Cdm_UR{WiT6v*f~g1&C|9OS)dj*?q2nf^FdbfM$>W zf>6&-f3MU>g#J@TYi%fbcE*D_t$<#d127W+1AJ(pA(dcA2w?2wzRS}E`gXrMX-23) zsG+}w!`}8zr@)IhglT{d@I_22Wg`W7bQC>07*8SDzdSl&*fZ4v_mwoyI^|ufbwChB zO_FT~$03MMBC8P<}+hYjVXiH)jC8oJzkOcAsrhgraF7p%m44`WEG!)(!%!t1dC8U;K8>ZIZ z-5?HNv%f>#WqkP1GxAEU7chJ1mL+k);NgdbE~_%Y{pnJu7sqa_9{Z`6G-Ca;@L_T^ z4h#-D-&>e9=UtCxFAvRLhvZrirwSD|#D5Np_mLVOo*iTyTjLKrs@lHJs>L7oVy};g 
zLoE(DY4P$)ze-A_Ie9(GDLB=7t3GBdwqtN1jm`rk$liqWjoW-lavxvv7DW^)b}W`< zdnH~jzEbe?7qljEy?fy}Fk@rloRX`7Wjc~RWHu$t=k1R;<6r$4P5Z5&Q z+Rq2kl{&bKq5K?U&__ukI{$TQ;-Hr4!i4LB&wS=LbBy!S@y6GezPAf~c)lU(2tODp zaG<}h+4ax}Lo0`HYHpTLO^|%00n=?gYR%{ND{9=wDzrP|k9={9TKQIWHw8$VIU8WR zPc@#uQ5%HVH51RJr3RnB38M0Tp-^?ZgO}u}PC7L>;UsNIm9yojO5Ej3+9_{={AUc= z(`|ejJGt$kt0lxPMgwLhIex!mH!sJ#mh)=5aV($S8DUOMtadq7kzX=SM!0&B)jd0= zmzdeT9pu}-H!#MBMzv5p=qeWDM(vA6R7Li!N{G>(fDcip}J);1Zt_X9FbU(xxESmw)(mK}b&29YgL3C0(aFqm?Qt{vxt`E(HB&nY- z@3s5Y-o@g#j)SOG@_}pLC=6lRGi}>LBVP9)X0fO@Nb}L2 zm#IC2VM+cz@{FFi57*3Gj5NYC?hI%s@_Oh$yHX?pZiUnh7N zR&g*q$F_z&8$vb-N^@xlAR8(q6x(I>qDwp95gb_4J4`=bB-#@^93vA4uHvLI-M7E% z6JnNOc3QnB?ml#i=4lW77`7Hxk0j7`usSeL;C}3mc!l?*taAQ=r+V%uf%Aw8=kYP- z6b?*GpZSdlk|M1}+pXkL#xu_v9^g1-84zQ@@@=}NI!WHUqPzn7bVaqiga-t8_(+IX z-suZ{J$-v7X4O#Lbql%RSn@p((c&sjwT-3f0QdwL7Y8fxrFFiy&=3jk z895+J7QWnXjVClyW?3GIcIs|6MSDT>%9P!&3dfpA18+xbVtQzE?^`A;+hM4DsF4!u zipwFTTl00O8}2wq8_nm4_i247u8eQ87pdH+H%lmUlGVSdW9ob&AwbD2zz%+X$=(4i z7FfLE4U6DO@wBV28leg=uNr4$;(<8k-=AwNIayF54|}6%O(o}FSo%V3^At;``M3@J zSkz6_Yte+j^=*kq30Jmh<BZKCv^tN#~Bo!GI*1JT@1>Z2&uol?y4qGgfVs5 z-P!SUXHH;mK0EH&qv!nQ*Mu4|l3mDxE&J=vI;x8_G%}B~XuaQXwomesGceAky>_^c z))frsxuaWEf%qxNQO7$ys{ZpV8m5{7Eq`4JN4&a0pxNDWM>gVIuo{G4H|A4|R8X!o zi%Qft#Yk+#TKm3`9%KT3_FBe5n67YY)fIMIzVh-`*yR?+%OeJ6s?v8KY<3!`Uoxb7 zWLPtZDj`>WaR7%fvdu+hR+?)9slJF4SRYjuMD+=)wt(A*lRc5KBXX2RsCIJ{e2S|E zAw7r5n%YJR1IxauLNSU@xdQ169lWgd7$S+EG+Y~Hi|jMyw|!b2%~^S1rvU65sG;`l z+U=!6z|vrPy*1k-CKmoO?n7oNF_Fa)VRz}}N*E`X zV34?(MVfJ4wy1JyOJ|pP3xPSOW4o)O@PxkQ0$=*TYctKAjLD~42ZUTJCBc)@q^)hq zANw$d5p1Q^7f>otz{>N9D}II7vC()aWY{DCb|f39oI|Nb)%T>zQr-}B|OgWHLug;VO#NjD~H%91{31m zJYF{{x&vbD5{jM&i$5X5@yzLN_wAg;w}*_E66V5wL$%F|VMhMe`b37)7u~lDXz_K{ z9$g_H#n39AN^-c$`df4F&9l(ps~Wv!GBE@)*N1T{NuH@Ed{8nOHl=?-O9rSR#k1MS|#ZfjJ9OoMNd|QkSSw zuH+2a8YPaBckwhz84J;UM-j<-=P7&5s_EI|j98I~sK!>){Z2VYwqQ@R8bVBdiC60>pXLrOlJbJB7l2pxqp{PBz**_Y@v2UkNFO3z2x9s zE#c1BqC%G_?+5G$A7j?`s_X8kZKYdJ73AUKB)u*8$Sh}v;FeXB2_Y_Ugk^ob{Dz#7 
ztc9?wwHQtm5izjuE>_)s#)Sb`bNM=`Pri7WWwo8?)p%a6ja0dkGyN$dXRMv|eHyFZ zv&~>{9Cxkua&nSWjEPQ9dkW-E@#Odk>ygKy{eYBrw>r8SDbBt7EIv%Jiob2H7_Jzv zn4_I~4~ZJS#f4~lNP_deE9ckHH)Q;2r{UfOdGUCI;_OErSh%lD zK(qn+Q8uiJ4vvzT6(^ZsTb9^`f|o+SE*L^m}adi!4$w?o-`IRjH0*Z z=WqTIw_Ktp0rRY!b;=mDh+t@4YLJ?~zMtn2x>RKfV*?DNji*-%$oKNdg?)^_P=HQu zm3PGVCF)L`QNUEmy~guAi;CZ!5H8VlbjVPQeN609dy}V#N3d6ymuIL}MD!RMh?5iM z(=vYIt)Uuv>2ZD#kB5YRPYv#R=HjJo&mQ&FR-9YN3z~iVX2y_!q=S7-Fj_8&?5f+b zR5tr8s78KZ_6su_id9@+=45ik9=czC6az?ri(-a_wAE=I)@*x>FZO8eV_Zmk4%w%P zZkT%(k>I3BCZAw~ryxlEjX-8ns@#R^tB9l4{nUGR<`n=&8YW&U`McbzB zsj8`paGHjWmZI_4-HErFJ$Q=X@3u;DwTl!(9Tw; zys6+Gf5xITdV#1I?d#{BesRV@FGlwEDpiiS&Kc<#7*$5!U?w|?<-K!qV0K{md^X=n z74$}|=^eM-Q+R2&96Q0#vknSCWZ!P6u`|*Szx5sjVn(368z_K<4-Pi@m*_ z!ib-lzcZ(VpKJRA?-=%HNJR9FpU=XPH1n4|y1z2g^l24K=nj=oPtZ83QR?1*k!;HG zgaXFQZu7Z-C6SXOV`?BjQBiTBExVUPM_|XjF z{a&QAOx)9=iLZGD<~4FDB+2oM>w)@Rxo6Su^lppWYGH8>TkA<< zb)5_hjjn!_sTQNiAdZE0qzM;%BV=1^;rjSsfJ`m)`8}VJn3$Lp+nP{o+7tua$#NnE zx(-UYAp)8w4Ax>%=k_%H4`}ue6JNb~Gj_&}yqYG_AYf{`R&ELzt;JJ*LMwz7du**- ze@`E#d#^>TRjd`IHONx|_#~54e&AlEb}w|CI_o6Lnl*)Se>0K0?%tf4gHQ$OXAQ&q zEohUV3Pls1pl*a%Go56|3a_>7k}K5kjkhW6%qQPFx zqQFN_pzPtpE@-n61=e~rBWXjY>w~VM!@HpALE@+JRE!+A$HLK_oABO#tT8atU#025 z&dsJG7{&^2dG-hd|MwP+x>kfnt=F=w^5xftA%V-oEkP4M z?SO`c2DEad4ebs0Z=nt|@yh$T%cgf*d_w#`2E8s7@G{HJX`g7ovls%3+?v%4G4)T1WD% zsOqS!dLjMFdP4#6Yo;tNV^_^GWuh(^=a;G4ooDK0vA<+5?{=J3zXQ!Oe2*pCP-?|1|KG~4`>dIouXqK+KG5d zy5f0k*sws>rq-tx@9V4%4F|)ND0bp40_(*2X)s(hV;yK6b@-XTE{hiuQO@))Nzsp{ z;;Hpb!SFs>Rx+r~7sP8!O5CkV7|W^4akU*(VKv9-w?m))q;o`({5T(GRy1QutF&Qy zr-8DZ)St+lYj7g)6H=yR{_6BeZ&Jy-DApsgF~kgK<+v2?WBYT*yVRR~OT`>kup@k= zq-u20BPH~8qWvt#G?ht}{={w~WiG@43pL>1J^;;fl`txsF zEq$E>>aT+@>O=7G|AsBUQ_P%`-X$p?=-h9EvG(Sk9;zUu3)M!SNYOfUnWCDK)BJo zl2UAg`z7|64I`megZRvwTCM~9leEFk%@pn9a^LEMuSxoDbK^-kQR`S{WUd1G2j1tW z`>{RpK!Mts*g_MPY2oz`pO zGND6#_kunJf<=#!u~E{o(3_B|uO*7PH>vndMw!CLs|VZ$Ps-(YWk=@;^G~DIopx-f zY>1VnS*bt7L%c@D;^l^LMaG6uVDFXN0}xYQ2%nuNODGGy9qHAyY2RlWif2m|E%SNo 
zi{%MI#Q=nhiq*$`h-PA9{=j^iq412I0ejI?1E6BA=Y>pEFri#pEcDhMnxbK7nv+MV zA|On-O*?CCi+@-R+G{rS;+C)LtAA!&;X=f0o>1fenNq2hcB{L$Xi`~(J1ppVdzc)4 zsHj?dsOTydv;z~@cQCa#iE%22saT_kc`nMTNV`98C9#*Sr_gKLf~IgL%DhlTr*(zc zu|6C^YGypJx%utfu+g&(MsEwy_(i?9Q zN;g3oc+`_XPB-osMJR6PmcKzKGo*MuSo&DvWFB&ihj9%5|h2D#KF+mT80aq z0QV`Y>V_v4!qR}a*pzkJ8b1S7mC#Pj zYo;P6FOhEE6hWUyUvddqkCPOkU54y+x63&ALwhkOqj1zbQZOZ`%`*mX#fGxJR!tzLMW ze8Fc3=#b8AMx*&!#!sx#b?L41Dlo{ zTtZ5|5nA$)DK)lnXO{x=cWD>W9-=LIOi+ z!x$UG51&(`bNj>0Kjp-`F&E~!)cQN!Wvq|ayN*8`+Zvy1bnajIdcvG?Fey7)-}Le*KYe%{=t)%wnN5#Bzo?!|S~G6`bh=NQIMPqBCf>|-WWIHowC_m+~ZZB@b@|V!PBp}w}dqx&?#Gz zM``i|PfzFGkBE2|kf5rbyYuM~>G1q0&ISGT>3%jVd%b))soJms>M3(vHd9=+4F0xg zY0|4@H4NNzQd8vceI!PwTEwe3p6s;865o_Lk1ORw!&X~v#quPKUt?oVn3egEESrp?2`=3Bx7uUX2RdgAEYrEkf7 znk(h?objGdz02ZOShv}5g!#&Jjb$lQ%uuh8 zIqtaZxTU*%oiV;Dy)&tcKz@wG!<*5SJkeW>I%qUMvPn>6pp+*&$VZXl5G8=(S-p>j zd53~)Xy}`bU^3W68>cJH;LPW2>mw&v*mSyd)u1u-$Z1tet*By=gs zrtWu@^jzg$#AkioHbI_`iP4O0IOvW!OMMLBnGr^5(91KKL=dJI#=7+cd3#G*^c!WA z05q?(57@nP7xGZ2>LK4lykQ=6TK+GUh~b%&iEFBFQ>6S(9Jc35s0n?@P+YXXVfnDJ)*yhJCts~& zoAmUFye^g%;Q_h1Ub~jK2N%p{gv*@->vNF*!w1SX^myTS4hC|j6r?mr5`BfCV6{NG zc;B2P4W8$W`E_|PGG%#~ty@&Pd$*T_*?_@7KUs!ZsE=(nkA`JTb2c{`M@grM2bUC= z)-dkz!Q!Umy15kgm2u3Y4yWVQ36{N7-R08`o#0UnZqoLZQkU4Q*cz_{iS6!ctfJ>w zq9ziGM=}R0a>`$cJ&Ine67$%Tk=s8}iPO``ZM3&pMr#iYjdHip)wR1*1`D467mg&J zVgR?+LXPFu-!BSTcf4@=_~=&pL8wH)*1}sP2Wc<2o#DY`Z*nc1?t2U|JLFxiX4SLX zN$==3J~>)u#eNc~37DsM5HWVwQ(`u;i=%(9l)5T6IDoY}j<>36Bm`Y{d$8JP(*u8n z(IR~{Y}86se}Tgkd>PT^QJ6SrdhUfz!0Nhs#M;y5f>D09KuMJKWrjw2G7K#tIf7Oa zxRF&|7>~CtW6TPLPH&^0GCY7}wc_PbOGuQB;tePuLK!zY6?E$&$H2)}O+)RE#e>}h z#}T;e40aIEF*Y&Q5LofWhjrO7jOsrj>AH5F49dCr6-;I4y-YVIlXrji>LEYK;YtQQ z5NM}4+iQzAi94T;T%~qzy9{IoJHuCBPPq3f;qc<=q$%1N27;+RR_WA!VH0WQeOK*F zh~4comG|$kZNg1;C_kvgn^ikICB^ZVmJaca6OIJpzTX=Z!$qgD{bp{Lmp>}wgDYd- zQUsg!VlW-QpY*7!J5bI{tI&)%A;9U@nzP!M==kHtahx^8@pED z6IuaN?&kLG!-z%Y=VJzgu%w{GT6By&8?xN`7G^f)4w`DP^Q8g_Rdx=GK=zoTA#C+Y 
zbSbqX{pru2hf9nd&+oS_V|k`SCU4$jv37WU>tiUA+78LZ7q(Z(tdYYo(~UTlzP*of`Qmviy4hc*6U7iQtqx#5=$VP` znt9#{e-_=!XLsqbHXUnXvl-DDCdgx1i68*E5-ja^SJ!3ed={}SO<_~ChQ)^^b(eCt z$t)9E?Uh?Fnc{!yw0t0pg{cBMFn&97*SbzMfgHUNg-3{AC^{a=`WeHc+BioH6+PSZ z#M52jb8Uv&+Whni0lQMGv9N;3S4j`?!P-tu6FQpP3v_rs_-~gi_@8#GoL4;Ut@KHn zCPOatLlsH$ z1reJXBGwiwp6cp@!dIR?M5FAzJ2I{hx}|gZYMV(Ah*B2NOPV=c>oPgq9aS?|i6-!r z45e0Lm-JnXwOI12=)Pn_s`&jadLtS(lEjeo~|}(pQ}?aq`S#QU{teHyguCMJha7F37@ME5YM5)aRLlXL^Mkoirz! zytVoDu2V(lVFcd$fQ0rCN;6!j|NAbofDy7@m`^wJ)rHWfkKIK2?D?C9j)}}UPLE_c zrju}zx0sCat2>%vG84_%b6HTn<#Zt3cjNnvR*PosfMpgPtShc7Ksi8?*7IccwcFL0 z#0rs?jx^Nu1<&2eX}uE(y+Q`+eT=3jdi#svt7Y$2XbzUwKXHoE(lex;a4UL?I>xl= z8p)8}KY8w;2=p;GCBRSiNKZ-z&IFdGJaMhfuILEEuk5JBXCYx*?IS;Wen7zd_44*P z3!(^;onxv^g>1dNV1)uQWfq2NW7&1TlOp~}#@a}L!mg!c5!9UdbyF&Wv4Z}Wl5ECk z>GyaG5QA=JLxw&h^$_uo^eu7k9w%T?Z6Eviqfe)Qm-SBF$@&?bmV3qaD7DU^<@lvrgZJeWiJlhe6v%AuC5o+%fI6r9b>W@H)5+au zvvZAm#pA4E+jD!sSi7BT{nhP|d454f`~eI%;pk&M>d+#g2KmkaZF?i7=Q-YO13BJZ zb&vG-!V}t`bXoT&ZY}w|gD#uywa!(kcC>Q&%s8P$zFF@V8Mbk`b41&=F}9kZC40Y? 
zkGDCz&N^Vfas>BOZEz(+`)Gbd61dDB+1SR{YEiAnkX@vTJZY{ZV`~LS3@yw_X1$k` zq-kWbAX`;{OQUU4ZN1HJN>WY639EiQpQH6D_o5AfbW40jj3-y#jj4%GD7UQSi%}E~ z70;Zq25W!7BO?LBRTPpZhCXIwl!3h(jDe-i8O`n)8pe4C?DPk92M=^BQpb{3Sq{&+ z5Bvgss-sz@JRHl6&{w+AhBD_v9})I;FC3H-%ZkNz<(AleEWn{_6$|R*--p~oD{B`t zQ)A!$q-)_dyap26W4%4JX@O4LJ}j5BK5T?Mu$_8k%e0)G!7Qdsx}yIM8XWG+)6UV4 z5zIY;UQ5@p(pd^yeL6c|y1H_Uqn%?mK&>i@Xuk9*Pb`nVVC84?_bZufWYgxdl1xsG ziGy-aE@R&v1ShWHKd`4_4D^n4_9+ajZjLS)T5qYswi)t?|eL6Dt4Y%Gnab{d&nWE`nj(9t7>-EsG3v1`~u^fWCo7n2UkM^lC zm-LFS>BX#@Q#$N)R9i3XNzKv4!(+zfz(h9ttC2ayr%Nf;&$2grFUHfSi|Z61mm)_; zn;aw8ZrI4D8_rrt#~gUqcDxI!>|UQxSviPt;(59^qp_7LVAZQ8CM+)*EU1~Uyv zXyid@;Jpsa4GM0##(ideJ>X~W#;|t$^|;5K7CMuJZkoR=t9`ZFKh-4&U0V*1&v=TL{woon)RF)C*a->kV{on3Ta9NLnH#UOCbz!7Z#HZ~f~ zX9?pVdlaMQ9fR~;vQ#0QPuG0@yJR9bL7-P6gESq1Zdf16RV>h2H||DuJb#`aS?X81 znVLtAx-;035p5IJ{0{sw_+=d*yIH?VrmHD~!`rNa4V5>F?j%%OS)`nBcUaI>_=mESI}T#~AnPw&FpW)xpa-rI7i zpB3jjRk~gXtBK>W+6t~yfFk!K1BOw}?uigSAPahh>V<QWIW4bpWE}ePWzO&V@^0V;5e2|FH@2{2Yjn>^+|U>k(~PXe356!Y01MXVFN#pr zDGwKG4i{e+ndG1ve@uNiPn{!AHD=f!;UByyVR7wzbYVvFF^;^U%n*!=R%d zDnK*9*`6idu(N_OK_%2q=Vj_-eZd#v7UJ`X|p%kRiFF{VSd<5to|LI__1>br;cPVE!X*b^q(XM zoHP0EYc?#FS?_DI9F%%;y~W2*?l;B_>*C|JU37S6y07K4`GM(3Yojhf45K>TU*D*w-qTjW9me4 zAJwR8onABW4G|;Sk>W)yec>!~#EGr_ImF#I&#_12QT}Kzw?uRf*q0_#h`l>EguKSx z?K4;)-Gt@T1DET|s{irhXk*6Gy(Voyy0J&EFK$Jq04eX{W#w+H^};T=G-7t=$;qSC zx%<6W$-U%Wz2{}d*46EGz?ZZVyKhf&ujuLp(5X9Pz&q|1LhXLJ(ztP?dyxy29WP$S zy!q^(ZB;*aaxjGG!_=l;57b$?Oz=%QX#lKr)N#b~w*}o+Ukra~;85;dQAu@?Sjm*ZX(0D;EZeetROEx>p+YUYCSBK zT?qe*UTC>OGL5PDtnKZX2Q{XdS#*|=i`^w3$hJ#vcyiapRzb#;G*8P4nE%I+T;GfM?pi-W|p_*P;KS0Lr>u*1}4a@ zq(OB)$qa!Ev0$GwkAneO-s9@LcXuX^;?3jAJ37>b&e3VEx{Ojo9p>Lx^$T=h7H!>g zYvr0&oSV9E%bZa0{xm+d9oD$0He2nqa8eH3eu%+$X1<+jUiUmVR#K+2WA#0`|3mn9 zO%0?Ez}&40rDD273V(-fU%*3t4<4i#AaLHo&bL%hl9 z=|-Ljy=F4>;scI3AKq0UOdwZ`w&A@0#EZIHfxsG7nqiPdT&6pdNE8ymVG2dGV+saG zp!jzx5t}f*aS=2j#2njO=->w)-%)M}6^cH=rB?imO_6@^AlqGmFvtCEIPb>wQkiGJ zuYVFLvAff#)NyQGVk3-Ah^fKroSWmkItWs;(f4j{1Fu!BK&NB(Xj+X z*QF6 
z^=rm)qw-Aub8@36AIci0H1)A9u=g6K2>K|tbnD+hdPCWDUo;25?FzBHcW|2Nq;s4x zsUvRsZHB`$GVJO3OUa1#KKNIxFc26SteC7!MB$&ho!vgpT&hbA4haKJsk{(X6Y=*g z0(VsGD+}xzb|>G%dd8TUrYdCH+`qDO(AkPEA2HiTRlc7K30#~{sKU33+&^bC*&MYz zzr1X^%Dh5cc>HWT3>BpnHJh6TO}Dh+P)FkkOvI{XC*VLbwq{Dfw`;X+$I}QNc1g8aeLaat{ErDM}%tgi!F#HO8Vope}6J4C$V2`>7QwHYKUigA5sZ zD~Dn`Q!5D8@D!u=A|V# znt9rw2mkT9P=HKoO`z&@`|hDX@&yCI6$fztOWNOnO8yT1b%X74w6J%!a|8RM{uB)nFSoM<3TS7M;gK&!J%08 z_SP_)8}GpZc|f=Z6+3ZB5m`g~pU60%h&tTZ8x#KUH~^0Ex|#CtkU6;A4Z{s`4gyq% zL&R>tU>q!*PzVUB3x^lN5kC+%I7*QX4&(vD5m`{UGj#!QOb!PGz{(0`;bi4tX9sYw zY6I9nH-JIN@51owo1b5lal)ZooHuO0qIrIS#N)Hc@Cs67_*^D8psWYNG@1w zJ*yZ+d{23tAF$9>9Fvi7y=PApu2d`0+{VnoQh1 z-4Msj=W7%SV%@wL2b-_H?T}WMmL(?BEY)vY=GkFSA~a@G<6Av>Jg+VU(WaGJ+*<54 zVIQyXy(I~ZD*Bt``yCDV17D^4I}Q?Z6PX{&^q+8>aNyrhp8r?CO~`l5*#91I6Y?E~ z`A1%TM{)g{SF*6*@Lj)S`dHaHIDf8b4Oy8#mJ6?dyS<^Y70el61kWH_KFXciMoNH% z2_L08n=DAyUKnO>A>ru=Q}&coG4`}F<}#ra;78|m=XSTTw}FoWaJR9xb>epCqcprp zJZ|{)4H-zuj}8FvI+~brD~X8xV1nQAQJOnD+j9efZf8NC)5`?}L4Xhl zGn|9j$-~y!(4E=ViR!zKA6kA-Q8<+Ohc*-ApUUiA9Id~rGBF0ie#?3%AiQJ(DFHVY z7#i8RIP+2dPzP6NV$2PP##+N}Z2ax<-BLko=RYVv#Bx<&Sy7LFE4s)L#U?8~UGm zf0O!Uk^ZLq`y79v{+=y1_CMFItrN@l%waLMvjMupKc3(poA7n}Bdu)gEleE#f!F5u zk1|sWIFi@F1&%HMP4;isJUm~ye=iF+n1247RshT(Ff)il1Tk3vvK=`Tg$2~L5$j;8{w;0*jGyk;u z_dxw%Q_-SNxlc|8Lxd{>PRPyfqRZrQ6Mh&|lgmL4Mh}{JB5&`+vg! 
z|J&LAzKi$k4&48JJKNtQ@^5#xA@Ggvo2}?y_M*RUI{ujA|F9PgVf$lq(LGf+y2r8iqW)CHpjsS(1;j$ZR zau_LiKpOa@vHdgxs^lm2ftiyOHWU`h0i{~=d&NyxBR?&fDA3V?f3H8g*mrp1i+yN2 zB}uXZTf7QJUztjN^o+oh+@McBME%=miZiN53Ir!tY1J2vUi7sP>9qF&_o>Bq15Wtb zaG>!6*n%l`e1Bc{-`fKIx)6Re5Cr~q4s)}!{I@L)|Hh+Y@P2_esRQpW_Mv2@-Y+AtQMO4EeB*n}TXX4AfZAlkhgalzR>sNkw6L`OUOO(B6y9 zu@`}ISjG_Zo3j8kaL?c0P>k9vLWb64l7TKnPSiI%+SfmX5Qyt2< zF6&gPM$zh&Cl>5><`+B~e-p3Y_wls>(C=N=ex3q+zx`jYcks>m*3YvSS%vB62;jR< z2Go~4n&ens$zf2g6}?o?{&JA{2C(*}>Utmt`kcKSzT-`oU5`km8NND?KALmMS8<+38`!TPt4XvX##SW0ii zdesx}TR>a9Z_^S4#udVXsI)r7Gn1JIm@$$@_G1|*yjoB$PqbTjWwcGtOPDVZACHKW z{?%>njxbYncoR%0I_UQY8-SgS4ax>E1^lGlG^M-M1Az&!$FTQtnG=!gDJKprel@!tDWfKvGLd00XLZV`v ztgJ#@Y(h{$P7w|+Q7(2URG1aQ|G(zY1>PJze=oZ5hO_VY4xm From 5cb25e7c14aa68bf87ea54996a4f92955c627fbe Mon Sep 17 00:00:00 2001 From: Sinthoras39 Date: Mon, 8 Jan 2024 14:31:53 +0100 Subject: [PATCH 3/4] fix failed acceptance test + lint should be happy now too --- lute/book/routes.py | 4 ++-- lute/book/service.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lute/book/routes.py b/lute/book/routes.py index 5586a326..f39b10fd 100644 --- a/lute/book/routes.py +++ b/lute/book/routes.py @@ -60,9 +60,9 @@ def _get_file_content(filefielddata): ext = (ext or "").lower() if ext == ".txt": return service.get_textfile_content(filefielddata) - elif ext == ".epub": + if ext == ".epub": return service.get_epub_content(filefielddata) - elif ext == ".pdf": + if ext == ".pdf": return service.get_pdf_content_from_form(filefielddata) raise ValueError(f'Unknown file extension "{ext}"') diff --git a/lute/book/service.py b/lute/book/service.py index e9e0dff0..aced01c7 100644 --- a/lute/book/service.py +++ b/lute/book/service.py @@ -11,9 +11,9 @@ from bs4 import BeautifulSoup from flask import current_app, flash from openepub import Epub, EpubError +from pypdf import PdfReader from werkzeug.utils import secure_filename from lute.book.model import Book -from pypdf import PdfReader class 
BookImportException(Exception): From a3e50a79277c7aedfeedf24b918fa7a823a2e18d Mon Sep 17 00:00:00 2001 From: Sinthoras39 Date: Mon, 8 Jan 2024 16:53:33 +0100 Subject: [PATCH 4/4] update pyproject.toml --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7e531ca9..578252c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ "PyYAML>=6.0.1,<7", "toml>=0.10.2,<1", "waitress>=2.1.2,<3", - "openepub>=0.0.6,<1" + "openepub>=0.0.6,<1", + "pypdf>=3.17.4" ] [project.scripts]