Skip to content

Try to optimize a couple of things #1740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
4d79b18
Start with ADX assembly for squaring n = 8
albinahlback Jan 21, 2024
b5597da
Change order in fmpz_set_signed_uiui
albinahlback Jan 22, 2024
cbc8998
Return value from mpn_mul in _flint_mpn_mul
albinahlback Jan 22, 2024
766502c
Use flint_mpn_mul in fmpz_mul(_ui) and implement fmpz_sqr
albinahlback Jan 22, 2024
a9e3c34
Document fmpz_sqr
albinahlback Jan 22, 2024
4fdeff7
replace fmpz_mul(X, Y, Y) with fmpz_sqr(X, Y)
albinahlback Jan 22, 2024
fce660c
Replace fmpz_mul_ui(X, Y, 2) with fmpz_mul_2exp(X, Y, 1)
albinahlback Jan 22, 2024
079d27f
Replace fmpz_mul_ui(X, Y, 4) with fmpz_mul_2exp(X, Y, 2)
albinahlback Jan 22, 2024
f4b8592
Replace fmpz_mul_ui(X, Y, 8) with fmpz_mul_2exp(X, Y, 3)
albinahlback Jan 22, 2024
df56b62
Replace fmpz_mul_ui(X, Y, 16) with fmpz_mul_2exp(X, Y, 4)
albinahlback Jan 22, 2024
d75d5b8
Replace fmpz_mul_ui(X, Y, 64) with fmpz_mul_2exp(X, Y, 6)
albinahlback Jan 22, 2024
e79ff07
Replace more fmpz_mul_ui
albinahlback Jan 22, 2024
636def6
Add fmpz_inplace_[neg/abs]
albinahlback Jan 22, 2024
8eeab86
Replace fmpz_neg with fmpz_inplace_neg where applicable
albinahlback Jan 22, 2024
fc6fdf2
Replace fmpz_abs with fmpz_inplace_abs where applicable
albinahlback Jan 22, 2024
eff6358
Change a couple of fmpz_mul_si to faster versions
albinahlback Jan 22, 2024
a916448
Change fmpz_mul_si to inlined fmpz_mul_ui and fmpz_inplace_neg
albinahlback Jan 22, 2024
e557ad8
Inline fmpz_sgn
albinahlback Jan 22, 2024
a44d847
Use fmpz_inplace_neg if possible in fmpz_vec_neg
albinahlback Jan 22, 2024
2a45b44
Inline _fmpz_promote
albinahlback Jan 22, 2024
5e05e28
Faster version of fmpz_mul_2exp
albinahlback Jan 22, 2024
9e08768
Allocate a minimum of 3 limbs in _fmpz_new_mpz in single mode
albinahlback Jan 23, 2024
28b0a2b
Add fmpq_inplace_[neg/abs]
albinahlback Jan 23, 2024
25eb427
Use fmpq_inplace_[neg/abs] where applicable
albinahlback Jan 23, 2024
d3b6254
Add arf_inplace_[neg/abs]
albinahlback Jan 23, 2024
bef4ec3
Add arb_inplace_[neg/abs]
albinahlback Jan 23, 2024
af1b850
Add acb_inplace_[neg/conj]
albinahlback Jan 23, 2024
a3b6954
Use arf_inplace_[neg/abs] where applicable
albinahlback Jan 23, 2024
3210d80
Use arb_inplace_[neg/abs] where applicable
albinahlback Jan 23, 2024
c30cfa2
Use acb_inplace_[neg/conj] where applicable
albinahlback Jan 23, 2024
f00dd78
Use inplace_[neg/abs/conj] if applicable in [arb/acb/fmpq]_mat
albinahlback Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
457 changes: 22 additions & 435 deletions dev/gen_mul_basecase.jl

Large diffs are not rendered by default.

13 changes: 10 additions & 3 deletions doc/source/fmpz.rst
Original file line number Diff line number Diff line change
@@ -715,12 +715,15 @@ Basic arithmetic


.. function:: void fmpz_neg(fmpz_t f1, const fmpz_t f2)
void fmpz_abs(fmpz_t f1, const fmpz_t f2)

Sets `f_1` to `-f_2`.
Sets `f_1` to `-f_2` and the absolute value of `f_2`, respectively.

.. function:: void fmpz_abs(fmpz_t f1, const fmpz_t f2)
.. function:: void fmpz_inplace_neg(fmpz_t f1)
void fmpz_inplace_abs(fmpz_t f1)

Sets `f_1` to the absolute value of `f_2`.
Works like :func:`fmpz_neg` and :func:`fmpz_abs`, but acts in place and is
therefore faster, as fewer checks have to be made.

.. function:: void fmpz_add(fmpz_t f, const fmpz_t g, const fmpz_t h)
void fmpz_add_ui(fmpz_t f, const fmpz_t g, ulong h)
@@ -734,6 +737,10 @@ Basic arithmetic

Sets `f` to `g - h`.

.. function:: void fmpz_sqr(fmpz_t f, const fmpz_t g)

Sets `f` to `g^2`.

.. function:: void fmpz_mul(fmpz_t f, const fmpz_t g, const fmpz_t h)
void fmpz_mul_ui(fmpz_t f, const fmpz_t g, ulong h)
void fmpz_mul_si(fmpz_t f, const fmpz_t g, slong h)
2 changes: 1 addition & 1 deletion src/NTL-interface.h
Original file line number Diff line number Diff line change
@@ -56,7 +56,7 @@ inline void fmpz_set_ZZ(fmpz_t rop, const ZZ& op)
}

if (op < WORD(0))
fmpz_neg(rop, rop);
fmpz_inplace_neg(rop);
}
}

17 changes: 15 additions & 2 deletions src/acb.h
Original file line number Diff line number Diff line change
@@ -438,13 +438,26 @@ acb_neg(acb_t z, const acb_t x)
arb_neg(acb_imagref(z), acb_imagref(x));
}

/* In-place counterpart of acb_neg: takes a single operand z and applies
   arb_inplace_neg to both its real and its imaginary part, so no separate
   source argument is needed. */
ACB_INLINE void
acb_inplace_neg(acb_t z)
{
arb_inplace_neg(acb_realref(z));
arb_inplace_neg(acb_imagref(z));
}

/* Sets z to the complex conjugate of x: the real part is copied with
   arb_set and the imaginary part is written as the negation of x's
   imaginary part via arb_neg. */
ACB_INLINE void
acb_conj(acb_t z, const acb_t x)
{
arb_set(acb_realref(z), acb_realref(x));
arb_neg(acb_imagref(z), acb_imagref(x));
}

/* In-place counterpart of acb_conj: only the imaginary part of z is touched
   (via arb_inplace_neg); the real part is left as it is. */
ACB_INLINE void
acb_inplace_conj(acb_t z)
{
arb_inplace_neg(acb_imagref(z));
}

ACB_INLINE void
acb_abs(arb_t u, const acb_t z, slong prec)
{
@@ -485,7 +498,7 @@ acb_mul_onei(acb_t z, const acb_t x)
if (z == x)
{
arb_swap(acb_realref(z), acb_imagref(z));
arb_neg(acb_realref(z), acb_realref(z));
arb_inplace_neg(acb_realref(z));
}
else
{
@@ -500,7 +513,7 @@ acb_div_onei(acb_t z, const acb_t x)
if (z == x)
{
arb_swap(acb_realref(z), acb_imagref(z));
arb_neg(acb_imagref(z), acb_imagref(z));
arb_inplace_neg(acb_imagref(z));
}
else
{
2 changes: 1 addition & 1 deletion src/acb/acos.c
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ acb_acos(acb_t res, const acb_t z, slong prec)
{
/* pure imaginary on (1,inf) */
acb_asin(res, z, prec);
acb_neg(res, res);
acb_inplace_neg(res);
arb_zero(acb_realref(res));
}
else
6 changes: 3 additions & 3 deletions src/acb/agm.c
Original file line number Diff line number Diff line change
@@ -121,7 +121,7 @@ acb_agm(acb_t res, const acb_t a, const acb_t b, slong prec)
}
else
{
acb_neg(u, u);
acb_inplace_neg(u);
acb_sqrt(u, u, prec);
acb_mul_onei(u, u);
}
@@ -134,13 +134,13 @@ acb_agm(acb_t res, const acb_t a, const acb_t b, slong prec)
}
else if (arb_is_negative(acb_realref(v)))
{
acb_neg(u, u);
acb_inplace_neg(u);
agm_helper(res, t, u, prec);
}
else
{
agm_helper(v, t, u, prec);
acb_neg(u, u);
acb_inplace_neg(u);
agm_helper(res, t, u, prec);
acb_union(res, res, v, prec);
}
16 changes: 8 additions & 8 deletions src/acb/agm1.c
Original file line number Diff line number Diff line change
@@ -51,18 +51,18 @@ sqrtmul(acb_t c, const acb_t a, const acb_t b, slong prec)
arb_is_nonnegative(acb_imagref(b)))
{
acb_mul(c, a, b, prec);
acb_neg(c, c);
acb_inplace_neg(c);
acb_sqrt(c, c, prec);
acb_mul_onei(c, c);
}
else if (arb_is_nonpositive(acb_imagref(a)) &&
arb_is_nonpositive(acb_imagref(b)))
{
acb_mul(c, a, b, prec);
acb_neg(c, c);
acb_inplace_neg(c);
acb_sqrt(c, c, prec);
acb_mul_onei(c, c);
acb_neg(c, c);
acb_inplace_neg(c);
}
else
{
@@ -303,7 +303,7 @@ acb_agm1_deriv_diff(acb_t Mz, acb_t Mzp, const acb_t z, slong prec)
acb_sub(Mzp, u, v, prec);
acb_mul_2exp_si(Mz, Mz, -1);
acb_mul_2exp_si(Mzp, Mzp, -1);
fmpz_neg(hexp, hexp);
fmpz_inplace_neg(hexp);
acb_mul_2exp_fmpz(Mzp, Mzp, hexp);

/* add error */
@@ -314,7 +314,7 @@ acb_agm1_deriv_diff(acb_t Mz, acb_t Mzp, const acb_t z, slong prec)
else
acb_add_error_mag(Mz, err);

fmpz_neg(rexp, rexp);
fmpz_inplace_neg(rexp);
mag_mul_2exp_fmpz(err, err, rexp);

if (isreal)
@@ -585,7 +585,7 @@ acb_agm1_cpx(acb_ptr m, const acb_t z, slong len, slong prec)
acb_inv(w, w, prec);
acb_mul(t, w, w, prec);
acb_mul(w + 1, w + 1, t, prec);
acb_neg(w + 1, w + 1);
acb_inplace_neg(w + 1);

if (acb_is_one(z))
{
@@ -596,7 +596,7 @@ acb_agm1_cpx(acb_ptr m, const acb_t z, slong len, slong prec)
acb_mul_ui(w + k, w + n + 0, (n+1)*(n+1), prec);
acb_addmul_ui(w + k, w + n + 1, 7+3*n*(3+n), prec);
acb_div_ui(w + k, w + k, 2*(n+2)*(n+2), prec);
acb_neg(w + k, w + k);
acb_inplace_neg(w + k);
}
}
else
@@ -609,7 +609,7 @@ acb_agm1_cpx(acb_ptr m, const acb_t z, slong len, slong prec)
acb_sub_ui(t, t, 1, prec);
acb_sub(u, u, z, prec);
acb_inv(u, u, prec);
acb_neg(u, u);
acb_inplace_neg(u);

/* use differential equation for second derivative */
acb_mul(w + 2, z, w + 0, prec);
4 changes: 2 additions & 2 deletions src/acb/approx_dot.c
Original file line number Diff line number Diff line change
@@ -251,8 +251,8 @@ acb_approx_dot_simple(acb_t res, const acb_t initial, int subtract,

if (subtract)
{
arf_neg(arb_midref(acb_realref(res)), arb_midref(acb_realref(res)));
arf_neg(arb_midref(acb_imagref(res)), arb_midref(acb_imagref(res)));
arf_inplace_neg(arb_midref(acb_realref(res)));
arf_inplace_neg(arb_midref(acb_imagref(res)));
}
}

2 changes: 1 addition & 1 deletion src/acb/asin.c
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ acb_asin(acb_t res, const acb_t z, slong prec)

acb_mul(t, z, z, prec);
acb_sub_ui(t, t, 1, prec);
acb_neg(t, t);
acb_inplace_neg(t);
acb_sqrt(t, t, prec);

if (acb_is_real(z) && acb_is_real(t))
2 changes: 1 addition & 1 deletion src/acb/chebyshev_u2_ui.c
Original file line number Diff line number Diff line change
@@ -43,7 +43,7 @@ acb_chebyshev_u2_ui(acb_t a, acb_t b, ulong n, const acb_t x, slong prec)
{
acb_submul(b, x, a, prec);
acb_mul(a, a, b, prec);
acb_neg(a, a);
acb_inplace_neg(a);
acb_mul_2exp_si(a, a, 1);
acb_mul(b, t, u, prec);
}
2 changes: 1 addition & 1 deletion src/acb/chebyshev_u_ui.c
Original file line number Diff line number Diff line change
@@ -46,7 +46,7 @@ acb_chebyshev_u_ui(acb_t y, ulong n, const acb_t x, slong prec)
acb_submul(b, a, x, prec);
acb_mul(y, a, b, prec);
acb_mul_2exp_si(y, y, 1);
acb_neg(y, y);
acb_inplace_neg(y);
}

acb_clear(a);
2 changes: 1 addition & 1 deletion src/acb/cos.c
Original file line number Diff line number Diff line change
@@ -40,7 +40,7 @@ acb_cos(acb_t r, const acb_t z, slong prec)

arb_mul(acb_realref(r), ca, cb, prec);
arb_mul(acb_imagref(r), sa, sb, prec);
arb_neg(acb_imagref(r), acb_imagref(r));
arb_inplace_neg(acb_imagref(r));

arb_clear(sa);
arb_clear(ca);
2 changes: 1 addition & 1 deletion src/acb/cos_pi.c
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ acb_cos_pi(acb_t r, const acb_t z, slong prec)

arb_mul(acb_realref(r), ca, cb, prec);
arb_mul(acb_imagref(r), sa, sb, prec);
arb_neg(acb_imagref(r), acb_imagref(r));
arb_inplace_neg(acb_imagref(r));

arb_clear(sa);
arb_clear(ca);
2 changes: 1 addition & 1 deletion src/acb/cot.c
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@ acb_cot(acb_t r, const acb_t z, slong prec)
else if (arb_is_zero(acb_realref(z)))
{
arb_coth(acb_imagref(r), acb_imagref(z), prec);
arb_neg(acb_imagref(r), acb_imagref(r));
arb_inplace_neg(acb_imagref(r));
arb_zero(acb_realref(r));
}
else
4 changes: 2 additions & 2 deletions src/acb/cot_pi.c
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ acb_cot_pi(acb_t r, const acb_t z, slong prec)
arb_const_pi(t, prec + 4);
arb_mul(t, acb_imagref(z), t, prec + 4);
arb_coth(acb_imagref(r), t, prec);
arb_neg(acb_imagref(r), acb_imagref(r));
arb_inplace_neg(acb_imagref(r));
arb_zero(acb_realref(r));
arb_clear(t);
}
@@ -55,7 +55,7 @@ acb_cot_pi(acb_t r, const acb_t z, slong prec)
}
else
{
acb_neg(t, t);
acb_inplace_neg(t);
acb_exp_pi_i(t, t, prec + 4);
acb_sub_ui(r, t, 1, prec + 4);
acb_div(r, t, r, prec + 4);
4 changes: 2 additions & 2 deletions src/acb/csc_pi.c
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ acb_csc_pi(acb_t res, const acb_t z, slong prec)
arb_const_pi(acb_realref(res), prec);
arb_mul(acb_imagref(res), acb_imagref(z), acb_realref(res), prec);
arb_csch(acb_imagref(res), acb_imagref(res), prec);
arb_neg(acb_imagref(res), acb_imagref(res));
arb_inplace_neg(acb_imagref(res));
arb_zero(acb_realref(res));
}
else
@@ -46,7 +46,7 @@ acb_csc_pi(acb_t res, const acb_t z, slong prec)
acb_mul(res, t, t, prec + 4);
acb_sub_ui(res, res, 1, prec + 4);
acb_div(res, t, res, prec);
acb_neg(res, res);
acb_inplace_neg(res);
}
else
{
4 changes: 2 additions & 2 deletions src/acb/csch.c
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ acb_csch(acb_t res, const acb_t z, slong prec)
else if (arb_is_zero(acb_realref(z)))
{
arb_csc(acb_imagref(res), acb_imagref(z), prec);
arb_neg(acb_imagref(res), acb_imagref(res));
arb_inplace_neg(acb_imagref(res));
arb_zero(acb_realref(res));
}
else
@@ -43,7 +43,7 @@ acb_csch(acb_t res, const acb_t z, slong prec)
acb_mul(res, t, t, prec + 4);
acb_sub_ui(res, res, 1, prec + 4);
acb_div(res, t, res, prec);
acb_neg(res, res);
acb_inplace_neg(res);
}
else
{
4 changes: 2 additions & 2 deletions src/acb/cube.c
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ acb_cube(acb_t r, const acb_t z, slong prec)
else if (arb_is_zero(a))
{
arb_pow_ui(acb_imagref(r), b, 3, prec);
arb_neg(acb_imagref(r), acb_imagref(r));
arb_inplace_neg(acb_imagref(r));
arb_zero(acb_realref(r));
}
else
@@ -44,7 +44,7 @@ acb_cube(acb_t r, const acb_t z, slong prec)

/* u = -(b^2 - 3a^2) */
arb_submul_ui(u, v, 3, prec);
arb_neg(u, u);
arb_inplace_neg(u);

arb_mul(acb_realref(r), t, a, prec);
arb_mul(acb_imagref(r), u, b, prec);
2 changes: 1 addition & 1 deletion src/acb/digamma.c
Original file line number Diff line number Diff line change
@@ -42,7 +42,7 @@ acb_digamma(acb_t y, const acb_t x, slong prec)
if (reflect)
{
acb_sub_ui(t, x, 1, wp);
acb_neg(t, t);
acb_inplace_neg(t);
acb_cot_pi(v, x, wp);
arb_const_pi(acb_realref(u), wp);
acb_mul_arb(v, v, acb_realref(u), wp);
6 changes: 3 additions & 3 deletions src/acb/div.c
Original file line number Diff line number Diff line change
@@ -50,7 +50,7 @@ acb_div(acb_t z, const acb_t x, const acb_t y, slong prec)
if (arb_is_zero(b))
{
arb_div(acb_imagref(z), a, d, prec);
arb_neg(acb_imagref(z), acb_imagref(z));
arb_inplace_neg(acb_imagref(z));
arb_zero(acb_realref(z));
}
else if (arb_is_zero(a))
@@ -63,7 +63,7 @@ acb_div(acb_t z, const acb_t x, const acb_t y, slong prec)
arb_div(acb_realref(z), a, d, prec);
arb_div(acb_imagref(z), b, d, prec);
arb_swap(acb_realref(z), acb_imagref(z));
arb_neg(acb_imagref(z), acb_imagref(z));
arb_inplace_neg(acb_imagref(z));
}
else
{
@@ -73,7 +73,7 @@ acb_div(acb_t z, const acb_t x, const acb_t y, slong prec)
arb_div(acb_realref(z), a, t, prec);
arb_div(acb_imagref(z), b, t, prec);
arb_swap(acb_realref(z), acb_imagref(z));
arb_neg(acb_imagref(z), acb_imagref(z));
arb_inplace_neg(acb_imagref(z));
arb_clear(t);
}
}
2 changes: 1 addition & 1 deletion src/acb/dot.c
Original file line number Diff line number Diff line change
@@ -319,7 +319,7 @@ acb_dot(acb_t res, const acb_t initial, int subtract, acb_srcptr x, slong xstep,
{
acb_mul(res, x, y, prec);
if (subtract)
acb_neg(res, res);
acb_inplace_neg(res);
}
return;
}
2 changes: 1 addition & 1 deletion src/acb/dot_fmpz.c
Original file line number Diff line number Diff line change
@@ -33,7 +33,7 @@ acb_dot_fmpz(acb_t res, const acb_t initial, int subtract, acb_srcptr x, slong x
{
acb_mul_fmpz(res, x, y, prec);
if (subtract)
acb_neg(res, res);
acb_inplace_neg(res);
}
return;
}
2 changes: 1 addition & 1 deletion src/acb/dot_si.c
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ acb_dot_si(acb_t res, const acb_t initial, int subtract, acb_srcptr x, slong xst
{
acb_mul_si(res, x, y[0], prec);
if (subtract)
acb_neg(res, res);
acb_inplace_neg(res);
}
return;
}
2 changes: 1 addition & 1 deletion src/acb/dot_simple.c
Original file line number Diff line number Diff line change
@@ -43,5 +43,5 @@ acb_dot_simple(acb_t res, const acb_t initial, int subtract,
acb_addmul(res, x + i * xstep, y + i * ystep, prec);

if (subtract)
acb_neg(res, res);
acb_inplace_neg(res);
}
Loading