Skip to content

Commit

Permalink
[Trifocal+P2Pt] all aligned 64 improves vectorization
Browse files Browse the repository at this point in the history
  • Loading branch information
rfabbri committed Nov 3, 2023
1 parent dc5f1d2 commit ce7e756
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 24 deletions.
4 changes: 2 additions & 2 deletions minus/chicago14a-default-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ struct minus_data<chicago14a,F> {
typedef minus_io_14a<chicago14a> io14;
typedef std::complex<F> complex;
static const complex start_sols_[M::nve*M::nsols];
static complex params_start_target_[2*M::f::nparams];
static complex default_params_start_target_gammified_[2*M::f::nparams];
alignas (64) static complex params_start_target_[2*M::f::nparams];
alignas (64) static complex default_params_start_target_gammified_[2*M::f::nparams];
static const complex *params_;
static F p_[io::pp::nviews][io::pp::npoints][io::ncoords2d];
static const F p_correct_[io::pp::nviews][io::pp::npoints][io::ncoords2d];
Expand Down
6 changes: 3 additions & 3 deletions minus/chicago14a-default-data.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace MiNuS {
// Internal note: m2: script t, variable sols
// You can reinterpret in C as start_sols_[M::nsols][M::nve]
template <typename F>
alignas(sizeof(std::complex<F>)) const std::complex<F> minus_data<chicago14a,F>::
alignas(64) const std::complex<F> minus_data<chicago14a,F>::
start_sols_[M::nve*M::nsols] = {
{-.59336028545681196,-.11013183013512155},
{.11944671140724233,-.13633687755694085},
Expand Down Expand Up @@ -4729,7 +4729,7 @@ start_sols_[M::nve*M::nsols] = {
// to store target system params
// the latter M::nparams entries are placeholders; their initial values are not meaningful
template <typename F>
alignas(sizeof(std::complex<F>)) std::complex<F> minus_data<chicago14a,F>::
alignas(64) std::complex<F> minus_data<chicago14a,F>::
params_start_target_[2*M::f::nparams] = {
{.13016671344237549,-.36891394723672405},
{.2649393534275909,-.23418132862391827},
Expand Down Expand Up @@ -4800,7 +4800,7 @@ params_start_target_[2*M::f::nparams] = {
//
// The point-tangent inputs giving rise to this are given below
template <typename F>
alignas(sizeof(std::complex<F>)) std::complex<F> minus_data<chicago14a,F>::
alignas(64) std::complex<F> minus_data<chicago14a,F>::
default_params_start_target_gammified_[2*M::f::nparams] = { // start-target param pairs, P01 in chicago.m2
{.391195550619826,-.00262962533857666},
{.310140709227333,+.169842562835882},
Expand Down
2 changes: 1 addition & 1 deletion minus/chicago14a.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -3596,7 +3596,7 @@ minus<chicago14a, F>::solve(
)
{
typedef minus_data<chicago14a,F> data;
alignas(sizeof(C<F>)) C<F> params[2*M::f::nparams];
alignas(64) C<F> params[2*M::f::nparams];
memcpy(params, data::params_start_target_, M::f::nparams*sizeof(C<F>));

constexpr int id_tgt0 = 0; constexpr int id_tgt1 = 1; // TODO: select the best / least degenerate directions
Expand Down
19 changes: 1 addition & 18 deletions minus/minus.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,7 @@ track(const track_settings &s, const C<F> s_sols[f::nve*f::nsols], const C<F> pa
asm("#------ ENDMUL!");

asm("#------ Add to self!"); // not aligned
// v::add_to_self(xt, dx4);
xt[0] += dx4[0];
xt[1] += dx4[1];
xt[2] += dx4[2];
xt[3] += dx4[3];
xt[4] += dx4[4];
xt[5] += dx4[5];
xt[6] += dx4[6];
xt[7] += dx4[7];
xt[8] += dx4[8];
xt[9] += dx4[9];
xt[10] += dx4[10];
xt[11] += dx4[11];
xt[12] += dx4[12];
xt[13] += dx4[13];
// xt[14] += dx4[14];
// xt[15] += dx4[15];
// xt[16] += dx4[16];
v::add_to_self(xt, dx4);
asm("#------ end add to self!");
v::multiply_scalar_to_self(dx4, 2.);
xt[f::nve] += one_half_dt; // t0+.5dt
Expand Down

0 comments on commit ce7e756

Please sign in to comment.