Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gauss Jordan Matrix Inversion #80

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
03a8625
[software] Add Moore Penrose inversion kernel
mbertuletti Jul 13, 2022
c56a055
[software] add singular value decomposition kernel
mbertuletti Jul 13, 2022
77becf1
[software] Clean Gauss Jordan inverse function
mbertuletti Jul 14, 2022
1577906
[software] Allow use of input N by M rectangular matrices
mbertuletti Jul 18, 2022
21ded46
[software] Parallelize Gauss-Jordan matrix inversion
mbertuletti Jul 19, 2022
faeca50
[software] Unroll single core
mbertuletti Jul 19, 2022
9728023
[software] Unroll parallel core
mbertuletti Jul 19, 2022
ee0119c
[software] Clean comments on single-core
mbertuletti Jul 21, 2022
b412c87
[software] Change kernel name
mbertuletti Jul 21, 2022
9138024
[software] Add different parallelization schemes
mbertuletti Jul 27, 2022
3aad7fd
[software] Shape memory accesses to mempool
mbertuletti Jul 27, 2022
a045b42
[software] Add folded kernel
mbertuletti Aug 2, 2022
4dca2cf
[software] Let single core handle exchange in parallel implementation
mbertuletti Aug 3, 2022
0ca5b68
[software] Add code for unrolling in single-core
mbertuletti Aug 3, 2022
b42e968
[software] Add parallelization schemes in memory shaped version
mbertuletti Aug 3, 2022
82f8f51
[software] Merge the two final steps of matrix inversion
mbertuletti Aug 4, 2022
8acd260
[software] Correct lint errors
mbertuletti Sep 27, 2022
ae56dc4
[software] Erase SVD folder
mbertuletti Oct 31, 2022
0fbf978
[software] Fix reading of the number of cores
mbertuletti Dec 14, 2022
4c42194
[software] Move the kernels to runtime/kernels folder
mbertuletti Apr 13, 2023
cc31b71
[software] Clean up
mbertuletti May 26, 2023
c04dea3
Update changelog
mbertuletti May 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[software] Merge the two final steps of matrix inversion
  • Loading branch information
mbertuletti committed Sep 8, 2023
commit 82f8f518cc6930b85959e54b716a74c9c4d148e2
1 change: 1 addition & 0 deletions software/apps/mat_inv/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ void multi_core_folded()
init_matrix_zeros(inv, ((N * M) / N_USED_BANKS), N_BANKS, core_id);
if (core_id == 0) {
flag = 0U;
__atomic_store_n(&pivot_barrier, 0U, __ATOMIC_RELAXED);
}
mempool_barrier(num_cores);

Expand Down
203 changes: 144 additions & 59 deletions software/apps/mat_inv/mempool_mat_inv_q32p_memsized.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

/* GAUSS JORDAN INVERSION */

/* Shared 32-bit counter placed in the ".l1" linker section.
 * In the visible main.c hunk, core 0 resets it to 0 with a relaxed atomic
 * store before the cores meet at mempool_barrier(). In the visible part of
 * this header it is only referenced from commented-out code, where it is
 * incremented with __atomic_fetch_add and polled until a threshold is hit.
 * NOTE(review): this is a definition (not an extern declaration) living in
 * a header -- confirm the header is included by exactly one translation
 * unit, otherwise the symbol is defined more than once. */
uint32_t volatile pivot_barrier __attribute__((section(".l1")));

/* Prototype of the Gauss-Jordan inversion kernel defined right below.
 *   pSrc  input matrix; rows are addressed as pSrc + k * n.
 *   pDst  destination matrix; rows are addressed as pDst + k * n.
 *   n     row stride used for both matrices.
 *   flag  pointer to a flag that the kernel reads through *flag.
 * Returns 1 when *flag == 0 and the inspected element `in` is 0
 * (presumably "no usable pivot found" -- confirm against the full body).
 * Other return values are not visible in this excerpt. */
int mempool_GJinv_q32p_memsized(int32_t * pSrc, int32_t * pDst, uint32_t n, uint32_t *flag);

int mempool_GJinv_q32p_memsized(int32_t * pSrc, int32_t * pDst, uint32_t n, uint32_t *flag) {
Expand Down Expand Up @@ -127,63 +129,8 @@ int mempool_GJinv_q32p_memsized(int32_t * pSrc, int32_t * pDst, uint32_t n, uint
if ((*flag == 0U) && (in == 0U)) {
return 1;
}
// /* DIVIDE BY THE PIVOT */
// /* Points to the pivot row of input and destination matrices */
// pPivotRowIn = pSrc + (l * n);
// pPivotRowDst = pDst + (l * n);
// /* Temporary pointers to the pivot row pointers */
// pSrcT1 = pPivotRowIn;
// pSrcT2 = pPivotRowDst;
// /* Pivot element of the row */
// in = *pPivotRowIn;
// /* Loop over number of columns to the right of the pivot element */
// j = 0;
// while (j < 4 * ((n - l) >> 2U)) {
// in1 = *pSrcT1;
// in2 = *(pSrcT1 + 1);
// in3 = *(pSrcT1 + 2);
// in4 = *(pSrcT1 + 3);
// out1 = FIX_DIV(in1, in);
// out2 = FIX_DIV(in2, in);
// out3 = FIX_DIV(in3, in);
// out4 = FIX_DIV(in4, in);
// *pSrcT1++ = out1;
// *pSrcT1++ = out2;
// *pSrcT1++ = out3;
// *pSrcT1++ = out4;
// j += 4;
// }
// while (j < n - l) {
// in1 = *pSrcT1;
// *pSrcT1++ = FIX_DIV(in1, in);
// j++;
// }
// /* Loop over number of columns of the destination matrix */
// j = 0;
// while (j < 4 * (n >> 2U)) {
// in1 = *pSrcT2;
// in2 = *(pSrcT2 + 1);
// in3 = *(pSrcT2 + 2);
// in4 = *(pSrcT2 + 3);
// out1 = FIX_DIV(in1, in);
// out2 = FIX_DIV(in2, in);
// out3 = FIX_DIV(in3, in);
// out4 = FIX_DIV(in4, in);
// *pSrcT2++ = out1;
// *pSrcT2++ = out2;
// *pSrcT2++ = out3;
// *pSrcT2++ = out4;
// j += 4;
// }
// while (j < n) {
// in1 = *pSrcT2;
// *pSrcT2++ = FIX_DIV(in1, in);
// j++;
// }
}
mempool_log_barrier(2, absolute_core_id);
//pPivotRowIn = pSrc + (l * n);
//pPivotRowDst = pDst + (l * n);

/* DIVIDE BY THE PIVOT */
/* Points to the pivot row of input and destination matrices */
Expand Down Expand Up @@ -276,15 +223,13 @@ int mempool_GJinv_q32p_memsized(int32_t * pSrc, int32_t * pDst, uint32_t n, uint
for (k = absolute_core_id / (n >> 2U); k < m; k += NUM_CORES / (n >> 2U)) {
/* Only the rows other than the pivot row are to be processed */
if (k != l) {

pSrcT1 = pSrc + k * n;
pSrcT2 = pDst + k * n;
/* Element of the reference row */
in = *pSrcT1;
/* Reference row pointers */
pPRT_in = pPivotRowIn;
pPRT_pDst = pPivotRowDst;

/* Loop over the columns */
core_id = absolute_core_id % (n >> 2U);
core_id = core_id - (l >> 2U);
Expand Down Expand Up @@ -340,7 +285,6 @@ int mempool_GJinv_q32p_memsized(int32_t * pSrc, int32_t * pDst, uint32_t n, uint
// j++;
// }
//}

//uint32_t core_id_in;
//uint32_t core_id_Dst;
//int32_t p1_in, p2_in, p3_in, p4_in;
Expand Down Expand Up @@ -406,11 +350,152 @@ int mempool_GJinv_q32p_memsized(int32_t * pSrc, int32_t * pDst, uint32_t n, uint
//pSrcT2[i + 1] = in2 - FIX_MUL(in, p2_Dst);
//pSrcT2[i + 2] = in3 - FIX_MUL(in, p3_Dst);
//pSrcT2[i + 3] = in4 - FIX_MUL(in, p4_Dst);

}
}
mempool_log_barrier(2, absolute_core_id);

// /* REPLACE ROWS */
// pSrcT1 = pSrc;
// pSrcT2 = pDst;
// /* Reference row pointers */
// pPRT_in = pSrc + (l * n);
// pPRT_pDst = pDst + (l * n);
// int32_t pivot = *pPRT_in;
// uint32_t nPE = (n >> 2U);
// uint32_t check = 0;
// if (absolute_core_id >= m * nPE)
// mempool_wfi();
// for (k = absolute_core_id / nPE; k < m; k += NUM_CORES / nPE) {
// /* Only the rows other than the pivot row are to be processed */
// if (k != l) {
// pSrcT1 = pSrc + k * n;
// pSrcT2 = pDst + k * n;
// /* Element of the reference row */
// in = *pSrcT1;
// /* Loop over the columns */
// core_id = absolute_core_id % nPE;
// core_id = core_id - (l >> 2U);
// j = core_id * 4;
// while (j < 4 * ((n - l) >> 2U)) {
// out1 = pPRT_in[j];
// out2 = pPRT_in[j + 1];
// out3 = pPRT_in[j + 2];
// out4 = pPRT_in[j + 3];
// out1 = FIX_DIV(out1, pivot);
// out2 = FIX_DIV(out2, pivot);
// out3 = FIX_DIV(out3, pivot);
// out4 = FIX_DIV(out4, pivot);
// in1 = pSrcT1[j];
// in2 = pSrcT1[j + 1];
// in3 = pSrcT1[j + 2];
// in4 = pSrcT1[j + 3];
// pSrcT1[j] = in1 - FIX_MUL(in, out1);
// pSrcT1[j + 1] = in2 - FIX_MUL(in, out2);
// pSrcT1[j + 2] = in3 - FIX_MUL(in, out3);
// pSrcT1[j + 3] = in4 - FIX_MUL(in, out4);
// j += 4 * (n >> 2U);
// }
// if (core_id == 0) {
// j = 4 * ((n - l) >> 2U);
// while (j < n - l) {
// out1 = pPRT_in[j];
// out1 = FIX_DIV(out1, pivot);
// in1 = pSrcT1[j];
// pSrcT1[j] = in1 - FIX_MUL(in, out1);
// j++;
// }
// }
// /* Loop over the columns */
// core_id = absolute_core_id % nPE;
// j = core_id * 4;
// while (j < 4 * (n >> 2U)) {
// out1 = pPRT_pDst[j];
// out2 = pPRT_pDst[j + 1];
// out3 = pPRT_pDst[j + 2];
// out4 = pPRT_pDst[j + 3];
// out1 = FIX_DIV(out1, pivot);
// out2 = FIX_DIV(out2, pivot);
// out3 = FIX_DIV(out3, pivot);
// out4 = FIX_DIV(out4, pivot);
// in1 = pSrcT2[j];
// in2 = pSrcT2[j + 1];
// in3 = pSrcT2[j + 2];
// in4 = pSrcT2[j + 3];
// pSrcT2[j] = in1 - FIX_MUL(in, out1);
// pSrcT2[j + 1] = in2 - FIX_MUL(in, out2);
// pSrcT2[j + 2] = in3 - FIX_MUL(in, out3);
// pSrcT2[j + 3] = in4 - FIX_MUL(in, out4);
// j += 4 * nPE;
// }
// __atomic_fetch_add(&pivot_barrier, 1, __ATOMIC_RELAXED);
// mempool_wfi();
// } else {
// do {
// check = __atomic_fetch_add(&pivot_barrier, 0, __ATOMIC_RELAXED);
// mempool_wait(20);
// } while (check < ((m - 1) * nPE));
// /* Loop over the columns */
// core_id = absolute_core_id % (n >> 2U);
// core_id = core_id - (l >> 2U);
// j = core_id * 4;
// while (j < 4 * ((n - l) >> 2U)) {
// in1 = pPRT_in[j];
// in2 = pPRT_in[j + 1];
// in3 = pPRT_in[j + 2];
// in4 = pPRT_in[j + 3];
// out1 = FIX_DIV(in1, pivot);
// out2 = FIX_DIV(in2, pivot);
// out3 = FIX_DIV(in3, pivot);
// out4 = FIX_DIV(in4, pivot);
// pPRT_in[j] = out1;
// pPRT_in[j + 1] = out2;
// pPRT_in[j + 2] = out3;
// pPRT_in[j + 3] = out4;
// j += 4 * (n >> 2U);
// }
// if (core_id == 0) {
// j = 4 * ((n - l) >> 2U);
// while (j < n - l) {
// in1 = pPRT_in[j];
// pPRT_in[j] = FIX_DIV(in1, pivot);
// j++;
// }
// }
// /* Loop over the columns */
// core_id = absolute_core_id % (n >> 2U);
// j = core_id * 4;
// while (j < 4 * (n >> 2U)) {
// in1 = pPRT_pDst[j];
// in2 = pPRT_pDst[j + 1];
// in3 = pPRT_pDst[j + 2];
// in4 = pPRT_pDst[j + 3];
// out1 = FIX_DIV(in1, pivot);
// out2 = FIX_DIV(in2, pivot);
// out3 = FIX_DIV(in3, pivot);
// out4 = FIX_DIV(in4, pivot);
// pPRT_pDst[j] = out1;
// pPRT_pDst[j + 1] = out2;
// pPRT_pDst[j + 2] = out3;
// pPRT_pDst[j + 3] = out4;
// j += 4 * (n >> 2U);
// }
// if (core_id == (n >> 2U) - 1) {
// j = 4 * (n >> 2U);
// while (j < n) {
// in1 = pPRT_pDst[j];
// pPRT_pDst[j] = FIX_DIV(in1, pivot);
// j++;
// }
// }
// if ((m * nPE) - 1 == __atomic_fetch_add(&pivot_barrier, 1, __ATOMIC_RELAXED)) {
// __atomic_store_n(&pivot_barrier, 0, __ATOMIC_RELAXED);
// __sync_synchronize();
// wake_up_all();
// }
// mempool_wfi();
// }
// }

// /* REPLACE ROWS */
// pSrcT1 = pSrc;
// pSrcT2 = pDst;
Expand Down