Skip to content

Commit

Permalink
Merge remote-tracking branch 'flame/master' into merge_sifive_x280
Browse files Browse the repository at this point in the history
  • Loading branch information
leekillough committed Jun 15, 2023
2 parents e8fe295 + 6b894c3 commit 7ae57cf
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 34 deletions.
30 changes: 15 additions & 15 deletions frame/include/blis.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,21 +80,6 @@ extern "C" {
#include "bli_pragma_macro_defs.h"


// -- Threading definitions --

#include "bli_thread.h"
#include "bli_thread_range.h"
#include "bli_thread_range_slab_rr.h"
#include "bli_thread_range_tlb.h"

#include "bli_pthread.h"


// -- Constant definitions --

#include "bli_extern_defs.h"


// -- BLIS architecture/kernel definitions --

#include "bli_pre_ker_params.h"
Expand All @@ -116,6 +101,21 @@ extern "C" {
#include "bli_kernel_macro_defs.h"


// -- Threading definitions --

#include "bli_thread.h"
#include "bli_thread_range.h"
#include "bli_thread_range_slab_rr.h"
#include "bli_thread_range_tlb.h"

#include "bli_pthread.h"


// -- Constant definitions --

#include "bli_extern_defs.h"


// -- Base operation prototypes --

#include "bli_init.h"
Expand Down
16 changes: 8 additions & 8 deletions frame/thread/bli_thrcomm.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ void* bli_thrcomm_bcast
return object;
}

#ifndef BLIS_TREE_BARRIER

// Use __sync_* builtins (assumed available) if __atomic_* ones are not present.
#ifndef __ATOMIC_RELAXED

Expand All @@ -214,14 +216,10 @@ void* bli_thrcomm_bcast
#define __ATOMIC_RELEASE
#define __ATOMIC_ACQ_REL

#define __atomic_load_n(ptr, constraint) \
__sync_fetch_and_add(ptr, 0)
#define __atomic_add_fetch(ptr, value, constraint) \
__sync_add_and_fetch(ptr, value)
#define __atomic_fetch_add(ptr, value, constraint) \
__sync_fetch_and_add(ptr, value)
#define __atomic_fetch_xor(ptr, value, constraint) \
__sync_fetch_and_xor(ptr, value)
#define __atomic_load_n( ptr, constraint ) __sync_fetch_and_add( ptr, 0 )
#define __atomic_add_fetch( ptr, value, constraint ) __sync_add_and_fetch( ptr, value )
#define __atomic_fetch_add( ptr, value, constraint ) __sync_fetch_and_add( ptr, value )
#define __atomic_fetch_xor( ptr, value, constraint ) __sync_fetch_and_xor( ptr, value )

#endif

Expand Down Expand Up @@ -269,3 +267,5 @@ void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm )
}
}

#endif

6 changes: 3 additions & 3 deletions frame/thread/bli_thrcomm.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ struct barrier_s
// the fields above and fields below.
char padding1[ BLIS_CACHE_LINE_SIZE ];

int count;
dim_t count;

// We insert a cache line of padding here to eliminate false sharing between
// the fields above and fields below.
char padding2[ BLIS_CACHE_LINE_SIZE ];

volatile int signal;
gint_t signal;

// We insert a cache line of padding here to eliminate false sharing between
// this struct and the next one.
char padding2[ BLIS_CACHE_LINE_SIZE ];
char padding3[ BLIS_CACHE_LINE_SIZE ];
};
typedef struct barrier_s barrier_t;
#endif
Expand Down
35 changes: 27 additions & 8 deletions frame/thread/bli_thrcomm_openmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ void bli_thrcomm_cleanup_openmp( thrcomm_t* comm )

void bli_thrcomm_barrier_openmp( dim_t t_id, thrcomm_t* comm )
{
	// Synchronize only when a communicator was supplied and its thread
	// count is something other than one; otherwise this call is a no-op.
	if ( comm != NULL && comm->n_threads != 1 )
	{
		// Enter the tree barrier at the node assigned to this thread id.
		bli_thrcomm_tree_barrier( comm->barriers[t_id] );
	}
}

Expand Down Expand Up @@ -176,27 +180,42 @@ void bli_thrcomm_tree_barrier_free( barrier_t* barrier )
return;
}

// Fallback for compilers that lack the __atomic_* builtins: route the ones
// needed here to the older __sync_* builtins (assumed available).
#ifndef __ATOMIC_RELAXED

// The __sync_* builtins take no memory-order argument, so the order
// constants expand to nothing and the macros below simply discard them.
#define __ATOMIC_RELAXED
#define __ATOMIC_ACQUIRE
#define __ATOMIC_RELEASE
#define __ATOMIC_ACQ_REL

// __atomic_add_fetch and __atomic_fetch_add are left unmapped here;
// bli_thrcomm_tree_barrier() uses only the three builtins mapped below.

// A load is emulated by atomically adding zero and returning the prior value.
#define __atomic_load_n( addr, order ) __sync_fetch_and_add( addr, 0 )
#define __atomic_sub_fetch( addr, amount, order ) __sync_sub_and_fetch( addr, amount )
#define __atomic_fetch_xor( addr, mask, order ) __sync_fetch_and_xor( addr, mask )

#endif

// Tree barrier: hold the calling thread at node `barack` until every
// participant of that node has arrived.
//
// Each arrival atomically decrements the node's count. The thread whose
// decrement brings the count to zero first passes through the parent node
// (if there is one), then re-arms the count and flips the node's signal,
// which releases the threads spinning on it. Every other thread spins until
// the signal differs from the value it sampled on entry.
//
// NOTE(review): this relies on `count` being equal to `arity` when a round
// begins -- presumably set up by the barrier creation code; confirm there.
void bli_thrcomm_tree_barrier( barrier_t* barack )
{
	// Sample the signal before announcing arrival. The signal can only be
	// flipped after the count reaches zero, which cannot happen before this
	// thread's own decrement below, so a relaxed load is sufficient here.
	gint_t my_signal = __atomic_load_n( &barack->signal, __ATOMIC_RELAXED );

	// Announce arrival; the result is the count *after* the decrement.
	dim_t my_count =
	    __atomic_sub_fetch( &barack->count, 1, __ATOMIC_ACQ_REL );

	if ( my_count == 0 )
	{
		// Last thread to arrive at this node: synchronize at the parent
		// level before releasing anyone waiting here.
		if ( barack->dad != NULL )
		{
			bli_thrcomm_tree_barrier( barack->dad );
		}

		// Re-arm the node for the next round. A plain store is used; it is
		// published to the waiters by the release operation that follows.
		barack->count = barack->arity;

		// Flip the signal to release the spinning threads.
		__atomic_fetch_xor( &barack->signal, 1, __ATOMIC_RELEASE );
	}
	else
	{
		// Spin until the last arriver flips the signal. The acquire load
		// pairs with the release above.
		while ( __atomic_load_n( &barack->signal, __ATOMIC_ACQUIRE ) == my_signal ) {}
	}
}

Expand Down

0 comments on commit 7ae57cf

Please sign in to comment.