Skip to content

Commit

Permalink
Merge branch 'sycl' into maxime/l0-perf-improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
mfrancepillois committed Mar 8, 2024
2 parents 6079ab1 + 43201db commit 3ea8d02
Show file tree
Hide file tree
Showing 2,999 changed files with 139,793 additions and 50,843 deletions.
8 changes: 0 additions & 8 deletions .arcconfig

This file was deleted.

15 changes: 0 additions & 15 deletions .arclint

This file was deleted.

2 changes: 1 addition & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-const-correctness,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,-misc-use-anonymous-namespace,readability-identifier-naming'
Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-const-correctness,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,-misc-no-recursion,-misc-use-anonymous-namespace,readability-identifier-naming,-misc-include-cleaner'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
Expand Down
3 changes: 3 additions & 0 deletions .github/new-prs-labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,9 @@ backend:SystemZ:
third-party:unittests:
- third-party/unittests/**

third-party:benchmark:
- third-party/benchmark/**

llvm:binary-utilities:
- llvm/docs/CommandGuide/llvm-*
- llvm/include/llvm/BinaryFormat/**
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/build-ci-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,18 @@ jobs:
cp ./.github/workflows/containers/github-action-ci/storage.conf ~/.config/containers/storage.conf
podman info
# Download the container image into /mnt/podman rather than
# $GITHUB_WORKSPACE to avoid space limitations on the default drive
# and use the permissions setup for /mnt/podman.
- name: Download stage1-toolchain
uses: actions/download-artifact@v4
with:
name: stage1-toolchain
path: /mnt/podman

- name: Load stage1-toolchain
run: |
podman load -i stage1-toolchain.tar
podman load -i /mnt/podman/stage1-toolchain.tar
- name: Build Container
working-directory: ./.github/workflows/containers/github-action-ci/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ COPY --from=stage2-toolchain $LLVM_SYSROOT $LLVM_SYSROOT
# Need to install curl for hendrikmuhs/ccache-action
# Need nodejs for some of the GitHub actions.
# Need perl-modules for clang analyzer tests.
# Need git for SPIRV-Tools tests.
RUN apt-get update && \
apt-get install -y \
binutils \
cmake \
curl \
git \
libstdc++-11-dev \
ninja-build \
nodejs \
Expand Down
22 changes: 22 additions & 0 deletions .github/workflows/sycl-linux-run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -297,17 +297,39 @@ jobs:
if: inputs.tests_selector == 'cts'
env:
ONEAPI_DEVICE_SELECTOR: ${{ inputs.target_devices }}
# By-default GitHub actions execute the "run" shell script with -e option,
# so the execution terminates if any command returns a non-zero status.
# Since we're using a loop to run all test-binaries separately, some test
# may fail and terminate the execution. Setting "shell" value to override
# the default behavior.
# https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#custom-shell
shell: bash {0}
# FIXME: For some reason the "sub_group api" test-case is failing with
# SIGSEGV while running test_all, so running each binary separately.
# BTW test_all requires a lot of resources to build it, so probably it'll
# be better to build each binary also separately.
# run: |
# ./build-cts/bin/test_all $CTS_TEST_ARGS
run: |
status=""
failed_suites=""
for i in `ls -1 ./build-cts/bin`; do
if [ "$i" != "test_all" ]; then
echo "::group::Running $i"
build-cts/bin/$i
if [ $? -ne 0 ]; then
status=1
if [$failed_suites == ""]; then
failed_suites=$i
else
failed_suites="$failed_suites, $i"
fi
fi
echo "::endgroup::"
fi
done
if [ -n "$status" ]; then
echo "Failed suite(s): $failed_suites"
exit 1
fi
exit 0
2 changes: 1 addition & 1 deletion .github/workflows/sycl-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
target_devices: ext_oneapi_cuda:gpu

- name: SYCL-CTS
runner: '["cts-cpu"]'
runner: '["Linux", "gen12"]'
image: ghcr.io/intel/llvm/ubuntu2204_intel_drivers:latest
image_options: -u 1001 --device=/dev/dri --privileged --cap-add SYS_ADMIN
target_devices: opencl:cpu
Expand Down
8 changes: 8 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,14 @@ class BinaryFunction {
/// Returns false if disassembly failed.
Error disassemble();

/// An external interface to register a branch while the function is in
/// disassembled state. Allows to make custom modifications to the
/// disassembler. E.g., a pre-CFG pass can add an instruction and register
/// a branch that will later be used during the CFG construction.
///
/// Return a label at the branch destination.
MCSymbol *registerBranch(uint64_t Src, uint64_t Dst);

Error handlePCRelOperand(MCInst &Instruction, uint64_t Address,
uint64_t Size);

Expand Down
7 changes: 3 additions & 4 deletions bolt/include/bolt/Core/ParallelUtilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "bolt/Core/MCPlusBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ThreadPool.h"

using namespace llvm;

Expand All @@ -28,8 +29,6 @@ extern cl::opt<unsigned> TaskCount;
} // namespace opts

namespace llvm {
class ThreadPool;

namespace bolt {
class BinaryContext;
class BinaryFunction;
Expand All @@ -50,8 +49,8 @@ enum SchedulingPolicy {
SP_BB_QUADRATIC, /// cost is estimated by the square of the BB count
};

/// Return the managed thread pool and initialize it if not initiliazed.
ThreadPool &getThreadPool();
/// Return the managed thread pool and initialize it if not initialized.
ThreadPoolInterface &getThreadPool();

/// Perform the work on each BinaryFunction except those that are accepted
/// by SkipPredicate, scheduling heuristic is based on SchedPolicy.
Expand Down
24 changes: 17 additions & 7 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,16 @@ Error BinaryFunction::disassemble() {
return Error::success();
}

MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
assert(CurrentState == State::Disassembled &&
"Cannot register branch unless function is in disassembled state.");
assert(containsAddress(Src) && containsAddress(Dst) &&
"Cannot register external branch.");
MCSymbol *Target = getOrCreateLocalLabel(Dst);
TakenBranches.emplace_back(Src - getAddress(), Dst - getAddress());
return Target;
}

bool BinaryFunction::scanExternalRefs() {
bool Success = true;
bool DisassemblyFailed = false;
Expand Down Expand Up @@ -1759,13 +1769,6 @@ void BinaryFunction::postProcessJumpTables() {
}
}
}

// Remove duplicates branches. We can get a bunch of them from jump tables.
// Without doing jump table value profiling we don't have use for extra
// (duplicate) branches.
llvm::sort(TakenBranches);
auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
TakenBranches.erase(NewEnd, TakenBranches.end());
}

bool BinaryFunction::validateExternallyReferencedOffsets() {
Expand Down Expand Up @@ -2128,6 +2131,13 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) {
// e.g. exit(3), etc. Otherwise we'll see a false fall-through
// blocks.

// Remove duplicates branches. We can get a bunch of them from jump tables.
// Without doing jump table value profiling we don't have a use for extra
// (duplicate) branches.
llvm::sort(TakenBranches);
auto NewEnd = std::unique(TakenBranches.begin(), TakenBranches.end());
TakenBranches.erase(NewEnd, TakenBranches.end());

for (std::pair<uint32_t, uint32_t> &Branch : TakenBranches) {
LLVM_DEBUG(dbgs() << "registering branch [0x"
<< Twine::utohexstr(Branch.first) << "] -> [0x"
Expand Down
10 changes: 5 additions & 5 deletions bolt/lib/Core/ParallelUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ namespace ParallelUtilities {

namespace {
/// A single thread pool that is used to run parallel tasks
std::unique_ptr<ThreadPool> ThreadPoolPtr;
std::unique_ptr<DefaultThreadPool> ThreadPoolPtr;

unsigned computeCostFor(const BinaryFunction &BF,
const PredicateTy &SkipPredicate,
Expand Down Expand Up @@ -102,11 +102,11 @@ inline unsigned estimateTotalCost(const BinaryContext &BC,

} // namespace

ThreadPool &getThreadPool() {
ThreadPoolInterface &getThreadPool() {
if (ThreadPoolPtr.get())
return *ThreadPoolPtr;

ThreadPoolPtr = std::make_unique<ThreadPool>(
ThreadPoolPtr = std::make_unique<DefaultThreadPool>(
llvm::hardware_concurrency(opts::ThreadCount));
return *ThreadPoolPtr;
}
Expand Down Expand Up @@ -145,7 +145,7 @@ void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;

// Divide work into blocks of equal cost
ThreadPool &Pool = getThreadPool();
ThreadPoolInterface &Pool = getThreadPool();
auto BlockBegin = BC.getBinaryFunctions().begin();
unsigned CurrentCost = 0;

Expand Down Expand Up @@ -202,7 +202,7 @@ void runOnEachFunctionWithUniqueAllocId(
TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;

// Divide work into blocks of equal cost
ThreadPool &Pool = getThreadPool();
ThreadPoolInterface &Pool = getThreadPool();
auto BlockBegin = BC.getBinaryFunctions().begin();
unsigned CurrentCost = 0;
unsigned AllocId = 1;
Expand Down
8 changes: 8 additions & 0 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,14 @@ static uint64_t encodeValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) {
// OP 1001_01 goes in bits 31:26 of BL.
Value = ((Value >> 2) & 0x3ffffff) | 0x94000000ULL;
break;
case ELF::R_AARCH64_JUMP26:
Value -= PC;
assert(isInt<28>(Value) &&
"only PC +/- 128MB is allowed for direct branch");
// Immediate goes in bits 25:0 of B.
// OP 0001_01 goes in bits 31:26 of B.
Value = ((Value >> 2) & 0x3ffffff) | 0x14000000ULL;
break;
}
return Value;
}
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/IdenticalCodeFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
Timer SinglePass("single fold pass", "single fold pass");
LLVM_DEBUG(SinglePass.startTimer());

ThreadPool *ThPool;
ThreadPoolInterface *ThPool;
if (!opts::NoThreads)
ThPool = &ParallelUtilities::getThreadPool();

Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ void DWARFRewriter::updateDebugInfo() {
}
} else {
// Update unit debug info in parallel
ThreadPool &ThreadPool = ParallelUtilities::getThreadPool();
ThreadPoolInterface &ThreadPool = ParallelUtilities::getThreadPool();
for (std::unique_ptr<DWARFUnit> &CU : BC.DwCtx->compile_units())
ThreadPool.async(processUnitDIE, CU.get(), &DIEBlder);
ThreadPool.wait();
Expand Down
Loading

0 comments on commit 3ea8d02

Please sign in to comment.