-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: move system commands to module
- Loading branch information
Showing
6 changed files
with
403 additions
and
334 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import collections | ||
import math | ||
import os | ||
import shutil | ||
import subprocess | ||
|
||
from pathlib import Path | ||
from typing import List, Union | ||
|
||
from .. import Utils | ||
from ..TensileInstructions import getGfxName | ||
from ..Common import globalParameters, print2, ensurePath, printWarning | ||
from ..KernelWriterAssembly import KernelWriterAssembly | ||
from .SharedCommands import compressCodeObject | ||
|
||
def _linkIntoCodeObject(
    objFiles: List[str], coPathDest: Union[Path, str], kernelWriterAssembly: KernelWriterAssembly
):
    """Links object files into a code object file.

    Args:
        objFiles: A list of object files to be linked.
        coPathDest: The destination path for the code object file.
        kernelWriterAssembly: An instance of KernelWriterAssembly to get link arguments.

    Raises:
        subprocess.CalledProcessError: If a linker invocation exits with a
            non-zero status.
    """
    if os.name == "nt":
        # Use args file on Windows b/c the command may exceed the limit of 8191 characters
        argsFile = Path.cwd() / "clangArgs.txt"
        with open(argsFile, "wt") as file:
            file.write(" ".join(objFiles))
        # Reference the args file by its full path and write to the requested
        # destination; nothing here depends on the subprocess working directory.
        args = [
            globalParameters["AssemblerPath"],
            "-target",
            "amdgcn-amd-amdhsa",
            "-o",
            str(coPathDest),
            "@" + str(argsFile),
        ]
        subprocess.check_call(args)
        return

    maxObjFiles = 10000
    if len(objFiles) > maxObjFiles:
        # Too many inputs for one link: first combine them batch by batch
        # into relocatable objects ("-r"), then link those.
        batches = [objFiles[i:i + maxObjFiles] for i in range(0, len(objFiles), maxObjFiles)]
        reducedObjFiles = []
        for index, batch in enumerate(batches):
            if len(batch) > 1:
                partialObj = str(coPathDest) + "." + str(index)
                args = [globalParameters["ROCmLdPath"], "-r"] + batch + ["-o", partialObj]
                print2(f"Linking object files into fewer object files: {' '.join(args)}")
                subprocess.check_call(args)
                reducedObjFiles.append(partialObj)
            else:
                # A batch holding a single object needs no partial link;
                # forward that object file itself.
                reducedObjFiles.append(batch[0])
        objFiles = reducedObjFiles

    args = kernelWriterAssembly.getLinkCodeObjectArgs(objFiles, str(coPathDest))
    print2(f"Linking object files into code object: {' '.join(args)}")
    subprocess.check_call(args)
|
||
|
||
|
||
def buildAssemblyCodeObjectFiles(kernels, kernelWriterAssembly, outputPath):
    """Produces the code object files for all assembly kernels.

    Kernels whose "KernelLanguage" is "Assembly" are grouped by their "ISA".
    For each group, depending on the "MergeFiles", "NumMergedFiles" and
    "LazyLibraryLoading" global parameters, the kernels' object files are
    either linked and compressed into code objects, or the per-kernel ".co"
    files are copied from the assembly directory as they are.

    Args:
        kernels: The kernels to process; non-assembly kernels are ignored.
        kernelWriterAssembly: Provides the assembly directory, the kernel
            file base names and the link arguments.
        outputPath: Directory under which the "library" folder is placed.

    Returns:
        The code object paths that were produced (Path objects for linked
        code objects, strings for copied ones).
    """
    objSuffix = ".o"
    coSuffix = ".co"
    rawCoSuffix = ".co.raw"

    libraryDir = Path(ensurePath(os.path.join(outputPath, 'library')))
    assemblyDir = Path(kernelWriterAssembly.getAssemblyDirectory())

    def objectPathOf(kernel):
        # Object file assembled for one kernel, inside the assembly directory.
        return str(assemblyDir / (kernelWriterAssembly.getKernelFileBase(kernel) + objSuffix))

    kernelsByArch = collections.defaultdict(list)
    for kernel in kernels:
        if kernel["KernelLanguage"] == "Assembly":
            kernelsByArch[tuple(kernel['ISA'])].append(kernel)

    producedFiles = []
    for arch, archKernels in kernelsByArch.items():
        if not archKernels:
            continue

        gfx = getGfxName(arch)

        mergeIntoLibraries = (
            globalParameters["MergeFiles"]
            or globalParameters["NumMergedFiles"] > 1
            or globalParameters["LazyLibraryLoading"]
        )

        if not mergeIntoLibraries:
            # no mergefiles: copy every kernel's code object individually
            def copyPairs():
                for archKernel in archKernels:
                    baseName = kernelWriterAssembly.getKernelFileBase(archKernel)
                    source = os.path.join(assemblyDir, baseName + '.co')
                    if globalParameters["PackageLibrary"]:
                        target = os.path.join(libraryDir, gfx, baseName + '.co')
                    else:
                        target = os.path.join(libraryDir, baseName + '_' + gfx + '.co')
                    yield source, target

            for source, target in Utils.tqdm(copyPairs(), "Copying code objects"):
                shutil.copyfile(source, target)
                producedFiles.append(target)
            printWarning("Code object files are not compressed in `--no-merge-files` build mode.")
            continue

        # Kernels without an explicit code object go to the per-arch library.
        defaultObjects = [objectPathOf(k) for k in archKernels if 'codeObjectFile' not in k]

        linkPlan = collections.defaultdict(list)
        if defaultObjects:
            linkPlan[assemblyDir / ("TensileLibrary_" + gfx + rawCoSuffix)] = defaultObjects

        # Kernels naming a code object are collected under that name.
        for archKernel in archKernels:
            requestedName = archKernel.get("codeObjectFile", None)
            if not requestedName:
                continue
            linkPlan[assemblyDir / (requestedName + rawCoSuffix)].append(objectPathOf(archKernel))

        for rawCoPath, linkInputs in linkPlan.items():
            _linkIntoCodeObject(linkInputs, rawCoPath, kernelWriterAssembly)
            finalCoPath = libraryDir / rawCoPath.name.replace(rawCoSuffix, coSuffix)
            compressCodeObject(rawCoPath, finalCoPath, gfx, globalParameters["ClangOffloadBundlerPath"])
            producedFiles.append(finalCoPath)

    return producedFiles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import subprocess | ||
|
||
from typing import Union | ||
from pathlib import Path | ||
|
||
from ..Common import print2 | ||
|
||
def compressCodeObject(
    coPathSrc: Union[Path, str], coPathDest: Union[Path, str], gfx: str, bundler: str
):
    """Compresses a code object file using the provided bundler.

    Args:
        coPathSrc: The source path of the code object file to be compressed.
        coPathDest: The destination path for the compressed code object file.
        gfx: The target GPU architecture.
        bundler: The path to the Clang Offload Bundler executable.

    Raises:
        RuntimeError: If compressing the code object file fails.
    """
    import os  # function-scope import: this module does not import os at file level

    args = [
        bundler,
        "--compress",
        "--type=o",
        "--bundle-align=4096",
        f"--targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--{gfx}",
        # The host target gets the platform's null device as its input:
        # os.devnull is "/dev/null" on POSIX and "nul" on Windows.
        f"--input={os.devnull}",
        f"--input={str(coPathSrc)}",
        f"--output={str(coPathDest)}",
    ]

    print2(f"Bundling/compressing code objects: {' '.join(args)}")
    try:
        out = subprocess.check_output(args, stderr=subprocess.STDOUT)
        print2(f"Output: {out}")
    except subprocess.CalledProcessError as err:
        # Chain explicitly so the failing subprocess call stays attached as the cause.
        raise RuntimeError(
            f"Error compressing code object via bundling: {err.output}\nFailed command: {' '.join(args)}"
        ) from err
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
import itertools | ||
import os | ||
import re | ||
import shlex | ||
import shutil | ||
import subprocess | ||
from pathlib import Path | ||
from typing import Iterable, List, Union | ||
|
||
from ..Common import globalParameters, print2, ensurePath, supportedCompiler, ParallelMap2, splitArchs, which | ||
from .SharedCommands import compressCodeObject | ||
|
||
def _compileSourceObjectFile(cmdlineArchs: List[str], cxxCompiler: str, cxxSrcPath: str, objDestPath: str, outputPath: str):
    """Compiles a source file into an object file.

    Args:
        cmdlineArchs: List of architectures for offloading.
        cxxCompiler: The C++ compiler to use.
        cxxSrcPath: The path to the kernel source file to compile.
        objDestPath: The path of the object file to produce.
        outputPath: The output directory path; added to the include search path.

    Raises:
        RuntimeError: If the compiler cannot be located or the compilation
            command fails.
    """
    archFlags = ['--offload-arch=' + arch for arch in cmdlineArchs]

    #TODO(@jichangjichang) Needs to be fixed when Maneesh's change is made available
    hipFlags = ["-D__HIP_HCC_COMPAT_MODE__=1"]
    if cxxCompiler == "hipcc":
        hipFlags.append("--genco")
    else:
        hipFlags.extend(["--cuda-device-only", "-x", "hip", "-O3"])

    hipFlags.extend(['-I', outputPath])
    hipFlags.extend(["-Xoffload-linker", f"--build-id={globalParameters['BuildIdKind']}"])
    hipFlags.append('-std=c++17')
    if globalParameters["AsanBuild"]:
        hipFlags.extend(["-fsanitize=address", "-shared-libasan", "-fuse-ld=lld"])
    if globalParameters["SaveTemps"]:
        hipFlags.append('--save-temps')

    # Optional compiler launcher prefix taken from the environment.
    launcher = shlex.split(os.environ.get('Tensile_CXX_COMPILER_LAUNCHER', ''))

    if os.name == "nt":
        hipFlags.extend(['-fms-extensions', '-fms-compatibility', '-fPIC', '-Wno-deprecated-declarations'])

    compilerPath = which(cxxCompiler)
    if not compilerPath:
        # NOTE(review): presumably `which` yields a falsy value when the
        # compiler is not found -- confirm against Common.which. Failing here
        # gives a clear message instead of an error on a None argument later.
        raise RuntimeError(f"Error compiling source object file: compiler not found: {cxxCompiler}")

    args = launcher + [compilerPath] + hipFlags + archFlags + [cxxSrcPath, '-c', '-o', objDestPath]

    try:
        out = subprocess.check_output(args, stderr=subprocess.STDOUT)
        print2(f"Output: {out}" if out else "")
    except subprocess.CalledProcessError as err:
        # Chain explicitly so the failing subprocess call stays attached as the cause.
        raise RuntimeError(
            f"Error compiling source object file: {err.output}\nFailed command: {' '.join(args)}"
        ) from err
|
||
|
||
def _listTargetTriples(bundler: str, objFile: str) -> List[str]:
    """Lists the target triples in an object file.

    Args:
        bundler: The path to the bundler, typically ``clang-offload-bundler``.
        objFile: The object file path.

    Returns:
        List of target triples in the object file, one entry per non-blank
        line of the bundler's listing.

    Raises:
        RuntimeError: If the bundler exits with a non-zero status.
    """
    args = [bundler, "--type=o", f"--input={objFile}", "-list"]
    try:
        listing = subprocess.check_output(args, stderr=subprocess.STDOUT).decode()
    except subprocess.CalledProcessError as err:
        raise RuntimeError(
            f"Error listing target triples in object files: {err.output}\nFailed command: {' '.join(args)}"
        ) from err
    # splitlines() copes with both "\n" and "\r\n" endings, so no stray "\r"
    # ends up in a triple; blank lines (e.g. after the final newline) are dropped.
    return [line.strip() for line in listing.splitlines() if line.strip()]
|
||
|
||
def _computeSourceCodeObjectFilename(target: str, base: str, buildPath: Union[Path, str], arch: str) -> Path:
    """Generates a code object file path using the target, base, and build path.

    Args:
        target: The target triple.
        base: The base name for the output file (name without extension).
        buildPath: The build directory path.
        arch: The architecture string used in (or expected in) the file name.

    Returns:
        Path to the code object file.

    Raises:
        RuntimeError: If ``base`` names a non-fallback TensileLibrary and the
            resulting file name does not contain ``arch``.
    """
    buildPath = Path(buildPath)

    if "TensileLibrary" not in base:
        return buildPath / "{0}.so-000-{1}.hsaco.raw".format(base, arch)

    if "fallback" in base:
        return buildPath / "{0}_{1}.hsaco.raw".format(base, arch)

    # Pick the xnack variant named in the target; when several match, the
    # last one in this order wins, and no match means no variant suffix.
    variant = ""
    for candidate in ("xnack-", "xnack+"):
        if candidate in target:
            variant = candidate
    baseVariant = base + "-" + variant if variant else base

    if arch not in baseVariant:
        raise RuntimeError(
            "Failed to compute code object name: "
            f"could not find architecture {arch} in base {baseVariant}"
        )
    return buildPath / (baseVariant + ".hsaco.raw")
|
||
|
||
def _unbundleSourceCodeObjects(bundler: str, target: str, infile: str, outfileRaw: str):
    """Extracts the code object for one target from a bundled object file.

    Args:
        bundler: The path to the bundler, typically ``clang-offload-bundler``.
        target: The target to extract, passed to the bundler's ``--targets`` option.
        infile: The bundled input file path.
        outfileRaw: The path the extracted raw code object is written to.

    Raises:
        RuntimeError: If the bundler exits with a non-zero status.
    """
    # Option order matters for the logged/reported command line; dicts keep
    # insertion order.
    options = {
        "type": "o",
        "targets": target,
        "input": infile,
        "output": outfileRaw,
    }
    args = [bundler]
    args += [f"--{name}={value}" for name, value in options.items()]
    args.append("--unbundle")

    print2(f"Unbundling source code object file: {' '.join(args)}")
    try:
        result = subprocess.check_output(args, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        raise RuntimeError(f"Error unbundling source code object file: {err.output}\nFailed command: {' '.join(args)}")
    else:
        print2(f"Output: {result}" if result else "")
|
||
|
||
def _buildSourceCodeObjectFile(cxxCompiler: str, outputPath: Union[Path, str], kernelPath: Union[Path, str]) -> List[str]:
    """Builds the code object files for a single HIP kernel source file.

    The source is compiled into one bundled object file inside a temporary
    build directory below the "WorkingPath" global parameter. Every "gfx"
    target listed for that object is then unbundled, and once all targets
    have been extracted the results are moved into ``<outputPath>/library``.

    Args:
        cxxCompiler: The C++ compiler to use.
        outputPath: The output directory path where code objects will be placed.
        kernelPath: The path to the kernel source file.

    Returns:
        List of paths to the created code objects.

    Raises:
        RuntimeError: If the compiler is not supported or no bundler is
            configured.
    """
    tmpBuildDir = Path(ensurePath(os.path.join(globalParameters['WorkingPath'], 'code_object_tmp')))
    libraryDir = Path(ensurePath(os.path.join(outputPath, 'library')))
    kernelPath = Path(kernelPath)

    cmakeCxxCompiler = globalParameters.get("CmakeCxxCompiler")
    if cmakeCxxCompiler is not None:
        os.environ["CMAKE_CXX_COMPILER"] = cmakeCxxCompiler

    objectName = kernelPath.stem + '.o'

    if not supportedCompiler(cxxCompiler):
        raise RuntimeError("Unknown compiler {}".format(cxxCompiler))

    _, cmdlineArchs = splitArchs()

    objectPath = str(tmpBuildDir / objectName)
    _compileSourceObjectFile(cmdlineArchs, cxxCompiler, str(kernelPath), objectPath, str(outputPath))

    bundler = globalParameters["ClangOffloadBundlerPath"]
    if not bundler:
        raise RuntimeError("No bundler found; set TENSILE_ROCM_OFFLOAD_BUNDLER_PATH to point to clang-offload-bundler")

    # (raw code object in the build dir, final location in the library dir)
    pendingMoves = []
    for target in _listTargetTriples(bundler, objectPath):
        found = re.search("gfx.*$", target)
        if found is None:
            # Entries without a gfx architecture are skipped.
            continue
        arch = found.group().replace(":", "-")
        rawCoPath = _computeSourceCodeObjectFilename(target, kernelPath.stem, tmpBuildDir, arch)
        _unbundleSourceCodeObjects(bundler, target, objectPath, str(rawCoPath))
        pendingMoves.append((rawCoPath, str(libraryDir / rawCoPath.stem)))

    # Move only after every target has been unbundled.
    for rawCoPath, finalCoPath in pendingMoves:
        shutil.move(rawCoPath, finalCoPath)

    return [finalCoPath for _, finalCoPath in pendingMoves]
|
||
def buildSourceCodeObjectFiles(cxxCompiler: str, kernelFiles: List[Path], outputPath: Path) -> Iterable[str]:
    """Builds code object files for a collection of HIP kernel source files.

    Each kernel file is handed to ``_buildSourceCodeObjectFile`` through
    ``ParallelMap2``; the per-file results are flattened into one iterable.

    Args:
        cxxCompiler: The C++ compiler to use.
        kernelFiles: List of paths to the kernel source files.
        outputPath: The output directory path where code objects will be placed.

    Returns:
        An iterable over the paths of all created code objects.
    """
    perKernelArgs = ((cxxCompiler, outputPath, kernelFile) for kernelFile in kernelFiles)
    perKernelResults = ParallelMap2(_buildSourceCodeObjectFile, perKernelArgs, "Compiling source kernels")
    return itertools.chain.from_iterable(perKernelResults)
Empty file.
Oops, something went wrong.