Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
Merge pull request #108 from intel/a32543254-patch-4
Browse files Browse the repository at this point in the history
A32543254 patch 4
  • Loading branch information
a32543254 authored Feb 1, 2024
2 parents 29a1158 + 0ab0165 commit d06db34
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 0 deletions.
17 changes: 17 additions & 0 deletions docs/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ Neural Speed supports the following models:
<td>✅</td>
<td>✅</td>
<td>Latest</td>
</tr>
<tr>
<td><a href="https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0" target="_blank" rel="noopener noreferrer">Solar-10.7B</a></td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>Latest</td>
</tr>
<tr>
<td><a href="https://huggingface.co/EleutherAI/gpt-j-6b" target="_blank" rel="noopener noreferrer">GPT-J-6B</a></td>
Expand Down Expand Up @@ -317,6 +326,14 @@ Neural Speed supports the following models:
<td>✅</td>
<td>✅</td>
<td>✅</td>
</tr>
<tr>
<td><a href="https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0" target="_blank" rel="noopener noreferrer">Solar-10.7B</a></td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
<td>✅</td>
</tr>
<tr>
Expand Down
5 changes: 5 additions & 0 deletions neural_speed/core/ne_layers.c
Original file line number Diff line number Diff line change
Expand Up @@ -10901,6 +10901,7 @@ void ne_graph_compute(struct ne_context* ctx, struct ne_cgraph* cgraph) {
}

void ne_graph_profiling(const struct ne_cgraph* cgraph) {
#ifdef NS_PERF
int64_t perf_total_per_op_us[NE_OP_COUNT] = {0};

NE_PRINT("=== GRAPH Profiling ===\n");
Expand All @@ -10923,6 +10924,10 @@ void ne_graph_profiling(const struct ne_cgraph* cgraph) {
}
NE_PRINT("perf_total_per_op_us[%24s] = %7.3f ms\n", "INNER PRODUCT", (double)ip_duration / 1000.0);
NE_PRINT("========================================\n");

#else
NE_PRINT("\n[Warning] To collect profiling data, please recompile with NS_PROFILING=ON.\n");
#endif
}

void ne_graph_reset(struct ne_cgraph* cgraph) {
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def check_env_flag(name: str, default: bool = False) -> bool:

NS_WITH_AVX2 = check_env_flag("NS_WITH_AVX2", 'avx512f' not in cpu_flags)
""" Whether to limit the max ISA used to AVX2; otherwise AVX512 will be used; set to ON/OFF """
NS_PROFILING_ENV = os.environ.get("NS_PROFILING", "OFF")

cwd = os.path.dirname(os.path.abspath(__file__))

Expand Down Expand Up @@ -104,6 +105,7 @@ def build_extension(self, ext: CMakeExtension) -> None:
f"-DNS_WITH_AVX2={'ON' if NS_WITH_AVX2 else 'OFF'}",
f"-DNS_WITH_TESTS=OFF",
f"-DNS_PYTHON_API=ON",
f"-DNS_PROFILING={NS_PROFILING_ENV}",
]
if sys.platform == "linux": # relative_rpath
cmake_args.append('-DCMAKE_BUILD_RPATH=$ORIGIN/')
Expand Down

0 comments on commit d06db34

Please sign in to comment.