-
-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from occ-ai/roy.overhaul_detection_vad_models
Update whisper-utils.h and whisper-utils.cpp to use update_whisper_mo…
- Loading branch information
Showing
25 changed files
with
1,585 additions
and
538 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,7 +45,7 @@ | |
} | ||
}, | ||
"name": "obs-cleanstream", | ||
"version": "0.0.5", | ||
"version": "0.0.6", | ||
"author": "Roy Shilkrot", | ||
"website": "https://github.com/occ-ai/obs-cleanstream/", | ||
"email": "[email protected]", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
include(FetchContent) | ||
|
||
# Optional override: both cache entries describe a user-supplied ONNX Runtime
# archive. When the URL is given, the hash must be given as well.
set(CUSTOM_ONNXRUNTIME_URL "" CACHE STRING "URL of a downloaded ONNX Runtime tarball")
set(CUSTOM_ONNXRUNTIME_HASH "" CACHE STRING "Hash of a downloaded ONNX Runtime tarball")

# ONNX Runtime version; used for the release download URL and for the
# versioned library file names further down in this file.
set(Onnxruntime_VERSION "1.17.1")

# USE_PREDEFINED_ONNXRUNTIME is ON when no custom URL was supplied. A custom
# URL without a matching hash aborts configuration.
if(CUSTOM_ONNXRUNTIME_URL STREQUAL "")
  set(USE_PREDEFINED_ONNXRUNTIME ON)
elseif(CUSTOM_ONNXRUNTIME_HASH STREQUAL "")
  message(FATAL_ERROR "Both of CUSTOM_ONNXRUNTIME_URL and CUSTOM_ONNXRUNTIME_HASH must be present!")
else()
  set(USE_PREDEFINED_ONNXRUNTIME OFF)
endif()
|
||
# Select the archive URL and its checksum. With the predefined runtime the
# official GitHub release asset matching the platform is used; otherwise the
# user-supplied CUSTOM_ONNXRUNTIME_URL / CUSTOM_ONNXRUNTIME_HASH pair is taken
# as-is. Result: Onnxruntime_URL and Onnxruntime_HASH.
if(USE_PREDEFINED_ONNXRUNTIME)
  set(Onnxruntime_BASEURL "https://github.com/microsoft/onnxruntime/releases/download/v${Onnxruntime_VERSION}")

  if(APPLE)
    set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-osx-universal2-${Onnxruntime_VERSION}.tgz")
    set(Onnxruntime_HASH SHA256=9FA57FA6F202A373599377EF75064AE568FDA8DA838632B26A86024C7378D306)
  elseif(MSVC)
    set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-win-x64-${Onnxruntime_VERSION}.zip")
    # This was previously assigned to the misspelled variable
    # "OOnnxruntime_HASH", leaving Onnxruntime_HASH unset on this branch.
    # NOTE(review): because the value was never consumed before, confirm it
    # matches the published win-x64 release asset.
    set(Onnxruntime_HASH SHA256=4802AF9598DB02153D7DA39432A48823FF69B2FB4B59155461937F20782AA91C)
  else()
    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
      set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-linux-aarch64-${Onnxruntime_VERSION}.tgz")
      set(Onnxruntime_HASH SHA256=70B6F536BB7AB5961D128E9DBD192368AC1513BFFB74FE92F97AAC342FBD0AC1)
    else()
      # Any other processor gets the x64 GPU package.
      set(Onnxruntime_URL "${Onnxruntime_BASEURL}/onnxruntime-linux-x64-gpu-${Onnxruntime_VERSION}.tgz")
      set(Onnxruntime_HASH SHA256=613C53745EA4960ED368F6B3AB673558BB8561C84A8FA781B4EA7FB4A4340BE4)
    endif()
  endif()
else()
  set(Onnxruntime_URL "${CUSTOM_ONNXRUNTIME_URL}")
  set(Onnxruntime_HASH "${CUSTOM_ONNXRUNTIME_HASH}")
endif()
|
||
# Fetch the selected archive at configure time; the paths below are built
# from onnxruntime_SOURCE_DIR.
FetchContent_Declare(onnxruntime URL ${Onnxruntime_URL} URL_HASH ${Onnxruntime_HASH})
FetchContent_MakeAvailable(onnxruntime)
|
||
# Wire the fetched ONNX Runtime into the plugin target, per platform.
if(APPLE)
  # Link the versioned dylib directly and ship it in the bundle's Frameworks
  # location by adding it as a source file with MACOSX_PACKAGE_LOCATION.
  set(Onnxruntime_LIB "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.${Onnxruntime_VERSION}.dylib")
  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${onnxruntime_SOURCE_DIR}/include")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIB}")
  target_sources(${CMAKE_PROJECT_NAME} PRIVATE "${Onnxruntime_LIB}")
  set_source_files_properties("${Onnxruntime_LIB}" PROPERTIES MACOSX_PACKAGE_LOCATION Frameworks)
  source_group("Frameworks" FILES "${Onnxruntime_LIB}")

  # Sign the dylib with CODESIGN_IDENTITY.
  add_custom_command(
    TARGET "${CMAKE_PROJECT_NAME}"
    PRE_BUILD
    COMMAND /usr/bin/codesign --force --verify --verbose --sign "${CODESIGN_IDENTITY}" "${Onnxruntime_LIB}"
    VERBATIM)

  # Rewrite the plugin's reference to the dylib from @rpath to the Frameworks
  # directory relative to the loading binary.
  add_custom_command(
    TARGET "${CMAKE_PROJECT_NAME}"
    POST_BUILD
    COMMAND
      ${CMAKE_INSTALL_NAME_TOOL} -change "@rpath/libonnxruntime.${Onnxruntime_VERSION}.dylib"
      "@loader_path/../Frameworks/libonnxruntime.${Onnxruntime_VERSION}.dylib" $<TARGET_FILE:${CMAKE_PROJECT_NAME}>)
elseif(MSVC)
  # "Ort" is an interface target aggregating one imported shared library per
  # entry in Onnxruntime_LIB_NAMES; each DLL is also installed next to the
  # plugin.
  add_library(Ort INTERFACE)
  set(Onnxruntime_LIB_NAMES onnxruntime onnxruntime_providers_shared)
  foreach(lib_name IN LISTS Onnxruntime_LIB_NAMES)
    add_library(Ort::${lib_name} SHARED IMPORTED)
    set_target_properties(
      Ort::${lib_name}
      PROPERTIES IMPORTED_IMPLIB "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.lib"
                 IMPORTED_LOCATION "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.dll"
                 INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include")
    target_link_libraries(Ort INTERFACE Ort::${lib_name})
    install(FILES "${onnxruntime_SOURCE_DIR}/lib/${lib_name}.dll" DESTINATION "obs-plugins/64bit")
  endforeach()

  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE Ort)
else()
  # Link against the versioned shared object. Every processor except aarch64
  # additionally installs libonnxruntime_providers_shared.so.
  set(Onnxruntime_LINK_LIBS "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime.so.${Onnxruntime_VERSION}")
  set(Onnxruntime_INSTALL_LIBS ${Onnxruntime_LINK_LIBS})
  if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
    list(APPEND Onnxruntime_INSTALL_LIBS "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime_providers_shared.so")
  endif()

  target_include_directories(${CMAKE_PROJECT_NAME} SYSTEM PUBLIC "${onnxruntime_SOURCE_DIR}/include")
  target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE ${Onnxruntime_LINK_LIBS})
  install(FILES ${Onnxruntime_INSTALL_LIBS} DESTINATION "${CMAKE_INSTALL_LIBDIR}/obs-plugins/${CMAKE_PROJECT_NAME}")
  # The libraries are installed into a sub-directory named after the project,
  # so the plugin's install RPATH points there relative to its own location.
  set_target_properties(${CMAKE_PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/${CMAKE_PROJECT_NAME}")
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,17 @@ | ||
CleanStreamAudioFilter="Clean stream audio filter" | ||
CleanStreamFilterPlugin="Clean stream filter plugin" | ||
detect_regex="Detect regex" | ||
advanced_settings="Advanced settings" | ||
filler_p_threshold="Filler p threshold" | ||
do_silence="Do silence" | ||
vad_enabled="VAD enabled" | ||
log_level="Log level" | ||
log_words="Log words" | ||
whisper_model="Whisper model" | ||
Whisper_Parameters="Whisper Parameters" | ||
whisper_sampling_method="Whisper sampling method" | ||
n_threads="Number of threads" | ||
n_max_text_ctx="Number of max text context" | ||
no_context="No context" | ||
replace_sound_path="Replace Sound Path" | ||
replace_sound="Replace Sound" |
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
#if defined(_WIN32) || defined(__APPLE__) | ||
|
||
extern "C" { | ||
#include <libavformat/avformat.h> | ||
#include <libavcodec/avcodec.h> | ||
#include <libavutil/frame.h> | ||
#include <libavutil/mem.h> | ||
#include <libavutil/opt.h> | ||
#include <libswresample/swresample.h> | ||
} | ||
|
||
#include <iostream> | ||
#include <vector> | ||
|
||
#include "read-audio-file.h" | ||
#include "obs.h" | ||
#include "plugin-support.h" | ||
|
||
/**
 * Decode the first audio stream of a media file into mono float samples.
 *
 * The stream is decoded with libavcodec and converted with libswresample to
 * one channel of AV_SAMPLE_FMT_FLT samples at `targetSampleRate`.
 *
 * @param filename          Path handed to avformat_open_input().
 * @param targetSampleRate  Output sample rate passed to the resampler.
 * @return The converted samples, or an empty AudioDataFloat when any step
 *         fails (the failure is reported through obs_log).
 */
AudioDataFloat read_audio_file(const char *filename, int targetSampleRate)
{
	// Owns every FFmpeg object used below. The destructor runs on every exit
	// path, so the early error returns no longer leak contexts or buffers.
	struct FFmpegState {
		AVFormatContext *formatContext = nullptr;
		AVCodecContext *codecContext = nullptr;
		AVFrame *frame = nullptr;
		AVPacket *packet = nullptr;
		SwrContext *swr = nullptr;
		AVChannelLayout outLayout = {};

		~FFmpegState()
		{
			swr_free(&swr);
			av_packet_free(&packet);
			av_frame_free(&frame);
			avcodec_free_context(&codecContext);
			if (formatContext)
				avformat_close_input(&formatContext);
			av_channel_layout_uninit(&outLayout);
		}
	} st;

	// Logs "<what>: <ffmpeg error text>" for a negative FFmpeg return code.
	auto logAvError = [](const char *what, int err) {
		char errbuf[AV_ERROR_MAX_STRING_SIZE];
		av_strerror(err, errbuf, AV_ERROR_MAX_STRING_SIZE);
		obs_log(LOG_ERROR, "%s: %s", what, errbuf);
	};

	if (avformat_open_input(&st.formatContext, filename, nullptr, nullptr) != 0) {
		obs_log(LOG_ERROR, "Error opening file");
		return {};
	}

	if (avformat_find_stream_info(st.formatContext, nullptr) < 0) {
		obs_log(LOG_ERROR, "Error finding stream information");
		return {};
	}

	// Pick the first audio stream in the container.
	int audioStreamIndex = -1;
	for (unsigned int i = 0; i < st.formatContext->nb_streams; i++) {
		if (st.formatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
			audioStreamIndex = (int)i;
			break;
		}
	}

	if (audioStreamIndex == -1) {
		obs_log(LOG_ERROR, "No audio stream found");
		return {};
	}

	AVCodecParameters *codecParams = st.formatContext->streams[audioStreamIndex]->codecpar;
	const AVCodec *codec = avcodec_find_decoder(codecParams->codec_id);
	if (!codec) {
		obs_log(LOG_ERROR, "Decoder not found");
		return {};
	}

	st.codecContext = avcodec_alloc_context3(codec);
	if (!st.codecContext) {
		obs_log(LOG_ERROR, "Failed to allocate codec context");
		return {};
	}

	if (avcodec_parameters_to_context(st.codecContext, codecParams) < 0) {
		obs_log(LOG_ERROR, "Failed to copy codec parameters to codec context");
		return {};
	}

	if (avcodec_open2(st.codecContext, codec, nullptr) < 0) {
		obs_log(LOG_ERROR, "Failed to open codec");
		return {};
	}

	st.frame = av_frame_alloc();
	st.packet = av_packet_alloc();
	if (!st.frame || !st.packet) {
		obs_log(LOG_ERROR, "Failed to allocate frame or packet");
		return {};
	}

	// set up swresample: input format comes from the decoder, output is mono float
	int ret = av_channel_layout_from_string(&st.outLayout, "mono");
	if (ret < 0) {
		logAvError("Failed to set up swr context", ret);
		return {};
	}
	ret = swr_alloc_set_opts2(&st.swr, &st.outLayout, AV_SAMPLE_FMT_FLT, targetSampleRate,
				  &(st.codecContext->ch_layout), st.codecContext->sample_fmt,
				  st.codecContext->sample_rate, 0, nullptr);
	if (ret < 0) {
		logAvError("Failed to set up swr context", ret);
		return {};
	}
	// init swr
	ret = swr_init(st.swr);
	if (ret < 0) {
		logAvError("Failed to initialize swr context", ret);
		return {};
	}

	AudioDataFloat audioFrames;
	std::vector<float> convertBuffer(4096);

	// Runs one swr_convert() call with room for `capacity` output samples and
	// appends whatever it produced. Returns the sample count, or <0 on error.
	auto convertInto = [&](const uint8_t **input, int inputSamples, int capacity) -> int {
		if (convertBuffer.size() < (size_t)capacity)
			convertBuffer.resize((size_t)capacity);
		uint8_t *output[1] = {reinterpret_cast<uint8_t *>(convertBuffer.data())};
		const int produced = swr_convert(st.swr, output, capacity, input, inputSamples);
		if (produced < 0) {
			logAvError("Failed to convert audio frame", produced);
			return produced;
		}
		audioFrames.insert(audioFrames.end(), convertBuffer.data(),
				   convertBuffer.data() + produced);
		return produced;
	};

	// Pulls every frame the decoder currently has ready and resamples it.
	auto receiveFrames = [&]() -> bool {
		while (avcodec_receive_frame(st.codecContext, st.frame) == 0) {
			// Size the output for this frame so nothing is left queued inside
			// the resampler (a fixed 4096-sample buffer can be too small).
			const int capacity = swr_get_out_samples(st.swr, st.frame->nb_samples);
			if (capacity < 0) {
				logAvError("Failed to convert audio frame", capacity);
				return false;
			}
			if (convertInto((const uint8_t **)st.frame->extended_data,
					st.frame->nb_samples, capacity) < 0)
				return false;
		}
		return true;
	};

	while (av_read_frame(st.formatContext, st.packet) >= 0) {
		bool ok = true;
		if (st.packet->stream_index == audioStreamIndex &&
		    avcodec_send_packet(st.codecContext, st.packet) == 0)
			ok = receiveFrames();
		// Release the packet before a possible error return.
		av_packet_unref(st.packet);
		if (!ok)
			return {};
	}

	// End of file: a null packet puts the decoder into draining mode so any
	// frames it still holds are returned.
	if (avcodec_send_packet(st.codecContext, nullptr) == 0 && !receiveFrames())
		return {};

	// Flush the samples the resampler is still holding (null input).
	for (;;) {
		const int flushed = convertInto(nullptr, 0, 4096);
		if (flushed < 0)
			return {};
		if (flushed == 0)
			break;
	}

	// %zu: audioFrames.size() is a size_t, which "%lu" does not match on
	// 64-bit Windows.
	obs_log(LOG_INFO,
		"Converted %zu frames of audio data (orig: %d, %s sample format, %d channels, %s)",
		audioFrames.size(), st.codecContext->sample_rate,
		av_get_sample_fmt_name(st.codecContext->sample_fmt),
		st.codecContext->ch_layout.nb_channels,
		av_sample_fmt_is_planar(st.codecContext->sample_fmt) ? "planar" : "packed");

	return audioFrames;
}
|
||
#endif |
Oops, something went wrong.