Skip to content

Commit

Permalink
Merge pull request #2 from hplt-project/singleton-scanner
Browse files Browse the repository at this point in the history
Experiment: count small batch of ngrams at a time
  • Loading branch information
onadegibert authored Mar 19, 2024
2 parents ca5f5ab + 4582311 commit ede1238
Show file tree
Hide file tree
Showing 12 changed files with 295 additions and 104 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Keep the root-level data directory and any host-side build tree out of the
# Docker build context.
/data
/build
38 changes: 38 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# CI workflow: on every push, configure the project with tests enabled,
# build it, then run the unit tests and the docalign end-to-end test.
on:
  push:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: checkout
        # v4 replaces the deprecated v2 release of the checkout action.
        uses: actions/checkout@v4
        with:
          # Check out git submodules (recursively) together with the repo.
          submodules: recursive
      - name: install dependencies
        run: |-
          sudo apt-get update
          sudo apt-get install -y \
            python3 \
            libboost-program-options-dev \
            libboost-test-dev \
            libicu-dev \
            zlib1g-dev \
            cmake \
            build-essential
      - name: cmake
        # -L prints the non-advanced cache variables so the effective
        # configuration shows up in the job log.
        run: |-
          mkdir -p build
          cd build
          cmake -L .. \
            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
            -DCOMPILE_TESTS=On
      - name: build
        working-directory: build
        run: make -j2
      - name: run unit tests
        working-directory: build
        # Runs the same registered tests as `make test`, but prints the
        # output of any failing test instead of only its name.
        run: ctest --output-on-failure
      - name: run end-to-end tests
        working-directory: tests/docalign
        # Put the freshly built binaries first on PATH for run.sh.
        run: PATH=../../build/bin:$PATH ./run.sh
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Root-level data directory.
/data
# Any root-level entry whose name starts with "data"; this pattern also
# matches /data itself.
/data*
# NOTE(review): presumably the output file written by the docalign
# end-to-end test run - confirm against tests/docalign/run.sh.
/tests/docalign/out.txt
15 changes: 8 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
# Release configuration: the variable is overwritten (not appended to), so
# -Ofast is the only release-specific flag.
# NOTE(review): on GCC/Clang -Ofast also turns on -ffast-math; confirm the
# relaxed floating-point semantics are acceptable for this project.
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
# Debug configuration: extra warnings plus debug symbols.
set(CMAKE_CXX_FLAGS_DEBUG "-Wextra -g")

# Lazy mode: get me a static binary for at least the essentials
# These are hint variables for CMake's FindBoost / FindZLIB modules and only
# take effect if set before the matching find_package() call (not visible in
# this excerpt - verify the ordering).
# NOTE(review): FindZLIB honours ZLIB_USE_STATIC_LIBS only from CMake 3.24;
# older versions ignore it silently - check against cmake_minimum_required.
set(Boost_USE_STATIC_LIBS ON)
set(ZLIB_USE_STATIC_LIBS ON)

# Compile all executables into bin/
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)

Expand Down Expand Up @@ -48,7 +52,6 @@ include_directories(${PREPROCESS_PATH})
# find *.h and *.cpp files
# file(GLOB) is evaluated when CMake configures, so files added to src/ or
# tests/ later are only picked up after CMake is re-run.
file(GLOB dalign_cpp_headers ${CMAKE_CURRENT_SOURCE_DIR}/src/*.h)
file(GLOB dalign_cpp_cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
# NOTE(review): dalign_tests is not referenced in the visible part of this
# file - confirm whether anything still uses it.
file(GLOB dalign_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/*.cpp)

# Tool to score alignment between two sets of documents in the same language.
# Built from docalign.cpp plus every globbed src/ header and source file.
add_executable(docalign docalign.cpp ${dalign_cpp_headers} ${dalign_cpp_cpp})
Expand All @@ -65,10 +68,8 @@ install(TARGETS docjoin docalign
LIBRARY DESTINATION ${CMAKE_LIBRARY_BINDIR}
)

# When BUILD_TESTING is set, build the ngram_test executable from
# tests/ngram_test.cpp plus the globbed src/ files, and register it as a
# CTest test of the same name.
if (BUILD_TESTING)
add_executable(ngram_test tests/ngram_test.cpp ${dalign_cpp_headers} ${dalign_cpp_cpp})
# NOTE(review): BOOST_TEST_DYN_LINK selects the shared-library flavour of
# Boost.Test; confirm this matches how Boost is located (static vs shared).
target_compile_definitions(ngram_test PRIVATE "BOOST_TEST_DYN_LINK=1")
target_link_libraries(ngram_test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} preprocess_util)
add_test(NAME ngram_test COMMAND ngram_test)
endif (BUILD_TESTING)
# Opt-in switch for the test suite (default OFF). Configure with
# -DCOMPILE_TESTS=ON to have CMake process tests/CMakeLists.txt as well.
option(COMPILE_TESTS "Compile tests" OFF)
if(COMPILE_TESTS)
  add_subdirectory(tests)
endif()

19 changes: 19 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Build stage: install the toolchain and development libraries, then compile
# only the docalign and docjoin executables.
FROM ubuntu:latest AS builder
# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting,
# which would otherwise stall a non-interactive image build.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        libboost-program-options-dev \
        libboost-test-dev \
        libicu-dev \
        zlib1g-dev \
        cmake \
        build-essential

COPY . /root/

# Cap the job count at the number of CPUs: a bare `make -j` places no limit
# on parallel jobs and can exhaust memory on the build machine.
RUN mkdir /root/build \
    && cd /root/build \
    && cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo \
    && make -j"$(nproc)" docalign docjoin

# Runtime stage: ship only the compiled binaries.
# NOTE(review): no runtime libraries are installed in this stage; confirm the
# binaries have no shared-library dependencies (e.g. ICU) missing from the
# base image.
FROM ubuntu:latest AS runner
COPY --from=builder /root/build/bin/* /usr/local/bin/
Loading

0 comments on commit ede1238

Please sign in to comment.