Commit

merge with internal master
emjotde committed Feb 11, 2022
2 parents 8fd553e + 4b51dcb commit b0275e7
Showing 23 changed files with 418 additions and 220 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Scripts using PyYAML now use `safe_load`; see https://msg.pyyaml.org/load

### Changed
- Make guided-alignment faster via sparse memory layout, add alignment points for EOS, remove losses other than ce.
- Changed minimum C++ standard to C++17
- Faster LSH top-k search on CPU

## [1.11.0] - 2022-02-08

7 changes: 4 additions & 3 deletions CMakeLists.txt
@@ -6,7 +6,7 @@ if (POLICY CMP0074)
endif ()

project(marian CXX C)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")

@@ -91,10 +91,11 @@ if(MSVC)
# C4310: cast truncates constant value
# C4324: 'marian::cpu::int16::`anonymous-namespace'::ScatterPut': structure was padded due to alignment specifier
# C4702: unreachable code; note it is also disabled globally in the VS project file
# C4996: warning STL4015: The std::iterator class template (used as a base class to provide typedefs) is deprecated in C++17
if(USE_SENTENCEPIECE)
set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\" /wd\"4702\" /wd\"4100\"")
set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\" /wd\"4702\" /wd\"4996\" /wd\"4100\"")
else()
set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\" /wd\"4702\"")
set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\" /wd\"4702\" /wd\"4996\"")
endif()

# set(INTRINSICS "/arch:AVX")
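
The C4996 suppressions added above correspond to MSVC's STL4015 notice quoted in the comment: under C++17, deriving an iterator type from std::iterator is deprecated. A minimal illustration of the deprecated pattern and its usual replacement (spelling out the five member typedefs) — not taken from Marian or its submodules, just a sketch of what the warning is about:

#include <cstddef>
#include <iterator>

// Pre-C++17 style (now deprecated; triggers STL4015 / C4996 on MSVC):
// struct LegacyIter : std::iterator<std::forward_iterator_tag, int> { /* ... */ };

// C++17-friendly replacement: declare the member typedefs explicitly.
struct ModernIter {
  using iterator_category = std::forward_iterator_tag;
  using value_type        = int;
  using difference_type   = std::ptrdiff_t;
  using pointer           = int*;
  using reference         = int&;
  // ... increment, dereference and comparison operators as before ...
};

int main() { return 0; }  // nothing to run; the point is that this compiles cleanly under /std:c++17
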
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
v1.11.1
v1.11.3
48 changes: 3 additions & 45 deletions azure-pipelines.yml
@@ -192,6 +192,9 @@ stages:
displayName: Ubuntu
timeoutInMinutes: 90

# Minimal tested configurations for marian-dev v1.11 and C++17:
# * Ubuntu 16.04, GCC 7.5, CMake 3.10.2, CUDA 9.2 (probably GCC 6 would work too)
# * Ubuntu 18.04, GCC 7.5, CMake 3.12.2, CUDA 10.0
strategy:
matrix:
################################################################
@@ -319,51 +322,6 @@
displayName: Print versions
workingDirectory: build
######################################################################
- job: BuildUbuntuMinimal
condition: eq(${{ parameters.runBuilds }}, true)
displayName: Ubuntu CPU+GPU gcc-7 cmake 3.5

pool:
vmImage: ubuntu-18.04

steps:
- checkout: self
submodules: true

# The script simplifies installation of different versions of CUDA.
- bash: ./scripts/ci/install_cuda_ubuntu.sh "10.0"
displayName: Install CUDA

# CMake 3.5.1 is the minimum version supported
- bash: |
wget -nv https://cmake.org/files/v3.5/cmake-3.5.1-Linux-x86_64.tar.gz
tar zxf cmake-3.5.1-Linux-x86_64.tar.gz
./cmake-3.5.1-Linux-x86_64/bin/cmake --version
displayName: Download CMake
# GCC 5 is the minimum version supported
- bash: |
/usr/bin/gcc-7 --version
mkdir -p build
cd build
CC=/usr/bin/gcc-7 CXX=/usr/bin/g++-7 CUDAHOSTCXX=/usr/bin/g++-7 \
../cmake-3.5.1-Linux-x86_64/bin/cmake .. \
-DCOMPILE_CPU=on \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.0
displayName: Configure CMake
- bash: make -j3
displayName: Compile
workingDirectory: build

- bash: |
./marian --version
./marian-decoder --version
./marian-scorer --version
displayName: Print versions
workingDirectory: build
######################################################################
- job: BuildMacOS
condition: eq(${{ parameters.runBuilds }}, true)
2 changes: 1 addition & 1 deletion examples
11 changes: 9 additions & 2 deletions scripts/ci/install_cuda_ubuntu.sh
@@ -60,6 +60,13 @@ CUDA_PACKAGES_IN=(

CUDA_PACKAGES=""
for package in "${CUDA_PACKAGES_IN[@]}"; do
# @todo This is not perfect. Should probably provide a separate list for diff versions
# cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y
if [[ "${package}" == "nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then
package="compiler"
elif [[ "${package}" == "compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then
package="nvcc"
fi
# Build the full package name and append to the string.
CUDA_PACKAGES+=" cuda-${package}-${CUDA_MAJOR}-${CUDA_MINOR}"
done
@@ -72,8 +79,8 @@ echo "CUDA_PACKAGES ${CUDA_PACKAGES}"

PIN_FILENAME="cuda-ubuntu${UBUNTU_VERSION}.pin"
PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/${PIN_FILENAME}"
APT_KEY_URL="http://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/7fa2af80.pub"
REPO_URL="http://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/"
APT_KEY_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/7fa2af80.pub"
REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/x86_64/"

echo "PIN_FILENAME ${PIN_FILENAME}"
echo "PIN_URL ${PIN_URL}"
2 changes: 1 addition & 1 deletion src/3rd_party/half_float/umHalf.inl
@@ -344,7 +344,7 @@ inline HalfFloat operator+ (HalfFloat one, HalfFloat two)

// compute the difference between the two exponents. shifts with negative
// numbers are undefined, thus we need two code paths
register int expDiff = one.IEEE.Exp - two.IEEE.Exp;
/*register*/ int expDiff = one.IEEE.Exp - two.IEEE.Exp;

if (0 == expDiff)
{
11 changes: 9 additions & 2 deletions src/command/marian_conv.cpp
@@ -86,11 +86,17 @@ int main(int argc, char** argv) {
graph->setDevice(CPU0);
graph->load(modelFrom);

std::vector<lsh::ParamConvInfo> toBeLSHed;
if(addLsh) {
// Add dummy parameters for the LSH before the model actually gets initialized.
// This creates the parameters with useless values in the tensors, but it gives us the memory we need.
toBeLSHed = {
{lshOutputWeights, "lsh_output_codes", "lsh_output_rotation", lshNBits}
};

graph->setReloaded(false);
lsh::addDummyParameters(graph, /*weights=*/lshOutputWeights, /*nBits=*/lshNBits);
for(auto p : toBeLSHed)
lsh::addDummyParameters(graph, /*paramInfo=*/p);
graph->setReloaded(true);
}

@@ -99,7 +105,8 @@
if(addLsh) {
// After initialization, hijack the parameters for the LSH and force-overwrite them with correct values.
// Once this is done we can just pack and save as normal.
lsh::overwriteDummyParameters(graph, /*weights=*/lshOutputWeights);
for(auto p : toBeLSHed)
lsh::overwriteDummyParameters(graph, /*paramInfo=*/p);
}

// added a flag indicating whether the weights need to be packed or not
2 changes: 1 addition & 1 deletion src/common/config_parser.cpp
@@ -510,7 +510,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) {
"none");
cli.add<std::string>("--guided-alignment-cost",
"Cost type for guided alignment: ce (cross-entropy), mse (mean square error), mult (multiplication)",
"mse");
"ce");
cli.add<double>("--guided-alignment-weight",
"Weight for guided alignment cost",
0.1);
39 changes: 35 additions & 4 deletions src/data/alignment.cpp
@@ -2,6 +2,8 @@
#include "common/utils.h"

#include <algorithm>
#include <cmath>
#include <set>

namespace marian {
namespace data {
@@ -10,10 +12,11 @@ WordAlignment::WordAlignment() {}

WordAlignment::WordAlignment(const std::vector<Point>& align) : data_(align) {}

WordAlignment::WordAlignment(const std::string& line) {
WordAlignment::WordAlignment(const std::string& line, size_t srcEosPos, size_t tgtEosPos) {
std::vector<std::string> atok = utils::splitAny(line, " -");
for(size_t i = 0; i < atok.size(); i += 2)
data_.emplace_back(Point{ (size_t)std::stoi(atok[i]), (size_t)std::stoi(atok[i + 1]), 1.f });
data_.push_back(Point{ (size_t)std::stoi(atok[i]), (size_t)std::stoi(atok[i + 1]), 1.f });
data_.push_back(Point{ srcEosPos, tgtEosPos, 1.f }); // add alignment point for both EOS symbols
}

void WordAlignment::sort() {
@@ -22,6 +25,35 @@ void WordAlignment::sort() {
});
}

void WordAlignment::normalize(bool reverse/*=false*/) {
std::vector<size_t> counts;
counts.reserve(data_.size());

// reverse==false : normalize target word prob by number of source words
// reverse==true : normalize source word prob by number of target words
auto srcOrTgt = [](const Point& p, bool reverse) {
return reverse ? p.srcPos : p.tgtPos;
};

for(const auto& a : data_) {
size_t pos = srcOrTgt(a, reverse);
if(counts.size() <= pos)
counts.resize(pos + 1, 0);
counts[pos]++;
}

// a.prob at this point is either 1 or normalized to a different value,
// but we just set it to 1 / count, so multiple calls result in re-normalization
// regardless of forward or reverse direction. We also set the remaining values to 1.
for(auto& a : data_) {
size_t pos = srcOrTgt(a, reverse);
if(counts[pos] > 1)
a.prob = 1.f / counts[pos];
else
a.prob = 1.f;
}
}

std::string WordAlignment::toString() const {
std::stringstream str;
for(auto p = begin(); p != end(); ++p) {
@@ -32,7 +64,7 @@ std::string WordAlignment::toString() const {
return str.str();
}

WordAlignment ConvertSoftAlignToHardAlign(SoftAlignment alignSoft,
WordAlignment ConvertSoftAlignToHardAlign(const SoftAlignment& alignSoft,
float threshold /*= 1.f*/) {
WordAlignment align;
// Alignments by maximum value
@@ -58,7 +90,6 @@ WordAlignment ConvertSoftAlignToHardAlign(SoftAlignment alignSoft,
}
}
}

// Sort alignment pairs in ascending order
align.sort();

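
A standalone sketch of the normalization scheme implemented above, using a local Point type instead of Marian's headers: with reverse==false, alignment points are counted per target position, so when several source words align to the same target word each point receives 1/count and the probabilities sum to 1 per target word.

#include <cstddef>
#include <iostream>
#include <vector>

struct Point { std::size_t srcPos, tgtPos; float prob; };  // mirrors WordAlignment::Point

// Same scheme as WordAlignment::normalize(false): count alignment points per target
// position and assign each point 1/count.
void normalizeByTarget(std::vector<Point>& data) {
  std::vector<std::size_t> counts;
  for(const auto& p : data) {
    if(counts.size() <= p.tgtPos)
      counts.resize(p.tgtPos + 1, 0);
    counts[p.tgtPos]++;
  }
  for(auto& p : data)
    p.prob = 1.f / counts[p.tgtPos];
}

int main() {
  // "srcPos-tgtPos" pairs "0-0 1-0 2-1" plus an EOS-EOS point, as the new constructor
  // would append for srcEosPos=3, tgtEosPos=2
  std::vector<Point> align = {{0, 0, 1.f}, {1, 0, 1.f}, {2, 1, 1.f}, {3, 2, 1.f}};
  normalizeByTarget(align);
  for(const auto& p : align)  // prints 0-0/0.5 1-0/0.5 2-1/1 3-2/1
    std::cout << p.srcPos << "-" << p.tgtPos << "/" << p.prob << " ";
  std::cout << "\n";
}
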
21 changes: 16 additions & 5 deletions src/data/alignment.h
@@ -1,20 +1,22 @@
#pragma once

#include <sstream>
#include <tuple>
#include <vector>

namespace marian {
namespace data {

class WordAlignment {
struct Point
{
public:
struct Point {
size_t srcPos;
size_t tgtPos;
float prob;
};
private:
std::vector<Point> data_;

public:
WordAlignment();

@@ -28,11 +30,14 @@ class WordAlignment {
public:

/**
* @brief Constructs word alignments from textual representation.
* @brief Constructs word alignments from textual representation. Adds alignment point for externally
* supplied EOS positions in source and target string.
*
* @param line String in the form of "0-0 1-1 1-2", etc.
*/
WordAlignment(const std::string& line);
WordAlignment(const std::string& line, size_t srcEosPos, size_t tgtEosPos);

Point& operator[](size_t i) { return data_[i]; }

auto begin() const -> decltype(data_.begin()) { return data_.begin(); }
auto end() const -> decltype(data_.end()) { return data_.end(); }
@@ -46,6 +51,12 @@ class WordAlignment {
*/
void sort();

/**
* @brief Normalizes alignment probabilities of target words to sum to 1 over aligned source words.
* This is needed for correct cost computation for guided alignment training with the CE cost criterion.
*/
void normalize(bool reverse=false);

/**
* @brief Returns textual representation.
*/
@@ -56,7 +67,7 @@
// Also used on QuickSAND boundary where beam and batch size is 1. Then it is simply [t][s] -> P(s|t)
typedef std::vector<std::vector<float>> SoftAlignment; // [trg pos][beam depth * max src length * batch size]

WordAlignment ConvertSoftAlignToHardAlign(SoftAlignment alignSoft,
WordAlignment ConvertSoftAlignToHardAlign(const SoftAlignment& alignSoft,
float threshold = 1.f);

std::string SoftAlignToString(SoftAlignment align);
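
The normalize() documentation above ties into the switch of the default --guided-alignment-cost to ce elsewhere in this commit. As a rough sketch, one common formulation of the guided-alignment cross-entropy cost (not copied from Marian's loss code): with the normalized reference alignment a(s|t) summing to 1 over source positions s for each target position t, and the model's attention alpha(s|t),

L_{ga} = -\frac{1}{T} \sum_{t=1}^{T} \sum_{s=1}^{S} a(s \mid t)\, \log \alpha(s \mid t)

With hard alignments normalized as above, each target word contributes the average negative log-attention of the source words it is aligned to.
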
2 changes: 1 addition & 1 deletion src/data/batch.h
@@ -24,7 +24,7 @@ class Batch {
const std::vector<size_t>& getSentenceIds() const { return sentenceIds_; }
void setSentenceIds(const std::vector<size_t>& ids) { sentenceIds_ = ids; }

virtual void setGuidedAlignment(std::vector<float>&&) = 0;
virtual void setGuidedAlignment(std::vector<WordAlignment>&&) = 0;
virtual void setDataWeights(const std::vector<float>&) = 0;
virtual ~Batch() {};
protected:
13 changes: 6 additions & 7 deletions src/data/corpus.cpp
@@ -132,14 +132,13 @@ SentenceTuple Corpus::next() {
tup.markAltered();
addWordsToSentenceTuple(fields[i], vocabId, tup);
}

// weights are added last to the sentence tuple, because this runs a validation that needs
// length of the target sequence
if(alignFileIdx_ > -1)
addAlignmentToSentenceTuple(fields[alignFileIdx_], tup);
if(weightFileIdx_ > -1)
addWeightsToSentenceTuple(fields[weightFileIdx_], tup);
}
// weights are added last to the sentence tuple, because this runs a validation that needs
// length of the target sequence
if(alignFileIdx_ > -1)
addAlignmentToSentenceTuple(fields[alignFileIdx_], tup);
if(weightFileIdx_ > -1)
addWeightsToSentenceTuple(fields[weightFileIdx_], tup);

// check if all streams are valid, that is, non-empty and no longer than maximum allowed length
if(std::all_of(tup.begin(), tup.end(), [=](const Words& words) {
25 changes: 11 additions & 14 deletions src/data/corpus_base.cpp
@@ -429,11 +429,13 @@ void CorpusBase::addWordsToSentenceTuple(const std::string& line,

void CorpusBase::addAlignmentToSentenceTuple(const std::string& line,
SentenceTupleImpl& tup) const {
ABORT_IF(rightLeft_,
"Guided alignment and right-left model cannot be used "
"together at the moment");
ABORT_IF(rightLeft_, "Guided alignment and right-left model cannot be used together at the moment");
ABORT_IF(tup.size() != 2, "Using alignment between source and target, but sentence tuple has {} elements??", tup.size());

auto align = WordAlignment(line);
size_t srcEosPos = tup[0].size() - 1;
size_t tgtEosPos = tup[1].size() - 1;

auto align = WordAlignment(line, srcEosPos, tgtEosPos);
tup.setAlignment(align);
}

@@ -457,22 +459,17 @@ void CorpusBase::addWeightsToSentenceTuple(const std::string& line, SentenceTupl

void CorpusBase::addAlignmentsToBatch(Ptr<CorpusBatch> batch,
const std::vector<Sample>& batchVector) {
int srcWords = (int)batch->front()->batchWidth();
int trgWords = (int)batch->back()->batchWidth();
std::vector<WordAlignment> aligns;

int dimBatch = (int)batch->getSentenceIds().size();

std::vector<float> aligns(srcWords * dimBatch * trgWords, 0.f);

aligns.reserve(dimBatch);

for(int b = 0; b < dimBatch; ++b) {

// If the batch vector is altered within marian by, for example, case augmentation,
// the guided alignments we received for this tuple cease to be valid.
// Hence skip setting alignments for that sentence tuple.
if (!batchVector[b].isAltered()) {
for(auto p : batchVector[b].getAlignment()) {
size_t idx = p.srcPos * dimBatch * trgWords + b * trgWords + p.tgtPos;
aligns[idx] = 1.f;
}
aligns.push_back(std::move(batchVector[b].getAlignment()));
}
}
batch->setGuidedAlignment(std::move(aligns));
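
For contrast with the dense representation removed above, a standalone sketch (illustrative only, not Marian code) of the old layout: one float per (source position, sentence, target position) triple, indexed as srcPos * dimBatch * trgWords + b * trgWords + tgtPos. The batch now stores one WordAlignment per sentence instead; the helper below reconstructs the dense buffer from that sparse form, here filling in the stored probabilities rather than hard 1s.

#include <cstddef>
#include <vector>

struct Point { std::size_t srcPos, tgtPos; float prob; };
using SentenceAlignment = std::vector<Point>;  // stands in for marian::data::WordAlignment

// Expand per-sentence alignment points into the dense [srcWords * dimBatch * trgWords]
// buffer that addAlignmentsToBatch used to build.
std::vector<float> toDense(const std::vector<SentenceAlignment>& aligns,
                           std::size_t srcWords, std::size_t trgWords) {
  std::size_t dimBatch = aligns.size();
  std::vector<float> dense(srcWords * dimBatch * trgWords, 0.f);
  for(std::size_t b = 0; b < dimBatch; ++b)
    for(const auto& p : aligns[b])
      dense[p.srcPos * dimBatch * trgWords + b * trgWords + p.tgtPos] = p.prob;
  return dense;
}

int main() {
  // two sentences, source width 3, target width 2
  std::vector<SentenceAlignment> aligns = {
    {{0, 0, 1.f}, {2, 1, 1.f}},
    {{1, 0, 1.f}},
  };
  auto dense = toDense(aligns, /*srcWords=*/3, /*trgWords=*/2);  // 12 floats, only 3 non-zero
  return dense.size() == 12 ? 0 : 1;
}
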
