diff --git a/scripts/training/phrase-extract/PhraseAlignment.cpp b/scripts/training/phrase-extract/PhraseAlignment.cpp index 19f4f9aa..642258a6 100644 --- a/scripts/training/phrase-extract/PhraseAlignment.cpp +++ b/scripts/training/phrase-extract/PhraseAlignment.cpp @@ -17,22 +17,17 @@ using namespace std; extern Vocabulary vcbT; extern Vocabulary vcbS; -extern PhraseTable phraseTableT; -extern PhraseTable phraseTableS; extern bool hierarchicalFlag; -PhraseAlignment::PhraseAlignment() -:sourcePhraseId(999999) -,targetPhraseId(999999) -{} - // read in a phrase pair and store it void PhraseAlignment::create( char line[], int lineID ) { + assert(phraseS.empty()); + assert(phraseT.empty()); + //cerr << "processing " << line; vector< string > token = tokenize( line ); int item = 1; - PHRASE phraseS, phraseT; for (int j=0; j #include @@ -14,24 +16,20 @@ class PhraseAlignment { protected: - int targetPhraseId, sourcePhraseId; + PHRASE phraseS; + PHRASE phraseT; void createAlignVec(size_t sourceSize, size_t targetSize); public: float count; std::vector< std::set > alignedToT; std::vector< std::set > alignedToS; - - PhraseAlignment(); void create( char*, int ); void clear(); bool equals( const PhraseAlignment& ); bool match( const PhraseAlignment& ); - int GetTarget() const - { return targetPhraseId; } - int GetSource() const - { return sourcePhraseId; } - + const PHRASE &GetSource() const { return phraseS; } + const PHRASE &GetTarget() const { return phraseT; } }; diff --git a/scripts/training/phrase-extract/score.cpp b/scripts/training/phrase-extract/score.cpp index 116dc7bc..7c874909 100644 --- a/scripts/training/phrase-extract/score.cpp +++ b/scripts/training/phrase-extract/score.cpp @@ -61,13 +61,11 @@ void computeCountOfCounts( char* fileNameExtract, int maxLines ); void processPhrasePairs( vector< PhraseAlignment > & ); PhraseAlignment* findBestAlignment( vector< PhraseAlignment* > & ); void outputPhrasePair( vector< PhraseAlignment * > &, float ); -double computeLexicalTranslation( PHRASE &, PHRASE &, PhraseAlignment * ); +double computeLexicalTranslation( const PHRASE &, const PHRASE &, PhraseAlignment * ); ofstream phraseTableFile; LexicalTable lexTable; -PhraseTable phraseTableT; -PhraseTable phraseTableS; bool inverseFlag = false; bool hierarchicalFlag = false; bool wordAlignmentFlag = false; @@ -166,7 +164,6 @@ int main(int argc, char* argv[]) } // loop through all extracted phrase translations - int lastSource = -1; float lastCount = 0.0f; vector< PhraseAlignment > phrasePairsWithSameF; int i=0; @@ -196,27 +193,20 @@ int main(int argc, char* argv[]) if (lastPhrasePair != NULL && lastPhrasePair->equals( phrasePair )) { lastPhrasePair->count += phrasePair.count; - phrasePair.clear(); continue; } // if new source phrase, process last batch - if (lastSource >= 0 && lastSource != phrasePair.GetSource()) { + if (lastPhrasePair != NULL && + lastPhrasePair->GetSource() != phrasePair.GetSource()) { processPhrasePairs( phrasePairsWithSameF ); - for(int j=0;jGetSource() != lastPhrasePair->GetSource()) - { - phraseTableT.clear(); // these would get too big - phraseTableS.clear(); // these would get too big - // process line again, since phrase tables flushed - phrasePair->clear(); - phrasePair->create( line, lineNum ); - } - int count = lastPhrasePair->count + 0.99999; if(count <= GT_MAX) countOfCounts[ count ]++; @@ -382,8 +362,8 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount count += phrasePair[i]->count; } - PHRASE phraseS = phraseTableS.getPhrase( phrasePair[0]->GetSource() ); - PHRASE phraseT = phraseTableT.getPhrase( phrasePair[0]->GetTarget() ); + const PHRASE &phraseS = phrasePair[0]->GetSource(); + const PHRASE &phraseT = phrasePair[0]->GetTarget(); // labels (if hierarchical) @@ -470,7 +450,7 @@ void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount phraseTableFile << endl; } -double computeLexicalTranslation( PHRASE &phraseS, PHRASE &phraseT, PhraseAlignment *alignment ) { +double computeLexicalTranslation( const PHRASE &phraseS, const PHRASE &phraseT, PhraseAlignment *alignment ) { // lexical translation probability double lexScore = 1.0; int null = vcbS.getWordID("NULL");