diff --git a/moses-chart/src/ChartManager.cpp b/moses-chart/src/ChartManager.cpp index 9048b46c..57fb3b2e 100644 --- a/moses-chart/src/ChartManager.cpp +++ b/moses-chart/src/ChartManager.cpp @@ -43,17 +43,28 @@ namespace MosesChart Manager::Manager(InputType const& source, const TranslationSystem* system) :m_source(source) ,m_hypoStackColl(source, *this) -,m_transOptColl(source, system, m_hypoStackColl) +,m_transOptColl(source, system, m_hypoStackColl, m_ruleLookupManagers) ,m_system(system) ,m_start(clock()) - { m_system->InitializeBeforeSentenceProcessing(source); + const std::vector &dictionaries = m_system->GetPhraseDictionaries(); + m_ruleLookupManagers.reserve(dictionaries.size()); + for (std::vector::const_iterator p = dictionaries.begin(); + p != dictionaries.end(); ++p) + { + PhraseDictionaryFeature *pdf = *p; + const PhraseDictionary *dict = pdf->GetDictionary(); + PhraseDictionary *nonConstDict = const_cast(dict); + m_ruleLookupManagers.push_back(nonConstDict->CreateRuleLookupManager(source, m_hypoStackColl)); + } } Manager::~Manager() { m_system->CleanUpAfterSentenceProcessing(); + + RemoveAllInColl(m_ruleLookupManagers); clock_t end = clock(); float et = (end - m_start); diff --git a/moses-chart/src/ChartManager.h b/moses-chart/src/ChartManager.h index 58150713..6e5b1091 100644 --- a/moses-chart/src/ChartManager.h +++ b/moses-chart/src/ChartManager.h @@ -30,6 +30,7 @@ #include "../../moses/src/TrellisPathList.h" #include "../../moses/src/SentenceStats.h" #include "../../moses/src/TranslationSystem.h" +#include "../../moses/src/ChartRuleLookupManager.h" namespace MosesChart { @@ -46,6 +47,7 @@ class Manager std::auto_ptr m_sentenceStats; const Moses::TranslationSystem* m_system; clock_t m_start; /**< starting time, used for logging */ + std::vector m_ruleLookupManagers; public: Manager(Moses::InputType const& source, const Moses::TranslationSystem* system); diff --git a/moses-chart/src/ChartTranslationOptionCollection.cpp 
b/moses-chart/src/ChartTranslationOptionCollection.cpp index 61900ff3..f110a7e7 100644 --- a/moses-chart/src/ChartTranslationOptionCollection.cpp +++ b/moses-chart/src/ChartTranslationOptionCollection.cpp @@ -37,12 +37,14 @@ namespace MosesChart TranslationOptionCollection::TranslationOptionCollection(InputType const& source , const Moses::TranslationSystem* system - , const ChartCellCollection &hypoStackColl) + , const ChartCellCollection &hypoStackColl + , const std::vector &ruleLookupManagers) :m_source(source) ,m_system(system) ,m_decodeGraphList(system->GetDecodeGraphs()) ,m_hypoStackColl(hypoStackColl) ,m_collection(source.GetSize()) +,m_ruleLookupManagers(ruleLookupManagers) { // create 2-d vector size_t size = source.GetSize(); @@ -76,14 +78,21 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange( size_t startPos , size_t endPos) { + ChartTranslationOptionList &chartRuleColl = GetTranslationOptionList(startPos, endPos); + const WordsRange &wordsRange = chartRuleColl.GetSourceRange(); + + assert(m_decodeGraphList.size() == m_ruleLookupManagers.size()); std::vector ::const_iterator iterDecodeGraph; - for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph) + std::vector ::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin(); + for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) { const DecodeGraph &decodeGraph = **iterDecodeGraph; + assert(decodeGraph.GetSize() == 1); + ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers; size_t maxSpan = decodeGraph.GetMaxChartSpan(); if (maxSpan == 0 || (endPos-startPos+1) <= maxSpan) { - CreateTranslationOptionsForRange(decodeGraph, startPos, endPos, true); + ruleLookupManager.GetChartRuleCollection(wordsRange, true, chartRuleColl); } } @@ -104,18 +113,22 @@ void TranslationOptionCollection::ProcessUnknownWord(size_t startPos, size_t 
end } ChartTranslationOptionList &fullList = GetTranslationOptionList(startPos, startPos); + const WordsRange &wordsRange = fullList.GetSourceRange(); // try to translation for coverage with no trans by expanding table limit std::vector ::const_iterator iterDecodeGraph; - for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph) + std::vector ::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin(); + for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) { const DecodeGraph &decodeGraph = **iterDecodeGraph; + ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers; size_t numTransOpt = fullList.GetSize(); if (numTransOpt == 0) { - CreateTranslationOptionsForRange(decodeGraph, startPos, startPos, false); + ruleLookupManager.GetChartRuleCollection(wordsRange, false, fullList); } } + assert(iterRuleLookupManagers == m_ruleLookupManagers.end()); bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption(); // create unknown words for 1 word coverage where we don't have any trans options @@ -124,33 +137,6 @@ void TranslationOptionCollection::ProcessUnknownWord(size_t startPos, size_t end } -void TranslationOptionCollection::CreateTranslationOptionsForRange( - const DecodeGraph &decodeGraph - , size_t startPos - , size_t endPos - , bool adhereTableLimit) -{ - assert(decodeGraph.GetSize() == 1); - const DecodeStep &decodeStep = **decodeGraph.begin(); - - // get wordsrange that doesn't go away until after sentence processing - const WordsRange &wordsRange = GetTranslationOptionList(startPos, endPos).GetSourceRange(); - - ChartTranslationOptionList &translationOptionList = GetTranslationOptionList(startPos, endPos); - const PhraseDictionary* phraseDictionary = - decodeStep.GetPhraseDictionaryFeature()->GetDictionary(); - //cerr << 
phraseDictionary.GetScoreProducerDescription() << endl; - - ChartTranslationOptionList &chartRuleCollection = GetTranslationOptionList(startPos, endPos); - - phraseDictionary->GetChartRuleCollection(chartRuleCollection - , m_source - , wordsRange - , adhereTableLimit - , m_hypoStackColl); - //cerr << "chartRuleCollection size=" << chartRuleCollection->GetSize(); -} - ChartTranslationOptionList &TranslationOptionCollection::GetTranslationOptionList(size_t startPos, size_t endPos) { size_t sizeVec = m_collection[startPos].size(); diff --git a/moses-chart/src/ChartTranslationOptionCollection.h b/moses-chart/src/ChartTranslationOptionCollection.h index ab1fc79a..876d1872 100644 --- a/moses-chart/src/ChartTranslationOptionCollection.h +++ b/moses-chart/src/ChartTranslationOptionCollection.h @@ -25,6 +25,7 @@ #include "../../moses/src/InputType.h" #include "../../moses/src/DecodeGraph.h" #include "../../moses/src/ChartTranslationOptionList.h" +#include "../../moses/src/ChartRuleLookupManager.h" namespace Moses { @@ -47,17 +48,13 @@ class TranslationOptionCollection const Moses::TranslationSystem* m_system; std::vector m_decodeGraphList; const ChartCellCollection &m_hypoStackColl; + const std::vector &m_ruleLookupManagers; std::vector< std::vector< Moses::ChartTranslationOptionList > > m_collection; /*< contains translation options */ std::vector m_unksrcs; std::list m_cacheTargetPhrase; std::list* > m_cachedWordsConsumed; - virtual void CreateTranslationOptionsForRange(const Moses::DecodeGraph& decodeGraph, - size_t startPosition - , size_t endPosition - , bool adhereTableLimit); - // for adding 1 trans opt in unknown word proc void Add(Moses::ChartTranslationOption *transOpt, size_t pos); @@ -82,9 +79,9 @@ class TranslationOptionCollection public: TranslationOptionCollection(Moses::InputType const& source , const Moses::TranslationSystem* system - , const ChartCellCollection &hypoStackColl); + , const ChartCellCollection &hypoStackColl + , const std::vector 
&ruleLookupManagers); virtual ~TranslationOptionCollection(); - //virtual void CreateTranslationOptions(const std::vector &decodeGraphList); void CreateTranslationOptionsForRange(size_t startPos , size_t endPos); diff --git a/moses/src/ChartRuleLookupManager.h b/moses/src/ChartRuleLookupManager.h new file mode 100644 index 00000000..27514d37 --- /dev/null +++ b/moses/src/ChartRuleLookupManager.h @@ -0,0 +1,67 @@ +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once +#ifndef moses_ChartRuleLookupManager_h +#define moses_ChartRuleLookupManager_h + +#include "CellCollection.h" +#include "InputType.h" + +namespace Moses +{ + +class ChartTranslationOptionList; +class WordsRange; + +// Defines an interface for looking up rules in a rule table. Concrete +// implementation classes should correspond to specific PhraseDictionary +// subclasses (memory or on-disk). Since a ChartRuleLookupManager object +// maintains sentence-specific state, exactly one should be created for +// each sentence that is to be decoded. 
+class ChartRuleLookupManager +{ + public: + ChartRuleLookupManager(const InputType &sentence, + const CellCollection &cellColl) + : m_sentence(sentence) + , m_cellCollection(cellColl) {} + + virtual ~ChartRuleLookupManager() {} + + const InputType &GetSentence() const { return m_sentence; } + const CellCollection &GetCellCollection() const { return m_cellCollection; } + + virtual void GetChartRuleCollection( + const WordsRange &range, + bool adhereTableLimit, + ChartTranslationOptionList &outColl) = 0; + + private: + // Non-copyable: copy constructor and assignment operator not implemented. + ChartRuleLookupManager(const ChartRuleLookupManager &); + ChartRuleLookupManager &operator=(const ChartRuleLookupManager &); + + const InputType &m_sentence; + const CellCollection &m_cellCollection; +}; + +} // namespace Moses + +#endif diff --git a/moses/src/PhraseDictionarySCFGChart.cpp b/moses/src/ChartRuleLookupManagerMemory.cpp similarity index 72% rename from moses/src/PhraseDictionarySCFGChart.cpp rename to moses/src/ChartRuleLookupManagerMemory.cpp index 2e114b3f..c5136fcd 100644 --- a/moses/src/PhraseDictionarySCFGChart.cpp +++ b/moses/src/ChartRuleLookupManagerMemory.cpp @@ -1,45 +1,72 @@ -// $Id$ -// vim:tabstop=2 /*********************************************************************** - Moses - factored phrase-based language decoder - Copyright (C) 2010 Hieu Hoang - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ +#include "ChartRuleLookupManagerMemory.h" + #include "PhraseDictionarySCFG.h" -#include "FactorCollection.h" #include "InputType.h" #include "ChartTranslationOptionList.h" #include "CellCollection.h" #include "DotChart.h" #include "StaticData.h" -#include "TreeInput.h" - -using namespace std; -using namespace Moses; - -void PhraseDictionarySCFG::GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& src - ,WordsRange const& range - ,bool adhereTableLimit - ,const CellCollection &cellColl) const -{ - size_t relEndPos = range.GetEndPos() - range.GetStartPos(); - size_t absEndPos = range.GetEndPos(); - +#include "NonTerminal.h" + +namespace Moses +{ + +ChartRuleLookupManagerMemory::ChartRuleLookupManagerMemory( + const InputType &src, + const CellCollection &cellColl, + const PhraseDictionarySCFG &ruleTable) + : ChartRuleLookupManager(src, cellColl) + , 
m_ruleTable(ruleTable) +{ + assert(m_processedRuleColls.size() == 0); + size_t sourceSize = src.GetSize(); + m_processedRuleColls.resize(sourceSize); + + const PhraseDictionaryNodeSCFG &rootNode = m_ruleTable.GetRootNode(); + + for (size_t ind = 0; ind < m_processedRuleColls.size(); ++ind) + { + ProcessedRule *initProcessedRule = new ProcessedRule(rootNode); + + ProcessedRuleColl *processedRuleColl = new ProcessedRuleColl(sourceSize - ind + 1); + processedRuleColl->Add(0, initProcessedRule); // init rule. stores the top node in tree + + m_processedRuleColls[ind] = processedRuleColl; + } +} + +ChartRuleLookupManagerMemory::~ChartRuleLookupManagerMemory() +{ + RemoveAllInColl(m_processedRuleColls); +} + +void ChartRuleLookupManagerMemory::GetChartRuleCollection( + const WordsRange &range, + bool adhereTableLimit, + ChartTranslationOptionList &outColl) +{ + size_t relEndPos = range.GetEndPos() - range.GetStartPos(); + size_t absEndPos = range.GetEndPos(); + // MAIN LOOP. create list of nodes of target phrases ProcessedRuleColl &processedRuleCol = *m_processedRuleColls[range.GetStartPos()]; @@ -56,7 +83,7 @@ void PhraseDictionarySCFG::GetChartRuleCollection(ChartTranslationOptionList &ou // search for terminal symbol if (startPos == absEndPos) { - const Word &sourceWord = src.GetWord(absEndPos); + const Word &sourceWord = GetSentence().GetWord(absEndPos); const PhraseDictionaryNodeSCFG *node = prevNode.GetChild(sourceWord); if (node != NULL) { @@ -84,10 +111,10 @@ void PhraseDictionarySCFG::GetChartRuleCollection(ChartTranslationOptionList &ou } const NonTerminalSet &sourceNonTerms = - src.GetLabelSet(startPos, endPos); + GetSentence().GetLabelSet(startPos, endPos); const NonTerminalSet &targetNonTerms = - cellColl.GetHeadwords(WordsRange(startPos, endPos)); + GetCellCollection().GetHeadwords(WordsRange(startPos, endPos)); ExtendPartialRuleApplication(prevNode, prevWordConsumed, startPos, endPos, stackInd, sourceNonTerms, @@ -120,7 +147,7 @@ void 
PhraseDictionarySCFG::GetChartRuleCollection(ChartTranslationOptionList &ou // source and target non-terminals covering the span [startPos, endPos], // determines the full or partial rule applications that can be produced through // extending the current rule application by a single non-terminal. -void PhraseDictionarySCFG::ExtendPartialRuleApplication( +void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication( const PhraseDictionaryNodeSCFG & node, const WordConsumed *prevWordConsumed, size_t startPos, @@ -128,7 +155,7 @@ void PhraseDictionarySCFG::ExtendPartialRuleApplication( size_t stackInd, const NonTerminalSet & sourceNonTerms, const NonTerminalSet & targetNonTerms, - ProcessedRuleColl & processedRuleColl) const + ProcessedRuleColl & processedRuleColl) { const PhraseDictionaryNodeSCFG::NonTerminalMap & nonTermMap = node.GetNonTerminalMap(); @@ -202,3 +229,5 @@ void PhraseDictionarySCFG::ExtendPartialRuleApplication( } } } + +} // namespace Moses diff --git a/moses/src/ChartRuleLookupManagerMemory.h b/moses/src/ChartRuleLookupManagerMemory.h new file mode 100644 index 00000000..c6d2a481 --- /dev/null +++ b/moses/src/ChartRuleLookupManagerMemory.h @@ -0,0 +1,71 @@ +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once +#ifndef moses_ChartRuleLookupManagerMemory_h +#define moses_ChartRuleLookupManagerMemory_h + +#include + +#include "ChartRuleLookupManager.h" +#include "NonTerminal.h" +#include "PhraseDictionaryNodeSCFG.h" +#include "PhraseDictionarySCFG.h" + +namespace Moses +{ + +class ChartTranslationOptionList; +class ProcessedRuleColl; +class WordsRange; +class WordConsumed; + +// Implementation of ChartRuleLookupManager for in-memory rule tables. +class ChartRuleLookupManagerMemory : public ChartRuleLookupManager +{ + public: + ChartRuleLookupManagerMemory(const InputType &sentence, + const CellCollection &cellColl, + const PhraseDictionarySCFG &ruleTable); + + ~ChartRuleLookupManagerMemory(); + + virtual void GetChartRuleCollection( + const WordsRange &range, + bool adhereTableLimit, + ChartTranslationOptionList &outColl); + + private: + void ExtendPartialRuleApplication( + const PhraseDictionaryNodeSCFG &node, + const WordConsumed *prevWordConsumed, + size_t startPos, + size_t endPos, + size_t stackInd, + const NonTerminalSet &sourceNonTerms, + const NonTerminalSet &targetNonTerms, + ProcessedRuleColl &processedRuleColl); + + std::vector m_processedRuleColls; + const PhraseDictionarySCFG &m_ruleTable; +}; + +} // namespace Moses + +#endif diff --git a/moses/src/ChartRuleLookupManagerOnDisk.cpp b/moses/src/ChartRuleLookupManagerOnDisk.cpp new file mode 100644 index 00000000..da18df08 --- /dev/null +++ b/moses/src/ChartRuleLookupManagerOnDisk.cpp @@ -0,0 +1,311 @@ +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can 
redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#include "ChartRuleLookupManagerOnDisk.h" + +#include + +#include "PhraseDictionaryOnDisk.h" +#include "StaticData.h" +#include "DotChartOnDisk.h" +#include "CellCollection.h" +#include "ChartTranslationOptionList.h" +#include "../../OnDiskPt/src/TargetPhraseCollection.h" + +using namespace std; + +namespace Moses +{ + +ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk( + const InputType &sentence, + const CellCollection &cellColl, + const PhraseDictionaryOnDisk &dictionary, + OnDiskPt::OnDiskWrapper &dbWrapper, + const LMList *languageModels, + const WordPenaltyProducer *wpProducer, + const std::vector &inputFactorsVec, + const std::vector &outputFactorsVec, + const std::vector &weight, + const std::string &filePath) + : ChartRuleLookupManager(sentence, cellColl) + , m_dictionary(dictionary) + , m_dbWrapper(dbWrapper) + , m_languageModels(languageModels) + , m_wpProducer(wpProducer) + , m_inputFactorsVec(inputFactorsVec) + , m_outputFactorsVec(outputFactorsVec) + , m_weight(weight) + , m_filePath(filePath) +{ + assert(m_runningNodesVec.size() == 0); + size_t sourceSize = sentence.GetSize(); + m_runningNodesVec.resize(sourceSize); + + for (size_t ind = 0; ind < m_runningNodesVec.size(); ++ind) + { + 
ProcessedRuleOnDisk *initProcessedRule = new ProcessedRuleOnDisk(m_dbWrapper.GetRootSourceNode()); + + ProcessedRuleStackOnDisk *processedStack = new ProcessedRuleStackOnDisk(sourceSize - ind + 1); + processedStack->Add(0, initProcessedRule); // init rule. stores the top node in tree + + m_runningNodesVec[ind] = processedStack; + } +} + +ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk() +{ + std::map::const_iterator iterCache; + for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) + { + delete iterCache->second; + } + m_cache.clear(); + + RemoveAllInColl(m_runningNodesVec); + RemoveAllInColl(m_sourcePhraseNode); +} + +void ChartRuleLookupManagerOnDisk::GetChartRuleCollection( + const WordsRange &range, + bool adhereTableLimit, + ChartTranslationOptionList &outColl) +{ + const StaticData &staticData = StaticData::Instance(); + size_t rulesLimit = StaticData::Instance().GetRuleLimit(); + + size_t relEndPos = range.GetEndPos() - range.GetStartPos(); + size_t absEndPos = range.GetEndPos(); + + // MAIN LOOP. 
create list of nodes of target phrases + ProcessedRuleStackOnDisk &runningNodes = *m_runningNodesVec[range.GetStartPos()]; + + // sort save nodes so only do nodes with most counts + runningNodes.SortSavedNodes(); + size_t numDerivations = 0 + ,maxDerivations = 999999; // staticData.GetMaxDerivations(); + bool overThreshold = true; + + const ProcessedRuleStackOnDisk::SavedNodeColl &savedNodeColl = runningNodes.GetSavedNodeColl(); + //cerr << "savedNodeColl=" << savedNodeColl.size() << " "; + + for (size_t ind = 0; ind < (savedNodeColl.size()) && ((numDerivations < maxDerivations) || overThreshold) ; ++ind) + { + const SavedNodeOnDisk &savedNode = *savedNodeColl[ind]; + + const ProcessedRuleOnDisk &prevProcessedRule = savedNode.GetProcessedRule(); + const OnDiskPt::PhraseNode &prevNode = prevProcessedRule.GetLastNode(); + const WordConsumed *prevWordConsumed = prevProcessedRule.GetLastWordConsumed(); + size_t startPos = (prevWordConsumed == NULL) ? range.GetStartPos() : prevWordConsumed->GetWordsRange().GetEndPos() + 1; + + // search for terminal symbol + if (startPos == absEndPos) + { + const Word &sourceWord = GetSentence().GetWord(absEndPos); + OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceWord); + + if (sourceWordBerkeleyDb != NULL) + { + const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper); + if (node != NULL) + { + // TODO figure out why source word is needed from node, not from sentence + // prob to do with factors or non-term + //const Word &sourceWord = node->GetSourceWord(); + WordConsumed *newWordConsumed = new WordConsumed(absEndPos, absEndPos + , sourceWord + , prevWordConsumed); + ProcessedRuleOnDisk *processedRule = new ProcessedRuleOnDisk(*node, newWordConsumed); + runningNodes.Add(relEndPos+1, processedRule); + + // cache for cleanup + m_sourcePhraseNode.push_back(node); + } + + delete sourceWordBerkeleyDb; + } + } + + // search for non-terminals + size_t 
endPos, stackInd; + if (startPos > absEndPos) + continue; + else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) + { // start. + endPos = absEndPos - 1; + stackInd = relEndPos; + } + else + { + endPos = absEndPos; + stackInd = relEndPos + 1; + } + + size_t nonTermNumWordsCovered = endPos - startPos + 1; + + // get target headwords in this span from chart + const NonTerminalSet &headWords = GetCellCollection().GetHeadwords(WordsRange(startPos, endPos)); + + const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal() + ,&defaultTargetNonTerm = staticData.GetOutputDefaultNonTerminal(); + + // go through each SOURCE lhs + const NonTerminalSet &sourceLHSSet = GetSentence().GetLabelSet(startPos, endPos); + + NonTerminalSet::const_iterator iterSourceLHS; + for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) + { + const Word &sourceLHS = *iterSourceLHS; + + OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS); + + if (sourceLHSBerkeleyDb == NULL) + { + delete sourceLHSBerkeleyDb; + continue; // vocab not in pt. node definitely won't be in there + } + + const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); + delete sourceLHSBerkeleyDb; + + if (sourceNode == NULL) + continue; // didn't find source node + + // go through each TARGET lhs + NonTerminalSet::const_iterator iterTargetLHS; + for (iterTargetLHS = headWords.begin(); iterTargetLHS != headWords.end(); ++iterTargetLHS) + { + const Word &targetLHS = *iterTargetLHS; + + //cerr << sourceLHS << " " << defaultSourceNonTerm << " " << targetLHS << " " << defaultTargetNonTerm << endl; + + //bool isSyntaxNonTerm = (sourceLHS != defaultSourceNonTerm) || (targetLHS != defaultTargetNonTerm); + bool doSearch = true; //isSyntaxNonTerm ? 
nonTermNumWordsCovered <= maxSyntaxSpan : + // nonTermNumWordsCovered <= maxDefaultSpan; + + if (doSearch) + { + + OnDiskPt::Word *targetLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Output, m_outputFactorsVec, targetLHS); + + if (targetLHSBerkeleyDb == NULL) + continue; + + const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*targetLHSBerkeleyDb, m_dbWrapper); + delete targetLHSBerkeleyDb; + + if (node == NULL) + continue; + + // found matching entry + //const Word &sourceWord = node->GetSourceWord(); + WordConsumed *newWordConsumed = new WordConsumed(startPos, endPos + , targetLHS + , prevWordConsumed); + + ProcessedRuleOnDisk *processedRule = new ProcessedRuleOnDisk(*node, newWordConsumed); + runningNodes.Add(stackInd, processedRule); + + m_sourcePhraseNode.push_back(node); + } + } // end for (iterTargetLHS) + + delete sourceNode; + + } // end for (iterSourceLHS) + + // return list of target phrases + ProcessedRuleCollOnDisk &nodes = runningNodes.Get(relEndPos + 1); + + // source LHS + ProcessedRuleCollOnDisk::const_iterator iterProcessedRuleColl; + for (iterProcessedRuleColl = nodes.begin(); iterProcessedRuleColl != nodes.end(); ++iterProcessedRuleColl) + { + // node of last source word + const ProcessedRuleOnDisk &prevProcessedRule = **iterProcessedRuleColl; + if (prevProcessedRule.Done()) + continue; + prevProcessedRule.Done(true); + + const WordConsumed *wordConsumed = prevProcessedRule.GetLastWordConsumed(); + assert(wordConsumed); + + const OnDiskPt::PhraseNode &prevNode = prevProcessedRule.GetLastNode(); + + //get node for each source LHS + const NonTerminalSet &lhsSet = GetSentence().GetLabelSet(range.GetStartPos(), range.GetEndPos()); + NonTerminalSet::const_iterator iterLabelSet; + for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) + { + const Word &sourceLHS = *iterLabelSet; + + OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS); + if (sourceLHSBerkeleyDb == NULL) + 
continue; + + const TargetPhraseCollection *targetPhraseCollection = NULL; + const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); + if (node) + { + UINT64 tpCollFilePos = node->GetValue(); + std::map::const_iterator iterCache = m_cache.find(tpCollFilePos); + if (iterCache == m_cache.end()) + { // not in cache + overThreshold = node->GetCount(0) > staticData.GetRuleCountThreshold(); + //cerr << node->GetCount(0) << " "; + + const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper); + + targetPhraseCollection + = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec + ,m_outputFactorsVec + ,m_dictionary + ,m_weight + ,m_wpProducer + ,*m_languageModels + ,m_filePath + , m_dbWrapper.GetVocab()); + + delete tpcollBerkeleyDb; + m_cache[tpCollFilePos] = targetPhraseCollection; + } + else + { // just get it out of the cache + targetPhraseCollection = iterCache->second; + } + + assert(targetPhraseCollection); + outColl.Add(*targetPhraseCollection, *wordConsumed, adhereTableLimit, rulesLimit); + + numDerivations++; + + } // if (node) + + delete node; + delete sourceLHSBerkeleyDb; + } + } + } // for (size_t ind = 0; ind < savedNodeColl.size(); ++ind) + + outColl.CreateChartRules(rulesLimit); + + //cerr << numDerivations << " "; +} + +} // namespace Moses diff --git a/moses/src/ChartRuleLookupManagerOnDisk.h b/moses/src/ChartRuleLookupManagerOnDisk.h new file mode 100644 index 00000000..b2344f4d --- /dev/null +++ b/moses/src/ChartRuleLookupManagerOnDisk.h @@ -0,0 +1,73 @@ +/*********************************************************************** + Moses - factored phrase-based language decoder + Copyright (C) 2011 University of Edinburgh + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any 
later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + ***********************************************************************/ + +#pragma once +#ifndef moses_ChartRuleLookupManagerOnDisk_h +#define moses_ChartRuleLookupManagerOnDisk_h + +#include "../../OnDiskPt/src/OnDiskWrapper.h" + +#include "CellCollection.h" +#include "ChartRuleLookupManager.h" +#include "ChartTranslationOptionList.h" +#include "DotChartOnDisk.h" +#include "InputType.h" +#include "PhraseDictionaryOnDisk.h" + +namespace Moses +{ + +// Implementation of ChartRuleLookupManager for on-disk rule tables. 
+class ChartRuleLookupManagerOnDisk : public ChartRuleLookupManager +{ + public: + ChartRuleLookupManagerOnDisk(const InputType &sentence, + const CellCollection &cellColl, + const PhraseDictionaryOnDisk &dictionary, + OnDiskPt::OnDiskWrapper &dbWrapper, + const LMList *languageModels, + const WordPenaltyProducer *wpProducer, + const std::vector &inputFactorsVec, + const std::vector &outputFactorsVec, + const std::vector &weight, + const std::string &filePath); + + ~ChartRuleLookupManagerOnDisk(); + + virtual void GetChartRuleCollection(const WordsRange &range, + bool adhereTableLimit, + ChartTranslationOptionList &outColl); + + private: + const PhraseDictionaryOnDisk &m_dictionary; + OnDiskPt::OnDiskWrapper &m_dbWrapper; + const LMList *m_languageModels; + const WordPenaltyProducer *m_wpProducer; + const std::vector &m_inputFactorsVec; + const std::vector &m_outputFactorsVec; + const std::vector &m_weight; + const std::string &m_filePath; + std::vector m_runningNodesVec; + std::map m_cache; + std::list m_sourcePhraseNode; +}; + +} // namespace Moses + +#endif diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am index 1fe7b731..7c511076 100644 --- a/moses/src/Makefile.am +++ b/moses/src/Makefile.am @@ -8,6 +8,9 @@ libmoses_la_HEADERS = \ BilingualDynSuffixArray.h \ BitmapContainer.h \ CellCollection.h \ + ChartRuleLookupManager.h \ + ChartRuleLookupManagerMemory.h \ + ChartRuleLookupManagerOnDisk.h \ ChartTranslationOption.h \ ChartTranslationOptionList.h \ ConfusionNet.h \ @@ -137,6 +140,8 @@ libmoses_la_SOURCES = \ AlignmentInfo.cpp \ BilingualDynSuffixArray.cpp \ BitmapContainer.cpp \ + ChartRuleLookupManagerMemory.cpp \ + ChartRuleLookupManagerOnDisk.cpp \ ChartTranslationOption.cpp \ ChartTranslationOptionList.cpp \ ConfusionNet.cpp \ @@ -192,11 +197,9 @@ libmoses_la_SOURCES = \ PhraseDictionaryDynSuffixArray.cpp \ PhraseDictionaryMemory.cpp \ PhraseDictionarySCFG.cpp \ - PhraseDictionarySCFGChart.cpp \ PhraseDictionaryNode.cpp \ 
PhraseDictionaryNodeSCFG.cpp \ PhraseDictionaryOnDisk.cpp \ - PhraseDictionaryOnDiskChart.cpp \ PhraseDictionaryTree.cpp \ PhraseDictionaryTreeAdaptor.cpp \ PrefixTreeMap.cpp \ diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h index 25795ce9..77dcd7dc 100644 --- a/moses/src/PhraseDictionary.h +++ b/moses/src/PhraseDictionary.h @@ -48,8 +48,10 @@ class WordsRange; class ChartTranslationOptionList; class CellCollection; class TranslationSystem; +class ChartRuleLookupManager; class PhraseDictionaryFeature; + /** * Abstract base class for phrase dictionaries (tables). **/ @@ -70,11 +72,10 @@ class PhraseDictionary: public Dictionary { virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0; virtual void InitializeForInput(InputType const& source) = 0; - virtual void GetChartRuleCollection(ChartTranslationOptionList &outColl - , InputType const& src - , WordsRange const& range - , bool adhereTableLimit - , const CellCollection &cellColl) const=0; + //! Create a sentence-specific manager for SCFG rule lookup. 
+ virtual ChartRuleLookupManager *CreateRuleLookupManager( + const InputType &, + const CellCollection &) = 0; protected: size_t m_tableLimit; diff --git a/moses/src/PhraseDictionaryDynSuffixArray.cpp b/moses/src/PhraseDictionaryDynSuffixArray.cpp index e0704227..f4b87f80 100644 --- a/moses/src/PhraseDictionaryDynSuffixArray.cpp +++ b/moses/src/PhraseDictionaryDynSuffixArray.cpp @@ -66,14 +66,6 @@ const TargetPhraseCollection *PhraseDictionaryDynSuffixArray::GetTargetPhraseCol return ret; } -void PhraseDictionaryDynSuffixArray::GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& src - ,WordsRange const& range - ,bool adhereTableLimit - ,const CellCollection &cellColl) const -{ - assert(false); -} void PhraseDictionaryDynSuffixArray::insertSnt(string& source, string& target, string& alignment) { m_biSA->addSntPair(source, target, alignment); } @@ -81,4 +73,9 @@ void PhraseDictionaryDynSuffixArray::deleteSnt(unsigned idx, unsigned num2Del) { // need to implement -- } +ChartRuleLookupManager *PhraseDictionaryDynSuffixArray::CreateRuleLookupManager(const InputType&, const CellCollection&) { + assert(false); + return 0; +} + }// end namepsace diff --git a/moses/src/PhraseDictionaryDynSuffixArray.h b/moses/src/PhraseDictionaryDynSuffixArray.h index 7a1664a3..dba53718 100644 --- a/moses/src/PhraseDictionaryDynSuffixArray.h +++ b/moses/src/PhraseDictionaryDynSuffixArray.h @@ -28,6 +28,7 @@ class PhraseDictionaryDynSuffixArray: public PhraseDictionary { void CleanUp(); void insertSnt(string&, string&, string&); void deleteSnt(unsigned, unsigned); + ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const CellCollection&); private: BilingualDynSuffixArray *m_biSA; std::vector m_weight; @@ -35,12 +36,6 @@ class PhraseDictionaryDynSuffixArray: public PhraseDictionary { const LMList *m_languageModels; float m_weightWP; - virtual void GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& src - ,WordsRange const& 
range - ,bool adhereTableLimit - ,const CellCollection &cellColl) const; - }; diff --git a/moses/src/PhraseDictionaryMemory.h b/moses/src/PhraseDictionaryMemory.h index da48c871..506a94fc 100644 --- a/moses/src/PhraseDictionaryMemory.h +++ b/moses/src/PhraseDictionaryMemory.h @@ -62,15 +62,14 @@ class PhraseDictionaryMemory : public PhraseDictionary // for mert virtual void InitializeForInput(InputType const&) {/* Don't do anything source specific here as this object is shared between threads.*/} - - void GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& /*src*/ - , WordsRange const& /*range*/ - , bool /*adhereTableLimit*/ - ,const CellCollection &/*cellColl*/) const - { - assert(false); - } + + virtual ChartRuleLookupManager *CreateRuleLookupManager( + const InputType &, + const CellCollection &) + { + assert(false); + return 0; + } TO_STRING(); diff --git a/moses/src/PhraseDictionaryOnDisk.cpp b/moses/src/PhraseDictionaryOnDisk.cpp index 58819594..659f184f 100644 --- a/moses/src/PhraseDictionaryOnDisk.cpp +++ b/moses/src/PhraseDictionaryOnDisk.cpp @@ -24,6 +24,7 @@ #include "StaticData.h" #include "TargetPhraseCollection.h" #include "DotChartOnDisk.h" +#include "ChartRuleLookupManagerOnDisk.h" using namespace std; @@ -85,33 +86,12 @@ void PhraseDictionaryOnDisk::AddEquivPhrase(const Phrase &source, TargetPhrase * void PhraseDictionaryOnDisk::InitializeForInput(const InputType& input) { - assert(m_runningNodesVec.size() == 0); - size_t sourceSize = input.GetSize(); - m_runningNodesVec.resize(sourceSize); - - for (size_t ind = 0; ind < m_runningNodesVec.size(); ++ind) - { - ProcessedRuleOnDisk *initProcessedRule = new ProcessedRuleOnDisk(m_dbWrapper.GetRootSourceNode()); - - ProcessedRuleStackOnDisk *processedStack = new ProcessedRuleStackOnDisk(sourceSize - ind + 1); - processedStack->Add(0, initProcessedRule); // init rule. 
stores the top node in tree - - m_runningNodesVec[ind] = processedStack; - } - + // Nothing to do: sentence-specific state is stored in ChartRuleLookupManager } void PhraseDictionaryOnDisk::CleanUp() { - std::map::const_iterator iterCache; - for (iterCache = m_cache.begin(); iterCache != m_cache.end(); ++iterCache) - { - delete iterCache->second; - } - m_cache.clear(); - - RemoveAllInColl(m_runningNodesVec); - RemoveAllInColl(m_sourcePhraseNode); + // Nothing to do: sentence-specific state is stored in ChartRuleLookupManager } void PhraseDictionaryOnDisk::LoadTargetLookup() @@ -119,6 +99,16 @@ void PhraseDictionaryOnDisk::LoadTargetLookup() // TODO } - +ChartRuleLookupManager *PhraseDictionaryOnDisk::CreateRuleLookupManager( + const InputType &sentence, + const CellCollection &cellCollection) +{ + return new ChartRuleLookupManagerOnDisk(sentence, cellCollection, *this, + m_dbWrapper, m_languageModels, + m_wpProducer, m_inputFactorsVec, + m_outputFactorsVec, m_weight, + m_filePath); +} + } diff --git a/moses/src/PhraseDictionaryOnDisk.h b/moses/src/PhraseDictionaryOnDisk.h index 3bae31c1..da5fa06a 100644 --- a/moses/src/PhraseDictionaryOnDisk.h +++ b/moses/src/PhraseDictionaryOnDisk.h @@ -42,19 +42,13 @@ class PhraseDictionaryOnDisk : public PhraseDictionary friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&); protected: + OnDiskPt::OnDiskWrapper m_dbWrapper; const LMList* m_languageModels; - const WordPenaltyProducer* m_wpProducer;; + const WordPenaltyProducer* m_wpProducer; std::vector m_inputFactorsVec, m_outputFactorsVec; std::vector m_weight; std::string m_filePath; - - mutable OnDiskPt::OnDiskWrapper m_dbWrapper; - - mutable std::map m_cache; - mutable std::list m_sourcePhraseNode; - mutable std::vector m_runningNodesVec; - void LoadTargetLookup(); public: @@ -86,18 +80,12 @@ class PhraseDictionaryOnDisk : public PhraseDictionary //! 
Create entry for translation of source to targetPhrase virtual void AddEquivPhrase(const Phrase &source, TargetPhrase *targetPhrase); - virtual void GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& src - ,WordsRange const& range - ,bool adhereTableLimit - ,const CellCollection &cellColl) const; - void InitializeForInput(const InputType& input); void CleanUp(); + virtual ChartRuleLookupManager *CreateRuleLookupManager( + const InputType &, + const CellCollection &); }; - -}; - - +} // namespace Moses diff --git a/moses/src/PhraseDictionaryOnDiskChart.cpp b/moses/src/PhraseDictionaryOnDiskChart.cpp deleted file mode 100644 index b7eab649..00000000 --- a/moses/src/PhraseDictionaryOnDiskChart.cpp +++ /dev/null @@ -1,262 +0,0 @@ -// $Id$ -// vim:tabstop=2 -/*********************************************************************** - Moses - factored phrase-based language decoder - Copyright (C) 2010 Hieu Hoang - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - ***********************************************************************/ - -#include -#include "PhraseDictionaryOnDisk.h" -#include "StaticData.h" -#include "DotChartOnDisk.h" -#include "CellCollection.h" -#include "ChartTranslationOptionList.h" -#include "../../OnDiskPt/src/TargetPhraseCollection.h" - -using namespace std; - -namespace Moses -{ - void PhraseDictionaryOnDisk::GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& src - , WordsRange const& range - ,bool adhereTableLimit - ,const CellCollection &cellColl) const - { - const StaticData &staticData = StaticData::Instance(); - size_t rulesLimit = StaticData::Instance().GetRuleLimit(); - - size_t relEndPos = range.GetEndPos() - range.GetStartPos(); - size_t absEndPos = range.GetEndPos(); - - // MAIN LOOP. create list of nodes of target phrases - ProcessedRuleStackOnDisk &runningNodes = *m_runningNodesVec[range.GetStartPos()]; - - // sort save nodes so only do nodes with most counts - runningNodes.SortSavedNodes(); - size_t numDerivations = 0 - ,maxDerivations = 999999; // staticData.GetMaxDerivations(); - bool overThreshold = true; - - const ProcessedRuleStackOnDisk::SavedNodeColl &savedNodeColl = runningNodes.GetSavedNodeColl(); - //cerr << "savedNodeColl=" << savedNodeColl.size() << " "; - - for (size_t ind = 0; ind < (savedNodeColl.size()) && ((numDerivations < maxDerivations) || overThreshold) ; ++ind) - { - const SavedNodeOnDisk &savedNode = *savedNodeColl[ind]; - - const ProcessedRuleOnDisk &prevProcessedRule = savedNode.GetProcessedRule(); - const OnDiskPt::PhraseNode &prevNode = prevProcessedRule.GetLastNode(); - const WordConsumed *prevWordConsumed = prevProcessedRule.GetLastWordConsumed(); - size_t startPos = (prevWordConsumed == NULL) ? 
range.GetStartPos() : prevWordConsumed->GetWordsRange().GetEndPos() + 1; - - // search for terminal symbol - if (startPos == absEndPos) - { - const Word &sourceWord = src.GetWord(absEndPos); - OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceWord); - - if (sourceWordBerkeleyDb != NULL) - { - const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper); - if (node != NULL) - { - // TODO figure out why source word is needed from node, not from sentence - // prob to do with factors or non-term - //const Word &sourceWord = node->GetSourceWord(); - WordConsumed *newWordConsumed = new WordConsumed(absEndPos, absEndPos - , sourceWord - , prevWordConsumed); - ProcessedRuleOnDisk *processedRule = new ProcessedRuleOnDisk(*node, newWordConsumed); - runningNodes.Add(relEndPos+1, processedRule); - - // cache for cleanup - m_sourcePhraseNode.push_back(node); - } - - delete sourceWordBerkeleyDb; - } - } - - // search for non-terminals - size_t endPos, stackInd; - if (startPos > absEndPos) - continue; - else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) - { // start. 
- endPos = absEndPos - 1; - stackInd = relEndPos; - } - else - { - endPos = absEndPos; - stackInd = relEndPos + 1; - } - - size_t nonTermNumWordsCovered = endPos - startPos + 1; - - // get target headwords in this span from chart - const NonTerminalSet &headWords = cellColl.GetHeadwords(WordsRange(startPos, endPos)); - - const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal() - ,&defaultTargetNonTerm = staticData.GetOutputDefaultNonTerminal(); - - // go through each SOURCE lhs - const NonTerminalSet &sourceLHSSet = src.GetLabelSet(startPos, endPos); - - NonTerminalSet::const_iterator iterSourceLHS; - for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) - { - const Word &sourceLHS = *iterSourceLHS; - - OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS); - - if (sourceLHSBerkeleyDb == NULL) - { - delete sourceLHSBerkeleyDb; - continue; // vocab not in pt. node definately won't be in there - } - - const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); - delete sourceLHSBerkeleyDb; - - if (sourceNode == NULL) - continue; // didn't find source node - - // go through each TARGET lhs - NonTerminalSet::const_iterator iterTargetLHS; - for (iterTargetLHS = headWords.begin(); iterTargetLHS != headWords.end(); ++iterTargetLHS) - { - const Word &targetLHS = *iterTargetLHS; - - //cerr << sourceLHS << " " << defaultSourceNonTerm << " " << targetLHS << " " << defaultTargetNonTerm << endl; - - //bool isSyntaxNonTerm = (sourceLHS != defaultSourceNonTerm) || (targetLHS != defaultTargetNonTerm); - bool doSearch = true; //isSyntaxNonTerm ? 
nonTermNumWordsCovered <= maxSyntaxSpan : - // nonTermNumWordsCovered <= maxDefaultSpan; - - if (doSearch) - { - - OnDiskPt::Word *targetLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Output, m_outputFactorsVec, targetLHS); - - if (targetLHSBerkeleyDb == NULL) - continue; - - const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*targetLHSBerkeleyDb, m_dbWrapper); - delete targetLHSBerkeleyDb; - - if (node == NULL) - continue; - - // found matching entry - //const Word &sourceWord = node->GetSourceWord(); - WordConsumed *newWordConsumed = new WordConsumed(startPos, endPos - , targetLHS - , prevWordConsumed); - - ProcessedRuleOnDisk *processedRule = new ProcessedRuleOnDisk(*node, newWordConsumed); - runningNodes.Add(stackInd, processedRule); - - m_sourcePhraseNode.push_back(node); - } - } // for (iterHeadWords - - delete sourceNode; - - } // for (iterLabelListf - - // return list of target phrases - ProcessedRuleCollOnDisk &nodes = runningNodes.Get(relEndPos + 1); - - // source LHS - ProcessedRuleCollOnDisk::const_iterator iterProcessedRuleColl; - for (iterProcessedRuleColl = nodes.begin(); iterProcessedRuleColl != nodes.end(); ++iterProcessedRuleColl) - { - // node of last source word - const ProcessedRuleOnDisk &prevProcessedRule = **iterProcessedRuleColl; - if (prevProcessedRule.Done()) - continue; - prevProcessedRule.Done(true); - - const WordConsumed *wordConsumed = prevProcessedRule.GetLastWordConsumed(); - assert(wordConsumed); - - const OnDiskPt::PhraseNode &prevNode = prevProcessedRule.GetLastNode(); - - //get node for each source LHS - const NonTerminalSet &lhsSet = src.GetLabelSet(range.GetStartPos(), range.GetEndPos()); - NonTerminalSet::const_iterator iterLabelSet; - for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) - { - const Word &sourceLHS = *iterLabelSet; - - OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS); - if (sourceLHSBerkeleyDb == NULL) - continue; - - 
const TargetPhraseCollection *targetPhraseCollection = NULL; - const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper); - if (node) - { - UINT64 tpCollFilePos = node->GetValue(); - std::map::const_iterator iterCache = m_cache.find(tpCollFilePos); - if (iterCache == m_cache.end()) - { // not in case - overThreshold = node->GetCount(0) > staticData.GetRuleCountThreshold(); - //cerr << node->GetCount(0) << " "; - - const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(GetTableLimit(), m_dbWrapper); - - targetPhraseCollection - = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec - ,m_outputFactorsVec - ,*this - ,m_weight - ,m_wpProducer - ,*m_languageModels - ,m_filePath - , m_dbWrapper.GetVocab()); - - delete tpcollBerkeleyDb; - m_cache[tpCollFilePos] = targetPhraseCollection; - } - else - { // jsut get out of cache - targetPhraseCollection = iterCache->second; - } - - assert(targetPhraseCollection); - outColl.Add(*targetPhraseCollection, *wordConsumed, adhereTableLimit, rulesLimit); - - numDerivations++; - - } // if (node) - - delete node; - delete sourceLHSBerkeleyDb; - } - } - } // for (size_t ind = 0; ind < savedNodeColl.size(); ++ind) - - outColl.CreateChartRules(rulesLimit); - - //cerr << numDerivations << " "; - } - -}; // namespace - diff --git a/moses/src/PhraseDictionarySCFG.cpp b/moses/src/PhraseDictionarySCFG.cpp index 38f476e9..3a3267ee 100644 --- a/moses/src/PhraseDictionarySCFG.cpp +++ b/moses/src/PhraseDictionarySCFG.cpp @@ -36,6 +36,7 @@ #include "ChartTranslationOptionList.h" #include "DotChart.h" #include "FactorCollection.h" +#include "ChartRuleLookupManagerMemory.h" using namespace std; @@ -264,19 +265,7 @@ const TargetPhraseCollection *PhraseDictionarySCFG::GetTargetPhraseCollection(co void PhraseDictionarySCFG::InitializeForInput(const InputType& input) { - assert(m_processedRuleColls.size() == 0); - size_t sourceSize = input.GetSize(); - 
m_processedRuleColls.resize(sourceSize); - - for (size_t ind = 0; ind < m_processedRuleColls.size(); ++ind) - { - ProcessedRule *initProcessedRule = new ProcessedRule(m_collection); - - ProcessedRuleColl *processedRuleColl = new ProcessedRuleColl(sourceSize - ind + 1); - processedRuleColl->Add(0, initProcessedRule); // init rule. stores the top node in tree - - m_processedRuleColls[ind] = processedRuleColl; - } + // Nothing to do: sentence-specific state is stored in ChartRuleLookupManager } PhraseDictionarySCFG::~PhraseDictionarySCFG() @@ -286,7 +275,14 @@ PhraseDictionarySCFG::~PhraseDictionarySCFG() void PhraseDictionarySCFG::CleanUp() { - RemoveAllInColl(m_processedRuleColls); + // Nothing to do: sentence-specific state is stored in ChartRuleLookupManager +} + +ChartRuleLookupManager *PhraseDictionarySCFG::CreateRuleLookupManager( + const InputType &sentence, + const CellCollection &cellCollection) +{ + return new ChartRuleLookupManagerMemory(sentence, cellCollection, *this); } TO_STRING_BODY(PhraseDictionarySCFG); diff --git a/moses/src/PhraseDictionarySCFG.h b/moses/src/PhraseDictionarySCFG.h index 541525e1..958df533 100644 --- a/moses/src/PhraseDictionarySCFG.h +++ b/moses/src/PhraseDictionarySCFG.h @@ -45,8 +45,6 @@ namespace Moses protected: PhraseDictionaryNodeSCFG m_collection; - mutable std::vector m_processedRuleColls; - std::string m_filePath; TargetPhraseCollection &GetOrCreateTargetPhraseCollection(const Phrase &source, const TargetPhrase &target); @@ -84,6 +82,10 @@ namespace Moses , size_t tableLimit , const LMList &languageModels , const WordPenaltyProducer* wpProducer); + + const PhraseDictionaryNodeSCFG &GetRootNode() const + { return m_collection; } + const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &source) const; @@ -98,23 +100,11 @@ namespace Moses void InitializeForInput(const InputType& i); - virtual void GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& src - ,WordsRange const& range - 
,bool adhereTableLimit - ,const CellCollection &cellColl) const; - - void ExtendPartialRuleApplication( - const PhraseDictionaryNodeSCFG & node, - const WordConsumed * prevWordConsumed, - size_t startPos, - size_t endPos, - size_t stackInd, - const NonTerminalSet & sourceNonTerms, - const NonTerminalSet & targetNonTerms, - ProcessedRuleColl & processedRuleColl) const; - void CleanUp(); + + ChartRuleLookupManager *CreateRuleLookupManager( + const InputType &, + const CellCollection &); }; - -} + +} // namespace Moses diff --git a/moses/src/PhraseDictionaryTreeAdaptor.h b/moses/src/PhraseDictionaryTreeAdaptor.h index 172e1865..0d5a1235 100644 --- a/moses/src/PhraseDictionaryTreeAdaptor.h +++ b/moses/src/PhraseDictionaryTreeAdaptor.h @@ -67,15 +67,14 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary { size_t GetNumInputScores() const; virtual void InitializeForInput(InputType const& source); - - void GetChartRuleCollection(ChartTranslationOptionList &outColl - ,InputType const& /*src*/ - ,WordsRange const& /*range*/ - ,bool /*adhereTableLimit*/ - ,const CellCollection &/*cellColl*/) const - { - assert(false); - } + + virtual ChartRuleLookupManager *CreateRuleLookupManager( + const InputType &, + const CellCollection &) + { + assert(false); + return 0; + } }; }