Skip to content

Commit

Permalink
Improve error logging for UTF-8 decoding failures across cppjieba components.
Browse files Browse the repository at this point in the history
Updated error messages in DictTrie, PosTagger, PreFilter, and SegmentBase to provide clearer context on the specific input causing the failure. This change enhances the debugging experience when handling UTF-8 encoded strings.
  • Loading branch information
yanyiwu committed Dec 8, 2024
1 parent 39fc58f commit 016fc17
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion include/cppjieba/DictTrie.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ class DictTrie {
double weight,
const string& tag) {
if (!DecodeUTF8RunesInString(word, node_info.word)) {
XLOG(ERROR) << "Decode " << word << " failed.";
XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word;
return false;
}
node_info.weight = weight;
Expand Down
2 changes: 1 addition & 1 deletion include/cppjieba/PosTagger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class PosTagger {
const DictTrie * dict = segment.GetDictTrie();
assert(dict != NULL);
if (!DecodeUTF8RunesInString(str, runes)) {
XLOG(ERROR) << "Decode failed.";
XLOG(ERROR) << "UTF-8 decode failed for word: " << str;
return POS_X;
}
tmp = dict->Find(runes.begin(), runes.end());
Expand Down
2 changes: 1 addition & 1 deletion include/cppjieba/PreFilter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class PreFilter {
const string& sentence)
: symbols_(symbols) {
if (!DecodeUTF8RunesInString(sentence, sentence_)) {
XLOG(ERROR) << "decode failed. ";
XLOG(ERROR) << "UTF-8 decode failed for input sentence";
}
cursor_ = sentence_.begin();
}
Expand Down
2 changes: 1 addition & 1 deletion include/cppjieba/SegmentBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class SegmentBase {
symbols_.clear();
RuneStrArray runes;
if (!DecodeUTF8RunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
XLOG(ERROR) << "UTF-8 decode failed for separators: " << s;
return false;
}
for (size_t i = 0; i < runes.size(); i++) {
Expand Down

0 comments on commit 016fc17

Please sign in to comment.