Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C++ stream for log messages and use it in two debug messages #4314

Merged
merged 4 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions src/ccmain/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#endif
#include "sorthelper.h"
#include "tesseractclass.h"
#include "tesserrstream.h" // for tesserr
#include "tessvars.h"
#include "werdit.h"

Expand Down Expand Up @@ -1313,9 +1314,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
PointerVector<WERD_RES> best_words;
// Points to the best result. May be word or in lang_words.
const WERD_RES *word = word_data->word;
clock_t start_t = 0;
if (tessedit_timing_debug) {
start_t = clock();
clock_t total_time = 0;
const bool timing_debug = tessedit_timing_debug;
if (timing_debug) {
total_time = clock();
}
const bool debug = classify_debug_level > 0 || multilang_debug_level > 0;
if (debug) {
Expand Down Expand Up @@ -1368,10 +1370,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD
} else {
tprintf("no best words!!\n");
}
if (tessedit_timing_debug) {
clock_t ocr_t = clock();
tprintf("%s (ocr took %.2f sec)\n", word_data->word->best_choice->unichar_string().c_str(),
static_cast<double>(ocr_t - start_t) / CLOCKS_PER_SEC);
if (timing_debug) {
total_time = clock() - total_time;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use C++11 `std::chrono::steady_clock::now()` (https://en.cppreference.com/w/cpp/chrono/steady_clock - most suitable for measuring intervals) instead of `std::clock()`?
AFAIK it should be more platform-consistent than `std::clock()`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use it any time, but in time critical code we'd have to check that it does not cost more resources than clock().

tesserr << word_data->word->best_choice->unichar_string()
<< " (ocr took " << 1000 * total_time / CLOCKS_PER_SEC << " ms)\n";
}
}

Expand Down
68 changes: 68 additions & 0 deletions src/ccutil/tesserrstream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// File: tesserrstream.h
// Description: C++ stream which enhances tprintf
// Author: Stefan Weil
//
// (C) Copyright 2024
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef TESSERACT_CCUTIL_TESSERRSTREAM_H
#define TESSERACT_CCUTIL_TESSERRSTREAM_H

#include "tprintf.h"
#include <tesseract/export.h> // for TESS_API

#include <ostream> // for std::ostream

namespace tesseract {

class TessStreamBuf : public std::streambuf {
public:
TessStreamBuf() = default;

protected:
virtual int_type overflow(int_type c) override {
if (c != EOF) {
if (debugfp == nullptr) {
debugfp = get_debugfp();
}
if (fputc(c, debugfp) == EOF) {
return EOF;
}
}
return c;
}

virtual std::streamsize xsputn(const char* s, std::streamsize n) override {
if (debugfp == nullptr) {
debugfp = get_debugfp();
}
return fwrite(s, 1, n, debugfp);
}

private:
FILE *debugfp = nullptr;
};

// Output stream for log messages which writes through its own
// TessStreamBuf member.
class TessErrStream : public std::ostream {
public:
  // The std::ostream base is constructed before the buffer member, so it
  // is created without a buffer first and gets the buffer attached in the
  // constructor body.
  TessErrStream() : std::ostream(nullptr), buf() {
    this->rdbuf(&buf);
  }

private:
  // Buffer which receives all output of this stream.
  TessStreamBuf buf;
};

extern TESS_API TessErrStream tesserr;

} // namespace tesseract

#endif // TESSERACT_CCUTIL_TESSERRSTREAM_H
12 changes: 8 additions & 4 deletions src/ccutil/tprintf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# include "config_auto.h"
#endif

#include "tesserrstream.h"
#include "tprintf.h"

#include "params.h"
Expand All @@ -36,7 +37,7 @@ INT_VAR(log_level, INT_MAX, "Logging level");
static STRING_VAR(debug_file, "", "File to send tprintf output to");

// File for debug output.
static FILE *debugfp;
FILE *debugfp;

// Set output for log messages.
// The output is written to stderr if debug_file is empty.
Expand All @@ -49,7 +50,7 @@ static FILE *debugfp;
// tprintf("write to /tmp/log\n");
// debug_file = "";
// tprintf("write to stderr\n");
static void set_debugfp() {
FILE *get_debugfp() {
if (debug_file.empty()) {
// Write to stderr.
if (debugfp != stderr && debugfp != nullptr) {
Expand All @@ -66,15 +67,18 @@ static void set_debugfp() {
#endif
debugfp = fopen(debug_file.c_str(), "wb");
}
return debugfp;
}

// Trace printf.
// Formats the message printf-style (format plus variable arguments) and
// writes it with vfprintf to the file used for debug output.
// NOTE(review): this listing is a flattened diff, so the replaced lines
// (set_debugfp() and vfprintf(debugfp, ...)) appear next to their
// replacements (get_debugfp() and vfprintf(f, ...)).
void tprintf(const char *format, ...) {
set_debugfp();
// Get the file for debug output for this call.
FILE *f = get_debugfp();
va_list args; // variable args
va_start(args, format); // variable list
vfprintf(debugfp, format, args);
vfprintf(f, format, args);
// Every va_start needs a matching va_end.
va_end(args);
}

TessErrStream tesserr;

} // namespace tesseract
3 changes: 3 additions & 0 deletions src/ccutil/tprintf.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ extern TESS_API void tprintf( // Trace printf
const char *format, ...) // Message
__attribute__((format(printf, 1, 2)));

// Get file for debug output.
FILE *get_debugfp();

} // namespace tesseract

#undef __attribute__
Expand Down
12 changes: 8 additions & 4 deletions src/training/common/errorcounter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "sampleiterator.h"
#include "shapeclassifier.h"
#include "shapetable.h"
#include "tesserrstream.h"
#include "trainingsample.h"
#include "trainingsampleset.h"
#include "unicity_table.h"
Expand Down Expand Up @@ -50,7 +51,10 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
ErrorCounter counter(classifier->GetUnicharset(), fontsize);
std::vector<UnicharRating> results;

clock_t start = clock();
clock_t total_time = 0;
if (report_level > 1) {
total_time = clock();
}
unsigned total_samples = 0;
double unscaled_error = 0.0;
// Set a number of samples on which to run the classify debug mode.
Expand Down Expand Up @@ -85,7 +89,6 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
}
++total_samples;
}
const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
// Create the appropriate error report.
unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
unichar_error, fonts_report);
Expand All @@ -94,8 +97,9 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
}
if (report_level > 1 && total_samples > 0) {
// It is useful to know the time in microseconds/char.
tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time,
1000000.0 * total_time / total_samples);
total_time = 1000 * (clock() - total_time) / CLOCKS_PER_SEC;
tesserr << "Errors computed in " << total_time << " ms at "
<< 1000 * total_time / total_samples << " μs/char\n";
}
return unscaled_error;
}
Expand Down
Loading