Skip to content

Commit a18620c

Browse files
author
Ray Smith
committed
Improved results on images with no resolution information. Estimates resolution
from the size of the connected components, based on average text size.
1 parent 147a1a5 commit a18620c

File tree

4 files changed

+27
-18
lines changed

4 files changed

+27
-18
lines changed

api/baseapi.cpp

+2-11
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,6 @@ const char* kInputFile = "noname.tif";
102102
const char* kOldVarsFile = "failed_vars.txt";
103103
/** Max string length of an int. */
104104
const int kMaxIntSize = 22;
105-
/**
106-
* Minimum believable resolution. Used as a default if there is no other
107-
* information, as it is safer to under-estimate than over-estimate.
108-
*/
109-
const int kMinCredibleResolution = 70;
110-
/** Maximum believable resolution. */
111-
const int kMaxCredibleResolution = 2400;
112-
/** Default resolution. */
113-
const int kDefaultResolution = 300;
114105

115106
/* Add all available languages recursively.
116107
*/
@@ -2225,8 +2216,8 @@ bool TessBaseAPI::Threshold(Pix** pix) {
22252216
// Use the minimum credible resolution, as it is safer to under-estimate
22262217
// than over-estimate resolution.
22272218
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
2228-
kDefaultResolution);
2229-
thresholder_->SetSourceYResolution(kDefaultResolution);
2219+
kMinCredibleResolution);
2220+
thresholder_->SetSourceYResolution(kMinCredibleResolution);
22302221
}
22312222
PageSegMode pageseg_mode =
22322223
static_cast<PageSegMode>(

ccmain/osdetect.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,6 @@ const char* ScriptDetector::korean_script_ = "Korean";
5858
const char* ScriptDetector::japanese_script_ = "Japanese";
5959
const char* ScriptDetector::fraktur_script_ = "Fraktur";
6060

61-
// Minimum believable resolution.
62-
const int kMinCredibleResolution = 70;
63-
6461
void OSResults::update_best_orientation() {
6562
float first = orientations[0];
6663
float second = orientations[1];

ccmain/pagesegmain.cpp

+12-3
Original file line numberDiff line numberDiff line change
@@ -310,13 +310,22 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
310310
TO_BLOCK* to_block = to_block_it.data();
311311
TBOX blkbox = to_block->block->bounding_box();
312312
ColumnFinder* finder = NULL;
313+
int estimated_resolution = source_resolution_;
314+
if (source_resolution_ == kMinCredibleResolution) {
315+
// Try to estimate resolution from typical body text size.
316+
int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor);
317+
if (res > estimated_resolution && res < kMaxCredibleResolution) {
318+
estimated_resolution = res;
319+
tprintf("Estimating resolution as %d\n", estimated_resolution);
320+
}
321+
}
313322

314323
if (to_block->line_size >= 2) {
315324
finder = new ColumnFinder(static_cast<int>(to_block->line_size),
316325
blkbox.botleft(), blkbox.topright(),
317-
source_resolution_, textord_use_cjk_fp_model,
318-
textord_tabfind_aligned_gap_fraction,
319-
&v_lines, &h_lines, vertical_x, vertical_y);
326+
estimated_resolution, textord_use_cjk_fp_model,
327+
textord_tabfind_aligned_gap_fraction, &v_lines,
328+
&h_lines, vertical_x, vertical_y);
320329

321330
finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);
322331

ccstruct/publictypes.h

+13-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,19 @@
3030
// API-level code should include apitypes.h in preference to this file.
3131

3232
/** Number of printers' points in an inch. The unit of the pointsize return. */
33-
const int kPointsPerInch = 72;
33+
constexpr int kPointsPerInch = 72;
34+
/**
35+
* Minimum believable resolution. Used as a default if there is no other
36+
* information, as it is safer to under-estimate than over-estimate.
37+
*/
38+
constexpr int kMinCredibleResolution = 70;
39+
/** Maximum believable resolution. */
40+
constexpr int kMaxCredibleResolution = 2400;
41+
/**
42+
* Ratio of likely resolution to median blob size. Used to estimate
43+
* resolution when none is provided. This is basically 1/usual text size in
44+
* inches. */
45+
constexpr int kResolutionEstimationFactor = 10;
3446

3547
/**
3648
* Possible types for a POLY_BLOCK or ColPartition.

0 commit comments

Comments
 (0)