Skip to content

Commit d0cb107

Browse files
committed
Remove the parameters tessedit_pdf_jpg_quality and tessedit_pdf_compression (the reasons are given in issues 1300 and 1285)
1 parent 55d11ad commit d0cb107

File tree

4 files changed

+21
-47
lines changed

4 files changed

+21
-47
lines changed

api/pdfrenderer.cpp

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,7 @@ bool TessPDFRenderer::BeginDocumentHandler() {
431431
return true;
432432
}
433433

434-
bool TessPDFRenderer::imageToPDFObj(TessBaseAPI* api,
435-
Pix *pix,
434+
bool TessPDFRenderer::imageToPDFObj(Pix *pix,
436435
char *filename,
437436
long int objnum,
438437
char **pdf_object,
@@ -449,32 +448,25 @@ bool TessPDFRenderer::imageToPDFObj(TessBaseAPI* api,
449448
return false;
450449

451450
L_COMP_DATA *cid = NULL;
452-
int kJpegQuality;
453-
int encoding_type;
454-
api->GetIntVariable("tessedit_pdf_jpg_quality", &kJpegQuality);
455-
api->GetIntVariable("tessedit_pdf_compression", &encoding_type);
456-
if (encoding_type > 0 && encoding_type < 4) {
457-
if (pixGenerateCIData(pix, encoding_type, kJpegQuality, 0, &cid) != 0)
458-
return false;
451+
const int kJpegQuality = 85;
452+
l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
453+
// TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
454+
// types of PNG files, especially if there are 2 samples per pixel.
455+
// We can get rid of this logic after Leptonica 1.72 is released and
456+
// has propagated everywhere. Bug discussion as follows.
457+
// https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
458+
int format, sad;
459+
findFileFormat(filename, &format);
460+
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
461+
pixSetSpp(pix, 3);
462+
sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
459463
} else {
460-
// TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
461-
// types of PNG files, especially if there are 2 samples per pixel.
462-
// We can get rid of this logic after Leptonica 1.72 is released and
463-
// has propagated everywhere. Bug discussion as follows.
464-
// https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
465-
int format, sad;
466-
findFileFormat(filename, &format);
467-
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
468-
pixSetSpp(pix, 3);
469-
sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
470-
} else {
471-
sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
472-
}
464+
sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
465+
}
473466

474-
if (sad || !cid) {
475-
l_CIDataDestroy(&cid);
476-
return false;
477-
}
467+
if (sad || !cid) {
468+
l_CIDataDestroy(&cid);
469+
return false;
478470
}
479471

480472
const char *group4 = "";
@@ -665,7 +657,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
665657
AppendPDFObjectDIY(objsize);
666658

667659
char *pdf_object;
668-
if (!imageToPDFObj(api, pix, filename, obj_, &pdf_object, &objsize)) {
660+
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
669661
return false;
670662
}
671663
AppendData(pdf_object, objsize);

api/renderer.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
194194
static char* GetPDFTextObjects(TessBaseAPI* api,
195195
double width, double height);
196196
// Turn an image into a PDF object. Only transcode if we have to.
197-
static bool imageToPDFObj(tesseract::TessBaseAPI *api, Pix *pix,
198-
char *filename, long int objnum, char **pdf_object,
199-
long int *pdf_object_size);
197+
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
198+
char **pdf_object, long int *pdf_object_size);
200199
};
201200

202201

ccmain/tesseractclass.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -318,16 +318,6 @@ Tesseract::Tesseract()
318318
"Write .html hOCR output file", this->params()),
319319
BOOL_MEMBER(tessedit_create_pdf, false,
320320
"Write .pdf output file", this->params()),
321-
INT_MEMBER(tessedit_pdf_compression, 0,
322-
"Type of image compression in pdf output: "
323-
"0 - autoselection (default); "
324-
"1 - jpeg; "
325-
"2 - G4; "
326-
"3 - flate",
327-
this->params()),
328-
INT_MEMBER(tessedit_pdf_jpg_quality, 85,
329-
"Quality level of jpeg image compression in pdf output",
330-
this->params()),
331321
STRING_MEMBER(unrecognised_char, "|",
332322
"Output char for unidentified blobs", this->params()),
333323
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),

ccmain/tesseractclass.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -930,13 +930,6 @@ class Tesseract : public Wordrec {
930930
BOOL_VAR_H(tessedit_create_txt, true, "Write .txt output file");
931931
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
932932
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
933-
INT_VAR_H(tessedit_pdf_compression, 0, "Type of image encoding in pdf output:"
934-
"0 - autoselection (default); "
935-
"1 - jpeg; "
936-
"2 - G4; "
937-
"3 - flate");
938-
INT_VAR_H(tessedit_pdf_jpg_quality, 85, "Quality level of jpeg image "
939-
"compression in pdf output");
940933
STRING_VAR_H(unrecognised_char, "|",
941934
"Output char for unidentified blobs");
942935
INT_VAR_H(suspect_level, 99, "Suspect marker level");

0 commit comments

Comments
 (0)