Skip to content

Commit bce2cd5

Browse files
author
zdenop
committed
Enable selecting the PDF compression type and JPEG quality (fixes issue 1263)
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@1134 d0cd1f9f-072b-0410-8dd7-cf729c803f20
1 parent 6cdf70b commit bce2cd5

File tree

4 files changed

+30
-6
lines changed

4 files changed

+30
-6
lines changed

api/pdfrenderer.cpp

+11-5
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ bool TessPDFRenderer::fileToPDFObj(char *filename, long int objnum,
519519
return true;
520520
}
521521

522-
bool TessPDFRenderer::pixToPDFObj(Pix *pix, long int objnum,
522+
bool TessPDFRenderer::pixToPDFObj(TessBaseAPI* api, Pix *pix, long int objnum,
523523
char **pdf_object,
524524
long int *pdf_object_size) {
525525
if (!pdf_object_size || !pdf_object)
@@ -531,11 +531,17 @@ bool TessPDFRenderer::pixToPDFObj(Pix *pix, long int objnum,
531531
char b2[kBasicBufSize];
532532
L_COMP_DATA *cid;
533533
int encoding_type;
534-
const int kJpegQuality = 85;
535-
if (selectDefaultPdfEncoding(pix, &encoding_type) != 0)
536-
return false;
534+
int kJpegQuality;
535+
api->GetIntVariable("tessedit_pdf_jpg_quality", &kJpegQuality);
536+
api->GetIntVariable("tessedit_pdf_compression", &encoding_type);
537+
if (encoding_type == 0 || encoding_type > 3) {
538+
if (selectDefaultPdfEncoding(pix, &encoding_type) != 0)
539+
return false;
540+
}
541+
537542
if (pixGenerateCIData(pix, encoding_type, kJpegQuality, 0, &cid) != 0)
538543
return false;
544+
encoding_type = cid->type;
539545

540546
const char *filter;
541547
switch(encoding_type) {
@@ -677,7 +683,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
677683

678684
char *pdf_object;
679685
if (!fileToPDFObj(filename, obj_, &pdf_object, &objsize)) {
680-
if (!pixToPDFObj(pix, obj_, &pdf_object, &objsize)) {
686+
if (!pixToPDFObj(api, pix, obj_, &pdf_object, &objsize)) {
681687
return false;
682688
}
683689
}

api/renderer.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
198198
static bool fileToPDFObj(char *filename, long int objnum,
199199
char **pdf_object, long int *pdf_object_size);
200200
// Turn a Pix into the very best PDF object that we can.
201-
static bool pixToPDFObj(Pix *pix, long int objnum,
201+
static bool pixToPDFObj(tesseract::TessBaseAPI *api, Pix *pix,
202+
long int objnum,
202203
char **pdf_object, long int *pdf_object_size);
203204
};
204205

ccmain/tesseractclass.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,16 @@ Tesseract::Tesseract()
315315
"Write .html hOCR output file", this->params()),
316316
BOOL_MEMBER(tessedit_create_pdf, false,
317317
"Write .pdf output file", this->params()),
318+
INT_MEMBER(tessedit_pdf_compression, 0,
319+
"Type of image compression in pdf output: "
320+
"0 - autoselection (default); "
321+
"1 - jpeg; "
322+
"2 - G4; "
323+
"3 - flat",
324+
this->params()),
325+
INT_MEMBER(tessedit_pdf_jpg_quality, 85,
326+
"Quality level of jpeg image compression in pdf output",
327+
this->params()),
318328
STRING_MEMBER(unrecognised_char, "|",
319329
"Output char for unidentified blobs", this->params()),
320330
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),

ccmain/tesseractclass.h

+7
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,13 @@ class Tesseract : public Wordrec {
908908
BOOL_VAR_H(tessedit_write_unlv, false, "Write .unlv output file");
909909
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
910910
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
911+
INT_VAR_H(tessedit_pdf_compression, 0, "Type of image encoding in pdf output:"
912+
"0 - autoselection (default); "
913+
"1 - jpeg; "
914+
"2 - G4; "
915+
"3 - flat");
916+
INT_VAR_H(tessedit_pdf_jpg_quality, 85, "Quality level of jpeg image "
917+
"compression in pdf output");
911918
STRING_VAR_H(unrecognised_char, "|",
912919
"Output char for unidentified blobs");
913920
INT_VAR_H(suspect_level, 99, "Suspect marker level");

0 commit comments

Comments
 (0)