mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-05-17 14:16:10 -06:00
923 lines
32 KiB
C
923 lines
32 KiB
C
/*====================================================================*
|
|
- Copyright (C) 2001 Leptonica. All rights reserved.
|
|
- This software is distributed in the hope that it will be
|
|
- useful, but with NO WARRANTY OF ANY KIND.
|
|
- No author or distributor accepts responsibility to anyone for the
|
|
- consequences of using this software, or for whether it serves any
|
|
- particular purpose or works at all, unless he or she says so in
|
|
- writing. Everyone is granted permission to copy, modify and
|
|
- redistribute this source code, for commercial or non-commercial
|
|
- purposes, with the following restrictions: (1) the origin of this
|
|
- source code must not be misrepresented; (2) modified versions must
|
|
- be plainly marked as such; and (3) this notice may not be removed
|
|
- or altered from any source or modified source distribution.
|
|
*====================================================================*/
|
|
|
|
|
|
/*
|
|
* classapp.c
|
|
*
|
|
* Top-level jb2 correlation and rank-hausdorff
|
|
*
|
|
* l_int32 jbCorrelation()
|
|
* l_int32 jbRankHausdorff()
|
|
*
|
|
* Extract and classify words in textline order
|
|
*
|
|
* JBCLASSER *jbWordsInTextlines()
|
|
* l_int32 pixGetWordsInTextlines()
|
|
* l_int32 pixGetWordBoxesInTextlines()
|
|
*
|
|
* Use word bounding boxes to compare page images
|
|
*
|
|
* NUMAA *boxaExtractSortedPattern()
|
|
* l_int32 numaaCompareImagesByBoxes()
|
|
* static l_int32 testLineAlignmentX()
|
|
* static l_int32 countAlignedMatches()
|
|
* static void printRowIndices()
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include "allheaders.h"
|
|
|
|
static const l_int32 JB_WORDS_MIN_WIDTH = 5; /* pixels */
|
|
static const l_int32 JB_WORDS_MIN_HEIGHT = 3; /* pixels */
|
|
|
|
/* MSVC can't handle arrays dimensioned by static const integers */
|
|
#define L_BUF_SIZE 512
|
|
|
|
/* Static comparison functions */
|
|
static l_int32 testLineAlignmentX(NUMA *na1, NUMA *na2, l_int32 shiftx,
|
|
l_int32 delx, l_int32 nperline);
|
|
static l_int32 countAlignedMatches(NUMA *nai1, NUMA *nai2, NUMA *nasx,
|
|
NUMA *nasy, l_int32 n1, l_int32 n2,
|
|
l_int32 delx, l_int32 dely,
|
|
l_int32 nreq, l_int32 *psame,
|
|
l_int32 debugflag);
|
|
static void printRowIndices(l_int32 *index1, l_int32 n1,
|
|
l_int32 *index2, l_int32 n2);
|
|
|
|
|
|
/*------------------------------------------------------------------*
|
|
* Top-level jb2 correlation and rank-hausdorff *
|
|
*------------------------------------------------------------------*/
|
|
/*!
|
|
* jbCorrelation()
|
|
*
|
|
* Input: dirin (directory of input images)
|
|
* thresh (typically ~0.8)
|
|
* weight (typically ~0.6)
|
|
* components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
|
|
* rootname (for output files)
|
|
* firstpage (0-based)
|
|
* npages (use 0 for all pages in dirin)
|
|
* renderflag (1 to render from templates; 0 to skip)
|
|
* Return: 0 if OK, 1 on error
|
|
*
|
|
* Notes:
|
|
* (1) See prog/jbcorrelation for generating more output (e.g.,
|
|
* for debugging)
|
|
*/
|
|
l_int32
|
|
jbCorrelation(const char *dirin,
|
|
l_float32 thresh,
|
|
l_float32 weight,
|
|
l_int32 components,
|
|
const char *rootname,
|
|
l_int32 firstpage,
|
|
l_int32 npages,
|
|
l_int32 renderflag)
|
|
{
|
|
char filename[L_BUF_SIZE];
|
|
l_int32 nfiles, i, numpages;
|
|
JBDATA *data;
|
|
JBCLASSER *classer;
|
|
PIX *pix;
|
|
PIXA *pixa;
|
|
SARRAY *safiles;
|
|
|
|
PROCNAME("jbCorrelation");
|
|
|
|
if (!dirin)
|
|
return ERROR_INT("dirin not defined", procName, 1);
|
|
if (!rootname)
|
|
return ERROR_INT("rootname not defined", procName, 1);
|
|
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
|
|
components != JB_WORDS)
|
|
return ERROR_INT("components invalid", procName, 1);
|
|
|
|
safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
|
|
nfiles = sarrayGetCount(safiles);
|
|
|
|
/* Classify components */
|
|
classer = jbCorrelationInit(components, 0, 0, thresh, weight);
|
|
jbAddPages(classer, safiles);
|
|
|
|
/* Save data */
|
|
data = jbDataSave(classer);
|
|
jbDataWrite(rootname, data);
|
|
|
|
/* Optionally, render pages using class templates */
|
|
if (renderflag) {
|
|
pixa = jbDataRender(data, FALSE);
|
|
numpages = pixaGetCount(pixa);
|
|
if (numpages != nfiles)
|
|
fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
|
|
numpages, nfiles);
|
|
for (i = 0; i < numpages; i++) {
|
|
pix = pixaGetPix(pixa, i, L_CLONE);
|
|
snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
|
|
fprintf(stderr, "filename: %s\n", filename);
|
|
pixWrite(filename, pix, IFF_PNG);
|
|
pixDestroy(&pix);
|
|
}
|
|
pixaDestroy(&pixa);
|
|
}
|
|
|
|
sarrayDestroy(&safiles);
|
|
jbClasserDestroy(&classer);
|
|
jbDataDestroy(&data);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* jbRankHaus()
|
|
*
|
|
* Input: dirin (directory of input images)
|
|
* size (of Sel used for dilation; typ. 2)
|
|
* rank (rank value of match; typ. 0.97)
|
|
* components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
|
|
* rootname (for output files)
|
|
* firstpage (0-based)
|
|
* npages (use 0 for all pages in dirin)
|
|
* renderflag (1 to render from templates; 0 to skip)
|
|
* Return: 0 if OK, 1 on error
|
|
*
|
|
* Notes:
|
|
* (1) See prog/jbrankhaus for generating more output (e.g.,
|
|
* for debugging)
|
|
*/
|
|
l_int32
|
|
jbRankHaus(const char *dirin,
|
|
l_int32 size,
|
|
l_float32 rank,
|
|
l_int32 components,
|
|
const char *rootname,
|
|
l_int32 firstpage,
|
|
l_int32 npages,
|
|
l_int32 renderflag)
|
|
{
|
|
char filename[L_BUF_SIZE];
|
|
l_int32 nfiles, i, numpages;
|
|
JBDATA *data;
|
|
JBCLASSER *classer;
|
|
PIX *pix;
|
|
PIXA *pixa;
|
|
SARRAY *safiles;
|
|
|
|
PROCNAME("jbRankHaus");
|
|
|
|
if (!dirin)
|
|
return ERROR_INT("dirin not defined", procName, 1);
|
|
if (!rootname)
|
|
return ERROR_INT("rootname not defined", procName, 1);
|
|
if (components != JB_CONN_COMPS && components != JB_CHARACTERS &&
|
|
components != JB_WORDS)
|
|
return ERROR_INT("components invalid", procName, 1);
|
|
|
|
safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
|
|
nfiles = sarrayGetCount(safiles);
|
|
|
|
/* Classify components */
|
|
classer = jbRankHausInit(components, 0, 0, size, rank);
|
|
jbAddPages(classer, safiles);
|
|
|
|
/* Save data */
|
|
data = jbDataSave(classer);
|
|
jbDataWrite(rootname, data);
|
|
|
|
/* Optionally, render pages using class templates */
|
|
if (renderflag) {
|
|
pixa = jbDataRender(data, FALSE);
|
|
numpages = pixaGetCount(pixa);
|
|
if (numpages != nfiles)
|
|
fprintf(stderr, "numpages = %d, nfiles = %d, not equal!\n",
|
|
numpages, nfiles);
|
|
for (i = 0; i < numpages; i++) {
|
|
pix = pixaGetPix(pixa, i, L_CLONE);
|
|
snprintf(filename, L_BUF_SIZE, "%s.%05d", rootname, i);
|
|
fprintf(stderr, "filename: %s\n", filename);
|
|
pixWrite(filename, pix, IFF_PNG);
|
|
pixDestroy(&pix);
|
|
}
|
|
pixaDestroy(&pixa);
|
|
}
|
|
|
|
sarrayDestroy(&safiles);
|
|
jbClasserDestroy(&classer);
|
|
jbDataDestroy(&data);
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
/*------------------------------------------------------------------*
|
|
* Extract and classify words in textline order *
|
|
*------------------------------------------------------------------*/
|
|
/*!
|
|
* jbWordsInTextlines()
|
|
*
|
|
* Input: dirin (directory of input pages)
|
|
* reduction (1 for full res; 2 for half-res)
|
|
* maxwidth (of word mask components, to be kept)
|
|
* maxheight (of word mask components, to be kept)
|
|
* thresh (on correlation; 0.80 is reasonable)
|
|
* weight (for handling thick text; 0.6 is reasonable)
|
|
* natl (<return> numa with textline index for each component)
|
|
* firstpage (0-based)
|
|
* npages (use 0 for all pages in dirin)
|
|
* Return: classer (for the set of pages)
|
|
*
|
|
* Notes:
|
|
* (1) This is a high-level function. See prog/jbwords for example
|
|
* of usage.
|
|
* (2) Typically, words can be found reasonably well at a resolution
|
|
* of about 150 ppi. For highest accuracy, you should use 300 ppi.
|
|
* Assuming that the input images are 300 ppi, use reduction = 1
|
|
* for finding words at full res, and reduction = 2 for finding
|
|
* them at 150 ppi.
|
|
*/
|
|
JBCLASSER *
|
|
jbWordsInTextlines(const char *dirin,
|
|
l_int32 reduction,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
l_float32 thresh,
|
|
l_float32 weight,
|
|
NUMA **pnatl,
|
|
l_int32 firstpage,
|
|
l_int32 npages)
|
|
{
|
|
char *fname;
|
|
l_int32 nfiles, i, w, h;
|
|
BOXA *boxa;
|
|
JBCLASSER *classer;
|
|
NUMA *nai, *natl;
|
|
PIX *pix;
|
|
PIXA *pixa;
|
|
SARRAY *safiles;
|
|
|
|
PROCNAME("jbWordsInTextlines");
|
|
|
|
if (!pnatl)
|
|
return (JBCLASSER *)ERROR_PTR("&natl not defined", procName, NULL);
|
|
*pnatl = NULL;
|
|
if (!dirin)
|
|
return (JBCLASSER *)ERROR_PTR("dirin not defined", procName, NULL);
|
|
if (reduction != 1 && reduction != 2)
|
|
return (JBCLASSER *)ERROR_PTR("reduction not in {1,2}", procName, NULL);
|
|
|
|
safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
|
|
nfiles = sarrayGetCount(safiles);
|
|
|
|
/* Classify components */
|
|
classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight);
|
|
classer->safiles = sarrayCopy(safiles);
|
|
natl = numaCreate(0);
|
|
*pnatl = natl;
|
|
for (i = 0; i < nfiles; i++) {
|
|
fname = sarrayGetString(safiles, i, 0);
|
|
if ((pix = pixRead(fname)) == NULL) {
|
|
L_WARNING_INT("image file %d not read", procName, i);
|
|
continue;
|
|
}
|
|
pixGetDimensions(pix, &w, &h, NULL);
|
|
if (reduction == 1) {
|
|
classer->w = w;
|
|
classer->h = h;
|
|
}
|
|
else { /* reduction == 2 */
|
|
classer->w = w / 2;
|
|
classer->h = h / 2;
|
|
}
|
|
pixGetWordsInTextlines(pix, reduction, JB_WORDS_MIN_WIDTH,
|
|
JB_WORDS_MIN_HEIGHT, maxwidth, maxheight,
|
|
&boxa, &pixa, &nai);
|
|
jbAddPageComponents(classer, pix, boxa, pixa);
|
|
numaJoin(natl, nai, 0, 0);
|
|
pixDestroy(&pix);
|
|
numaDestroy(&nai);
|
|
boxaDestroy(&boxa);
|
|
pixaDestroy(&pixa);
|
|
}
|
|
|
|
sarrayDestroy(&safiles);
|
|
return classer;
|
|
}
|
|
|
|
|
|
/*!
|
|
* pixGetWordsInTextlines()
|
|
*
|
|
* Input: pixs (1 bpp, 300 ppi)
|
|
* reduction (1 for full res; 2 for half-res)
|
|
* minwidth, minheight (of saved components; smaller are discarded)
|
|
* maxwidth, maxheight (of saved components; larger are discarded)
|
|
* &boxad (<return> word boxes sorted in textline line order)
|
|
* &pixad (<return> word images sorted in textline line order)
|
|
* &naindex (<return> index of textline for each word)
|
|
* Return: 0 if OK, 1 on error
|
|
*
|
|
* Notes:
|
|
* (1) The input should be at a resolution of about 300 ppi.
|
|
* The word masks can be computed at either 150 ppi or 300 ppi.
|
|
* For the former, set reduction = 2.
|
|
* (2) The four size constraints on saved components are all
|
|
* used at 2x reduction.
|
|
* (3) The result are word images (and their b.b.), extracted in
|
|
* textline order, all at 2x reduction, and with a numa giving
|
|
* the textline index for each word.
|
|
* (4) The pixa and boxa interfaces should make this type of
|
|
* application simple to put together. The steps are:
|
|
* - generate first estimate of word masks
|
|
* - get b.b. of these, and remove the small and big ones
|
|
* - extract pixa of the word mask from these boxes
|
|
* - extract pixa of the actual word images, using word masks
|
|
* - sort actual word images in textline order (2d)
|
|
* - flatten them to a pixa (1d), saving the textline index
|
|
* for each pix
|
|
* (5) In an actual application, it may be desirable to pre-filter
|
|
* the input image to remove large components, to extract
|
|
* single columns of text, and to deskew them.
|
|
*/
|
|
l_int32
|
|
pixGetWordsInTextlines(PIX *pixs,
|
|
l_int32 reduction,
|
|
l_int32 minwidth,
|
|
l_int32 minheight,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
BOXA **pboxad,
|
|
PIXA **ppixad,
|
|
NUMA **pnai)
|
|
{
|
|
l_int32 maxsize;
|
|
BOXA *boxa1, *boxa2, *boxa3, *boxad;
|
|
BOXAA *baa;
|
|
NUMA *nai;
|
|
NUMAA *naa;
|
|
PIXA *pixa1, *pixa2, *pixad;
|
|
PIX *pixt1, *pixt2;
|
|
PIXAA *paa;
|
|
|
|
PROCNAME("pixGetWordsInTextlines");
|
|
|
|
if (!pboxad || !ppixad || !pnai)
|
|
return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1);
|
|
*pboxad = NULL;
|
|
*ppixad = NULL;
|
|
*pnai = NULL;
|
|
if (!pixs)
|
|
return ERROR_INT("pixs not defined", procName, 1);
|
|
if (reduction != 1 && reduction != 2)
|
|
return ERROR_INT("reduction not in {1,2}", procName, 1);
|
|
|
|
if (reduction == 1) {
|
|
pixt1 = pixClone(pixs);
|
|
maxsize = 14;
|
|
}
|
|
else { /* reduction == 2 */
|
|
pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
|
maxsize = 7;
|
|
}
|
|
|
|
/* First estimate of the word masks */
|
|
pixt2 = pixWordMaskByDilation(pixt1, maxsize, NULL);
|
|
|
|
/* Get the bounding boxes of the words. First remove the
|
|
* small ones, which can be due to punctuation that was
|
|
* not joined to a word. Then remove the large ones, which are
|
|
* also not likely to be words. Here, pixa1 contains
|
|
* the masks over each word. */
|
|
boxa1 = pixConnComp(pixt2, NULL, 8);
|
|
boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH,
|
|
L_SELECT_IF_GTE, NULL);
|
|
boxa3 = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH,
|
|
L_SELECT_IF_LTE, NULL);
|
|
pixa1 = pixaCreateFromBoxa(pixt2, boxa3, NULL);
|
|
|
|
/* Generate a pixa of the actual word images, not the mask images. */
|
|
pixa2 = pixaClipToPix(pixa1, pixt1);
|
|
|
|
/* Sort the bounding boxes of these words, saving the
|
|
* index mapping that will allow us to sort the pixa identically. */
|
|
baa = boxaSort2d(boxa3, &naa, -1, -1, 4);
|
|
paa = pixaSort2dByIndex(pixa2, naa, L_CLONE);
|
|
|
|
/* Flatten the word pixa */
|
|
pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE);
|
|
boxad = pixaGetBoxa(pixad, L_COPY);
|
|
|
|
*pnai = nai;
|
|
*pboxad = boxad;
|
|
*ppixad = pixad;
|
|
|
|
pixDestroy(&pixt1);
|
|
pixDestroy(&pixt2);
|
|
pixaDestroy(&pixa1);
|
|
pixaDestroy(&pixa2);
|
|
boxaDestroy(&boxa1);
|
|
boxaDestroy(&boxa2);
|
|
boxaDestroy(&boxa3);
|
|
boxaaDestroy(&baa);
|
|
pixaaDestroy(&paa);
|
|
numaaDestroy(&naa);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* pixGetWordBoxesInTextlines()
|
|
*
|
|
* Input: pixs (1 bpp, 300 ppi)
|
|
* reduction (1 for full res; 2 for half-res)
|
|
* minwidth, minheight (of saved components; smaller are discarded)
|
|
* maxwidth, maxheight (of saved components; larger are discarded)
|
|
* &boxad (<return> word boxes sorted in textline line order)
|
|
* &naindex (<return> index of textline for each word)
|
|
* Return: 0 if OK, 1 on error
|
|
*
|
|
* Notes:
|
|
* (1) The input should be at a resolution of about 300 ppi.
|
|
* The word masks can be computed at either 150 ppi or 300 ppi.
|
|
* For the former, set reduction = 2.
|
|
* (2) In an actual application, it may be desirable to pre-filter
|
|
* the input image to remove large components, to extract
|
|
* single columns of text, and to deskew them.
|
|
* (3) This is a special version that just finds the word boxes
|
|
* in line order, with a numa giving the textline index for
|
|
* each word. See pixGetWordsInTextlines() for more details.
|
|
*/
|
|
l_int32
|
|
pixGetWordBoxesInTextlines(PIX *pixs,
|
|
l_int32 reduction,
|
|
l_int32 minwidth,
|
|
l_int32 minheight,
|
|
l_int32 maxwidth,
|
|
l_int32 maxheight,
|
|
BOXA **pboxad,
|
|
NUMA **pnai)
|
|
{
|
|
l_int32 maxsize;
|
|
BOXA *boxa1, *boxa2, *boxa3, *boxad;
|
|
BOXAA *baa;
|
|
NUMA *nai;
|
|
PIX *pixt1, *pixt2;
|
|
|
|
PROCNAME("pixGetWordBoxesInTextlines");
|
|
|
|
if (!pboxad || !pnai)
|
|
return ERROR_INT("&boxad and &nai not both defined", procName, 1);
|
|
*pboxad = NULL;
|
|
*pnai = NULL;
|
|
if (!pixs)
|
|
return ERROR_INT("pixs not defined", procName, 1);
|
|
if (reduction != 1 && reduction != 2)
|
|
return ERROR_INT("reduction not in {1,2}", procName, 1);
|
|
|
|
if (reduction == 1) {
|
|
pixt1 = pixClone(pixs);
|
|
maxsize = 14;
|
|
}
|
|
else { /* reduction == 2 */
|
|
pixt1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
|
|
maxsize = 7;
|
|
}
|
|
|
|
/* First estimate of the word masks */
|
|
pixt2 = pixWordMaskByDilation(pixt1, maxsize, NULL);
|
|
|
|
/* Get the bounding boxes of the words, and remove the
|
|
* small ones, which can be due to punctuation that was
|
|
* not joined to a word, and the large ones, which are
|
|
* also not likely to be words. */
|
|
boxa1 = pixConnComp(pixt2, NULL, 8);
|
|
boxa2 = boxaSelectBySize(boxa1, minwidth, minheight,
|
|
L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL);
|
|
boxa3 = boxaSelectBySize(boxa2, maxwidth, maxheight,
|
|
L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL);
|
|
|
|
/* 2D sort the bounding boxes of these words. */
|
|
baa = boxaSort2d(boxa3, NULL, 3, -5, 5);
|
|
|
|
/* Flatten the boxaa, saving the boxa index for each box */
|
|
boxad = boxaaFlattenToBoxa(baa, &nai, L_CLONE);
|
|
|
|
*pnai = nai;
|
|
*pboxad = boxad;
|
|
|
|
pixDestroy(&pixt1);
|
|
pixDestroy(&pixt2);
|
|
boxaDestroy(&boxa1);
|
|
boxaDestroy(&boxa2);
|
|
boxaDestroy(&boxa3);
|
|
boxaaDestroy(&baa);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*------------------------------------------------------------------*
|
|
* Use word bounding boxes to compare page images *
|
|
*------------------------------------------------------------------*/
|
|
/*!
|
|
* boxaExtractSortedPattern()
|
|
*
|
|
* Input: boxa (typ. of word bounding boxes, in textline order)
|
|
* numa (index of textline for each box in boxa)
|
|
* Return: naa (numaa, where each numa represents one textline),
|
|
* or null on error
|
|
*
|
|
* Notes:
|
|
* (1) The input is expected to come from pixGetWordBoxesInTextlines().
|
|
* (2) Each numa in the output consists of an average y coordinate
|
|
* of the first box in the textline, followed by pairs of
|
|
* x coordinates representing the left and right edges of each
|
|
* of the boxes in the textline.
|
|
*/
|
|
NUMAA *
|
|
boxaExtractSortedPattern(BOXA *boxa,
|
|
NUMA *na)
|
|
{
|
|
l_int32 index, nbox, row, prevrow, x, y, w, h;
|
|
BOX *box;
|
|
NUMA *nad;
|
|
NUMAA *naa;
|
|
|
|
PROCNAME("boxaExtractSortedPattern");
|
|
|
|
if (!boxa)
|
|
return (NUMAA *)ERROR_PTR("boxa not defined", procName, NULL);
|
|
if (!na)
|
|
return (NUMAA *)ERROR_PTR("na not defined", procName, NULL);
|
|
|
|
naa = numaaCreate(0);
|
|
nbox = boxaGetCount(boxa);
|
|
if (nbox == 0)
|
|
return naa;
|
|
|
|
prevrow = -1;
|
|
for (index = 0; index < nbox; index++) {
|
|
box = boxaGetBox(boxa, index, L_CLONE);
|
|
numaGetIValue(na, index, &row);
|
|
if (row > prevrow) {
|
|
if (index > 0)
|
|
numaaAddNuma(naa, nad, L_INSERT);
|
|
nad = numaCreate(0);
|
|
prevrow = row;
|
|
boxGetGeometry(box, NULL, &y, NULL, &h);
|
|
numaAddNumber(nad, y + h / 2);
|
|
}
|
|
boxGetGeometry(box, &x, NULL, &w, NULL);
|
|
numaAddNumber(nad, x);
|
|
numaAddNumber(nad, x + w - 1);
|
|
boxDestroy(&box);
|
|
}
|
|
numaaAddNuma(naa, nad, L_INSERT);
|
|
|
|
return naa;
|
|
}
|
|
|
|
|
|
/*!
|
|
* numaaCompareImagesByBoxes()
|
|
*
|
|
* Input: naa1 (for image 1, formatted by boxaExtractSortedPattern())
|
|
* naa2 (ditto; for image 2)
|
|
* nperline (number of box regions to be used in each textline)
|
|
* nreq (number of complete row matches required)
|
|
* maxshiftx (max allowed x shift between two patterns, in pixels)
|
|
* maxshifty (max allowed y shift between two patterns, in pixels)
|
|
* delx (max allowed difference in x data, after alignment)
|
|
* dely (max allowed difference in y data, after alignment)
|
|
* &same (<return> 1 if @nreq row matches are found; 0 otherwise)
|
|
* debugflag (1 for debug output)
|
|
* Return: 0 if OK, 1 on error
|
|
*
|
|
* Notes:
|
|
* (1) Each input numaa describes a set of sorted bounding boxes
|
|
* (sorted by textline and, within each textline, from
|
|
* left to right) in the images from which they are derived.
|
|
* See boxaExtractSortedPattern() for a description of the data
|
|
* format in each of the input numaa.
|
|
* (2) This function does an alignment between the input
|
|
* descriptions of bounding boxes for two images. The
|
|
* input parameter @nperline specifies the number of boxes
|
|
* to consider in each line when testing for a match, and
|
|
* @nreq is the required number of lines that must be well-aligned
|
|
* to get a match.
|
|
* (3) Testing by alignment has 3 steps:
|
|
* (a) Generating the location of word bounding boxes from the
|
|
* images (prior to calling this function).
|
|
* (b) Listing all possible pairs of aligned rows, based on
|
|
* tolerances in horizontal and vertical positions of
|
|
* the boxes. Specifically, all pairs of rows are enumerated
|
|
* whose first @nperline boxes can be brought into close
|
|
* alignment, based on the delx parameter for boxes in the
|
|
* line and within the overall the @maxshiftx and @maxshifty
|
|
* constraints.
|
|
* (c) Each pair, starting with the first, is used to search
|
|
* for a set of @nreq - 1 other pairs that can all be aligned
|
|
* with a difference in global translation of not more
|
|
* than (@delx, @dely).
|
|
*/
|
|
l_int32
|
|
numaaCompareImagesByBoxes(NUMAA *naa1,
|
|
NUMAA *naa2,
|
|
l_int32 nperline,
|
|
l_int32 nreq,
|
|
l_int32 maxshiftx,
|
|
l_int32 maxshifty,
|
|
l_int32 delx,
|
|
l_int32 dely,
|
|
l_int32 *psame,
|
|
l_int32 debugflag)
|
|
{
|
|
l_int32 n1, n2, i, j, nbox, y1, y2, xl1, xl2;
|
|
l_int32 shiftx, shifty, match;
|
|
l_int32 *line1, *line2; /* indicator for sufficient boxes in a line */
|
|
l_int32 *yloc1, *yloc2; /* arrays of y value for first box in a line */
|
|
l_int32 *xleft1, *xleft2; /* arrays of x value for left side of first box */
|
|
NUMA *na1, *na2, *nai1, *nai2, *nasx, *nasy;
|
|
|
|
PROCNAME("numaaCompareImagesByBoxes");
|
|
|
|
if (!psame)
|
|
return ERROR_INT("&same not defined", procName, 1);
|
|
*psame = 0;
|
|
if (!naa1)
|
|
return ERROR_INT("naa1 not defined", procName, 1);
|
|
if (!naa2)
|
|
return ERROR_INT("naa2 not defined", procName, 1);
|
|
if (nperline < 1)
|
|
return ERROR_INT("nperline < 1", procName, 1);
|
|
if (nreq < 1)
|
|
return ERROR_INT("nreq < 1", procName, 1);
|
|
|
|
n1 = numaaGetCount(naa1);
|
|
n2 = numaaGetCount(naa2);
|
|
if (n1 < nreq || n2 < nreq)
|
|
return 0;
|
|
|
|
/* Find the lines in naa1 and naa2 with sufficient boxes.
|
|
* Also, find the y-values for each of the lines, and the
|
|
* LH x-values of the first box in each line. */
|
|
line1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));
|
|
line2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
|
|
yloc1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));
|
|
yloc2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
|
|
xleft1 = (l_int32 *)CALLOC(n1, sizeof(l_int32));
|
|
xleft2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
|
|
for (i = 0; i < n1; i++) {
|
|
na1 = numaaGetNuma(naa1, i, L_CLONE);
|
|
numaGetIValue(na1, 0, yloc1 + i);
|
|
numaGetIValue(na1, 1, xleft1 + i);
|
|
nbox = (numaGetCount(na1) - 1) / 2;
|
|
if (nbox >= nperline)
|
|
line1[i] = 1;
|
|
numaDestroy(&na1);
|
|
}
|
|
for (i = 0; i < n2; i++) {
|
|
na2 = numaaGetNuma(naa2, i, L_CLONE);
|
|
numaGetIValue(na2, 0, yloc2 + i);
|
|
numaGetIValue(na2, 1, xleft2 + i);
|
|
nbox = (numaGetCount(na2) - 1) / 2;
|
|
if (nbox >= nperline)
|
|
line2[i] = 1;
|
|
numaDestroy(&na2);
|
|
}
|
|
|
|
/* Enumerate all possible line matches. A 'possible' line
|
|
* match is one where the x and y shifts for the first box
|
|
* in each line are within the maxshiftx and maxshifty
|
|
* constraints, and the left and right sides of the remaining
|
|
* (nperline - 1) successive boxes are within delx of each other.
|
|
* The result is a set of four numas giving parameters of
|
|
* each set of matching lines. */
|
|
nai1 = numaCreate(0); /* line index 1 of match */
|
|
nai2 = numaCreate(0); /* line index 2 of match */
|
|
nasx = numaCreate(0); /* shiftx for match */
|
|
nasy = numaCreate(0); /* shifty for match */
|
|
for (i = 0; i < n1; i++) {
|
|
if (line1[i] == 0) continue;
|
|
y1 = yloc1[i];
|
|
xl1 = xleft1[i];
|
|
na1 = numaaGetNuma(naa1, i, L_CLONE);
|
|
for (j = 0; j < n2; j++) {
|
|
if (line2[j] == 0) continue;
|
|
y2 = yloc2[j];
|
|
if (L_ABS(y1 - y2) > maxshifty) continue;
|
|
xl2 = xleft2[j];
|
|
if (L_ABS(xl1 - xl2) > maxshiftx) continue;
|
|
shiftx = xl1 - xl2; /* shift to add to x2 values */
|
|
shifty = y1 - y2; /* shift to add to y2 values */
|
|
na2 = numaaGetNuma(naa2, j, L_CLONE);
|
|
|
|
/* Now check if 'nperline' boxes in the two lines match */
|
|
match = testLineAlignmentX(na1, na2, shiftx, delx, nperline);
|
|
if (match) {
|
|
numaAddNumber(nai1, i);
|
|
numaAddNumber(nai2, j);
|
|
numaAddNumber(nasx, shiftx);
|
|
numaAddNumber(nasy, shifty);
|
|
}
|
|
numaDestroy(&na2);
|
|
}
|
|
numaDestroy(&na1);
|
|
}
|
|
|
|
/* Determine if there are a sufficient number of mutually
|
|
* aligned matches. Mutually aligned matches place an additional
|
|
* constraint on the 'possible' matches, where the relative
|
|
* shifts must not exceed the (delx, dely) distances. */
|
|
countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely,
|
|
nreq, psame, debugflag);
|
|
|
|
FREE(line1);
|
|
FREE(line2);
|
|
FREE(yloc1);
|
|
FREE(yloc2);
|
|
FREE(xleft1);
|
|
FREE(xleft2);
|
|
numaDestroy(&nai1);
|
|
numaDestroy(&nai2);
|
|
numaDestroy(&nasx);
|
|
numaDestroy(&nasy);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static l_int32
|
|
testLineAlignmentX(NUMA *na1,
|
|
NUMA *na2,
|
|
l_int32 shiftx,
|
|
l_int32 delx,
|
|
l_int32 nperline)
|
|
{
|
|
l_int32 i, xl1, xr1, xl2, xr2, diffl, diffr;
|
|
|
|
PROCNAME("testLineAlignmentX");
|
|
|
|
if (!na1)
|
|
return ERROR_INT("na1 not defined", procName, 1);
|
|
if (!na2)
|
|
return ERROR_INT("na2 not defined", procName, 1);
|
|
|
|
for (i = 0; i < nperline; i++) {
|
|
numaGetIValue(na1, i + 1, &xl1);
|
|
numaGetIValue(na1, i + 2, &xr1);
|
|
numaGetIValue(na2, i + 1, &xl2);
|
|
numaGetIValue(na2, i + 2, &xr2);
|
|
diffl = L_ABS(xl1 - xl2 - shiftx);
|
|
diffr = L_ABS(xr1 - xr2 - shiftx);
|
|
if (diffl > delx || diffr > delx)
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
|
|
/*
|
|
* countAlignedMatches()
|
|
* Input: nai1, nai2 (numas of row pairs for matches)
|
|
* nasx, nasy (numas of x and y shifts for the matches)
|
|
* n1, n2 (number of rows in images 1 and 2)
|
|
* delx, dely (allowed difference in shifts of the match,
|
|
* compared to the reference match)
|
|
* nreq (number of required aligned matches)
|
|
* &same (<return> 1 if @nreq row matches are found; 0 otherwise)
|
|
* Return: 0 if OK, 1 on error
|
|
*
|
|
* Notes:
|
|
* (1) This takes 4 input arrays giving parameters of all the
|
|
* line matches. It looks for the maximum set of aligned
|
|
* matches (matches with approximately the same overall shifts)
|
|
* that do not use rows from either image more than once.
|
|
*/
|
|
static l_int32
|
|
countAlignedMatches(NUMA *nai1,
|
|
NUMA *nai2,
|
|
NUMA *nasx,
|
|
NUMA *nasy,
|
|
l_int32 n1,
|
|
l_int32 n2,
|
|
l_int32 delx,
|
|
l_int32 dely,
|
|
l_int32 nreq,
|
|
l_int32 *psame,
|
|
l_int32 debugflag)
|
|
{
|
|
l_int32 i, j, nm, shiftx, shifty, nmatch, diffx, diffy;
|
|
l_int32 *ia1, *ia2, *iasx, *iasy, *index1, *index2;
|
|
|
|
PROCNAME("countAlignedMatches");
|
|
|
|
if (!nai1 || !nai2 || !nasx || !nasy)
|
|
return ERROR_INT("4 input numas not defined", procName, 1);
|
|
if (!psame)
|
|
return ERROR_INT("&same not defined", procName, 1);
|
|
*psame = 0;
|
|
|
|
/* Check for sufficient aligned matches, doing a double iteration
|
|
* over the set of raw matches. The row index arrays
|
|
* are used to verify that the same rows in either image
|
|
* are not used in more than one match. Whenever there
|
|
* is a match that is properly aligned, those rows are
|
|
* marked in the index arrays. */
|
|
nm = numaGetCount(nai1); /* number of matches */
|
|
if (nm < nreq)
|
|
return 0;
|
|
|
|
ia1 = numaGetIArray(nai1);
|
|
ia2 = numaGetIArray(nai2);
|
|
iasx = numaGetIArray(nasx);
|
|
iasy = numaGetIArray(nasy);
|
|
index1 = (l_int32 *)CALLOC(n1, sizeof(l_int32)); /* keep track of rows */
|
|
index2 = (l_int32 *)CALLOC(n2, sizeof(l_int32));
|
|
for (i = 0; i < nm; i++) {
|
|
if (*psame == 1)
|
|
break;
|
|
|
|
/* Reset row index arrays */
|
|
memset(index1, 0, 4 * n1);
|
|
memset(index2, 0, 4 * n2);
|
|
nmatch = 1;
|
|
index1[ia1[i]] = nmatch; /* mark these rows as taken */
|
|
index2[ia2[i]] = nmatch;
|
|
shiftx = iasx[i]; /* reference shift between two rows */
|
|
shifty = iasy[i]; /* ditto */
|
|
if (nreq == 1) {
|
|
*psame = 1;
|
|
break;
|
|
}
|
|
for (j = 0; j < nm; j++) {
|
|
if (j == i) continue;
|
|
/* Rows must both be different from any previously seen */
|
|
if (index1[ia1[j]] > 0 || index2[ia2[j]] > 0) continue;
|
|
/* Check the shift for this match */
|
|
diffx = L_ABS(shiftx - iasx[j]);
|
|
diffy = L_ABS(shifty - iasy[j]);
|
|
if (diffx > delx || diffy > dely) continue;
|
|
/* We have a match */
|
|
nmatch++;
|
|
index1[ia1[j]] = nmatch; /* mark the rows */
|
|
index2[ia2[j]] = nmatch;
|
|
if (nmatch >= nreq) {
|
|
*psame = 1;
|
|
if (debugflag)
|
|
printRowIndices(index1, n1, index2, n2);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
FREE(ia1);
|
|
FREE(ia2);
|
|
FREE(iasx);
|
|
FREE(iasy);
|
|
FREE(index1);
|
|
FREE(index2);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void
|
|
printRowIndices(l_int32 *index1,
|
|
l_int32 n1,
|
|
l_int32 *index2,
|
|
l_int32 n2)
|
|
{
|
|
l_int32 i;
|
|
|
|
fprintf(stderr, "Index1: ");
|
|
for (i = 0; i < n1; i++) {
|
|
if (i && (i % 20 == 0))
|
|
fprintf(stderr, "\n ");
|
|
fprintf(stderr, "%3d", index1[i]);
|
|
}
|
|
fprintf(stderr, "\n");
|
|
|
|
fprintf(stderr, "Index2: ");
|
|
for (i = 0; i < n2; i++) {
|
|
if (i && (i % 20 == 0))
|
|
fprintf(stderr, "\n ");
|
|
fprintf(stderr, "%3d", index2[i]);
|
|
}
|
|
fprintf(stderr, "\n");
|
|
return;
|
|
}
|
|
|
|
|