Catmandu-HOCR
view release on metacpan or search on metacpan
t/html/strong.html view on Meta::CPAN
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<!-- Document head of an hOCR file. The title element below is empty in this file. -->
<title>
</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<!-- ocr-system names the producing engine and version: tesseract 3.04.00. -->
<meta name='ocr-system' content='tesseract 3.04.00' />
<!-- ocr-capabilities lists the hOCR class names used in the body:
     ocr_page, ocr_carea, ocr_par, ocr_line and ocrx_word. -->
<meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word'/>
</head>
<body>
<!-- Single page element (id page_1). Its title attribute carries the source
     image name "out.tif", a bbox of 0 0 2737 3820 and ppageno 0.
     NOTE(review): bbox values are presumably pixel coordinates as defined by
     the hOCR specification; confirm against the spec. -->
<div class='ocr_page' id='page_1' title='image "out.tif"; bbox 0 0 2737 3820; ppageno 0'>
<!-- One content area (block_1_3) nested in the page, with its own bbox. -->
<div class='ocr_carea' id='block_1_3' title="bbox 204 292 947 2304">
<!-- One left-to-right paragraph (par_1_3) holding three ocr_line spans
     (line_1_3, line_1_4, line_1_5). -->
<p class='ocr_par' dir='ltr' id='par_1_3' title="bbox 208 292 947 404">
<!-- Each ocr_line span carries a bbox and a baseline in its title and wraps
     ocrx_word spans. Each visible word span has a bbox and an x_wconf value,
     lang 'nld' and dir 'ltr', and its text is wrapped in a strong element.
     NOTE(review): the three line rows below are cut off ("...") in this view,
     so only the first word of each line is visible here. The file name
     suggests the strong wrapping is what this fixture exercises; confirm
     against the test that loads it. -->
<span class='ocr_line' id='line_1_3' title="bbox 209 292 947 324; baseline -0.005 -6"><span class='ocrx_word' id='word_1_3' title='bbox 209 295 389 318; x_wconf 82' lang='nld' dir='ltr'><strong>stormsoldaten</strong></span> <span class='ocrx_wor...
</span>
<span class='ocr_line' id='line_1_4' title="bbox 208 333 492 356; baseline -0.007 0"><span class='ocrx_word' id='word_1_10' title='bbox 208 333 252 356; x_wconf 79' lang='nld' dir='ltr'><strong>wel</strong></span> <span class='ocrx_word' id='wor...
</span>
<span class='ocr_line' id='line_1_5' title="bbox 290 375 865 404; baseline -0.005 -4"><span class='ocrx_word' id='word_1_14' title='bbox 290 377 438 404; x_wconf 71' lang='nld' dir='ltr'><strong>Belgische</strong></span> <span class='ocrx_word' ...
</span>
</p>
</div>
</div>
</body>
</html>
( run in 0.713 second using v1.01-cache-2.11-cpan-39bf76dae61 )