-
Notifications
You must be signed in to change notification settings - Fork 0
/
pseg-to-lineimage
executable file
·100 lines (86 loc) · 3.04 KB
/
pseg-to-lineimage
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/python
from pylab import *
import os
import os.path
import argparse
import numpy as np
import scipy.ndimage as ndi
import simplejson
import PIL
import sqlite3
import StringIO
from ocropy2 import CenterNormalizer
# Render images in grayscale by default (affects imshow/imsave below).
rc("image", cmap="gray")

# The help text has to be passed as `description`: the first positional
# parameter of ArgumentParser is `prog`, which would put the whole text into
# the usage line.  RawDescriptionHelpFormatter keeps the line breaks.
parser = argparse.ArgumentParser(
    description="""
Mark the center of each text line of a page and save the result as an image.
Example:
`pseg-to-lineimage *.json`
For each input image, there should be three files:
- image.png -- the grayscale document image (black on white)
- image.pseg.png -- the page segmentation image
- image.json -- the textual ground truth, bounding boxes, and pseg values
The output for each input is written to image.lines.png.
""",
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("-d", "--display", action="store_true",
                    help="show the page and its line image after each input")
parser.add_argument("-D", "--dilate", type=int, default=3,
                    help="binary dilation iterations applied to the line "
                         "image; 0 disables (default: %(default)s)")
parser.add_argument("inputs", nargs="*",
                    help="input files; only the path without its final "
                         "extension is used to locate the three files")
# Parsed at module level because process_image reads the global `args`.
args = parser.parse_args()
def pildumps(image, format="PNG"):
    """Compress an image array and return the encoded bytes.

    Args:
        image: numpy array.  Floating-point arrays must lie in [0, 1]
            (with a tolerance of 0.001); they are clipped, scaled by 255
            and truncated to uint8.  Other dtypes are handed to PIL as is.
        format: image format name passed to PIL's `save`.

    Returns:
        The encoded image as a byte string.

    Raises:
        AssertionError: if a floating-point image is outside [0, 1].
    """
    # Any floating dtype is scaled, not just float32/float64, since PIL
    # cannot encode unscaled [0, 1] float data anyway.
    if np.issubdtype(image.dtype, np.floating):
        assert np.amin(image) > -0.001 and np.amax(image) < 1.001, \
            "floating-point image values must lie in [0, 1]"
        image = np.clip(image, 0.0, 1.0)
        image = np.array(image * 255.0, 'uint8')
    # Local imports: `import PIL` alone does not load the Image submodule,
    # and io.BytesIO is the binary buffer available on Python 2 and 3.
    import io
    from PIL import Image
    buf = io.BytesIO()
    Image.fromarray(image).save(buf, format=format)
    return buf.getvalue()
def grow_bbox(bbox, shape, r=10):
    """Pad a bounding box by `r` on every side without leaving the page.

    Args:
        bbox: (x0, y0, x1, y1).
        shape: (width, height) limit for x1 and y1; x0 and y0 are limited
            to 0 from below.
        r: padding added on each side.

    Returns:
        The padded box as an (x0, y0, x1, y1) tuple.
    """
    width, height = shape
    left, top, right, bottom = bbox
    return (
        max(left - r, 0),
        max(top - r, 0),
        min(right + r, width),
        min(bottom + r, height),
    )
def pack_rgb(image):
    """Combine the first three channels of an image into one integer each.

    The input is converted to an integer array (dtype code 'i') and indexed
    as [row, column, channel]; channel 0 lands in bits 16 and up, channel 1
    in bits 8-15 and channel 2 in the low bits.  Further channels are
    ignored.

    Returns:
        A 2-D integer array with one packed value per pixel.
    """
    channels = np.array(image, 'i')
    red = channels[:, :, 0]
    green = channels[:, :, 1]
    blue = channels[:, :, 2]
    return (red << 16) | (green << 8) | blue
def process_image(fname):
    """Draw the center of every text line of one page and save it as a PNG.

    Only the path of `fname` without its final extension (<base>) is used.
    Three files must exist next to each other:

    - <base>.png       the page image (inverted and scaled here so that
                       its maximum becomes 1)
    - <base>.pseg.png  the page segmentation, read as packed RGB values
    - <base>.json      a dict with a "lines" list; every entry needs
                       "bbox", "pseg" and one of "text" / "transcript"

    The result is written to <base>.lines.png.  The module-level `args`
    supplies the `dilate` and `display` options.

    Raises:
        IOError: if one of the three input files is missing.
        ValueError: if a line has neither "text" nor "transcript".
    """
    # `import PIL` alone does not load the Image submodule.
    from PIL import Image

    basename, _ = os.path.splitext(fname)
    for suffix in (".png", ".json", ".pseg.png"):
        if not os.path.exists(basename + suffix):
            raise IOError("missing input file: " + basename + suffix)
    with open(basename + ".json") as stream:
        segmentation = simplejson.load(stream)
    image = imread(basename + ".png")
    # Prints exactly what `print basename, image.shape` did, on Python 2 and 3.
    print("%s %s" % (basename, image.shape))
    # Invert so that ink is bright, then scale so the maximum is 1.
    image = np.amax(image) - image
    peak = np.amax(image)
    if peak > 0:
        # A uniform page has peak 0; dividing would fill the image with NaNs.
        image /= peak
    pseg = pack_rgb(Image.open(basename + ".pseg.png"))
    line_image = np.zeros(pseg.shape)
    h, w = pseg.shape
    cn = CenterNormalizer()
    for line in segmentation["lines"]:
        x0, y0, x1, y1 = grow_bbox(line["bbox"], (w, h), 10)
        text = line.get("text") or line.get("transcript")
        if text is None:
            raise ValueError("line without text or transcript in "
                             + basename + ".json")
        # Keep only the pixels labelled with this line's pseg value,
        # widened by a 5-pixel maximum filter.
        mask = (line["pseg"] == pseg[y0:y1, x0:x1])
        mask = ndi.maximum_filter(mask, 5)
        masked_image = mask * image[y0:y1, x0:x1]
        cn.measure(masked_image)
        # cn.center is used as one row index per column of the crop
        # (presumably the estimated line center -- confirm in ocropy2).
        cimage = np.zeros(masked_image.shape)
        for col, row in enumerate(cn.center):
            cimage[row, col] = 1.0
        # Merge instead of assigning: the grown boxes of neighbouring lines
        # can overlap, and a plain assignment would erase their centers.
        line_image[y0:y1, x0:x1] = np.maximum(line_image[y0:y1, x0:x1], cimage)
    if args.dilate > 0:
        line_image = ndi.binary_dilation(line_image, iterations=args.dilate)
    if args.display:
        ion()
        subplot(121)
        imshow(image)
        subplot(122)
        imshow(line_image)
        # Wait for one click, or at most one second, before continuing.
        ginput(1, 1.0)
    imsave(basename + ".lines.png", line_image)
# Process every input named on the command line, in the order given.
for input_path in args.inputs:
    process_image(input_path)