xref: /OpenGrok/opengrok-indexer/src/test/resources/analysis/python/sample.py (revision eeb7e5b33d1bcc524fcc9d1d560447b044e286a4)
1*eeb7e5b3SAdam Hornáček# MIT License
2*eeb7e5b3SAdam Hornáček#
3*eeb7e5b3SAdam Hornáček# Copyright (c) 2017 OsciiArt
4*eeb7e5b3SAdam Hornáček#
5*eeb7e5b3SAdam Hornáček# Permission is hereby granted, free of charge, to any person obtaining a copy
6*eeb7e5b3SAdam Hornáček# of this software and associated documentation files (the "Software"), to deal
7*eeb7e5b3SAdam Hornáček# in the Software without restriction, including without limitation the rights
8*eeb7e5b3SAdam Hornáček# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9*eeb7e5b3SAdam Hornáček# copies of the Software, and to permit persons to whom the Software is
10*eeb7e5b3SAdam Hornáček# furnished to do so, subject to the following conditions:
11*eeb7e5b3SAdam Hornáček#
12*eeb7e5b3SAdam Hornáček# The above copyright notice and this permission notice shall be included in all
13*eeb7e5b3SAdam Hornáček# copies or substantial portions of the Software.
14*eeb7e5b3SAdam Hornáček#
15*eeb7e5b3SAdam Hornáček# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*eeb7e5b3SAdam Hornáček# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*eeb7e5b3SAdam Hornáček# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18*eeb7e5b3SAdam Hornáček# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*eeb7e5b3SAdam Hornáček# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20*eeb7e5b3SAdam Hornáček# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21*eeb7e5b3SAdam Hornáček# SOFTWARE.
22*eeb7e5b3SAdam Hornáček
23*eeb7e5b3SAdam Hornáček#!/usr/bin/env python
24*eeb7e5b3SAdam Hornáček# -*- coding: utf-8 -*-
25*eeb7e5b3SAdam Hornáček
26*eeb7e5b3SAdam Hornáčekfrom keras.models import model_from_json
27*eeb7e5b3SAdam Hornáčekimport numpy as np
28*eeb7e5b3SAdam Hornáčekimport pandas as pd
29*eeb7e5b3SAdam Hornáčekfrom PIL import Image
30*eeb7e5b3SAdam Hornáčekimport pickle
31*eeb7e5b3SAdam Hornáčekimport os
32*eeb7e5b3SAdam Hornáček
33*eeb7e5b3SAdam Hornáček
# parameters
# Model architecture (read as JSON text) and the weights loaded into it.
model_path = "model/model.json"
weight_path = "model/weight.hdf5"
# Path of the image to convert.
image_path = 'sample images/original images/21 original.png'
# Target width of the image in pixels; 0 keeps the original width.
new_width = 0
# Shape of one model input window; indices 0 and 1 are used further down as
# the window's row count and column count.
input_shape = [64, 64, 1]
40*eeb7e5b3SAdam Hornáček
41*eeb7e5b3SAdam Hornáček
def add_mergin(img, mergin):
    """Surround a 2-D image with a white (255) border ``mergin`` pixels wide.

    Args:
        img: 2-D array holding the image. Its values are cast to ``uint8``
            when a border is added.
        mergin: Border width in pixels, applied to all four sides.

    Returns:
        A new ``uint8`` array of shape ``(rows + 2 * mergin, cols + 2 * mergin)``
        with ``img`` centred in it, or ``img`` itself (not a copy) when
        ``mergin`` is 0.

    Raises:
        ValueError: If ``mergin`` is negative.
    """
    if mergin == 0:
        # Nothing to add: hand back the caller's array unchanged.
        return img
    if mergin < 0:
        raise ValueError("mergin must be non-negative, got %r" % (mergin,))

    rows, cols = img.shape[0], img.shape[1]
    img_new = np.full([rows + 2 * mergin, cols + 2 * mergin], 255, dtype=np.uint8)
    # Explicit end indices rather than negative slicing, so the target region
    # is always exactly the size of ``img``.
    img_new[mergin:mergin + rows, mergin:mergin + cols] = img
    return img_new
49*eeb7e5b3SAdam Hornáček
50*eeb7e5b3SAdam Hornáček
def pickleload(path):
    """Load and return the object stored in the pickle file at ``path``.

    Args:
        path: Path of the pickle file; it is opened in binary mode.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only call this
    # on files from a trusted source.
    with open(path, mode='rb') as f:
        return pickle.load(f)
55*eeb7e5b3SAdam Hornáček
56*eeb7e5b3SAdam Hornáček
# load model
# Read the architecture JSON inside a context manager so the file handle is
# closed as soon as the text has been read.
with open(model_path) as model_file:
    json_string = model_file.read()
model = model_from_json(json_string)
model.load_weights(weight_path)
print("model load done")
62*eeb7e5b3SAdam Hornáček
char_list_path = "data/char_list.csv"
char_list = pd.read_csv(char_list_path, encoding="cp932")
print("len(char_list)", len(char_list))
# Keep only the rows whose 'frequency' is at least 10.
char_list = char_list[char_list['frequency'] >= 10]
# Series.as_matrix() was removed in pandas 1.0; to_numpy() is its replacement
# and returns the 'char' column as a NumPy array.
char_list = char_list['char'].to_numpy()

# Position of the single-byte space in the filtered table. It is used further
# down as the index of the "space" entry in the model's prediction vector.
space = next((k for k, v in enumerate(char_list) if v == " "), None)
if space is None:
    # Fail here with a clear message instead of a NameError at first use.
    raise ValueError("no ' ' entry found in " + char_list_path)
print("class index of 1B space:", space)
75*eeb7e5b3SAdam Hornáček
76*eeb7e5b3SAdam Hornáček
# Border added around the image before conversion.
# NOTE(review): 18 appears to be the text line pitch in pixels (the conversion
# loop steps rows by 18) — confirm.
mergin = (input_shape[0] - 18) // 2

# Open the image in a context manager so the underlying file is closed once
# the resized pixel data has been copied into an array.
with Image.open(image_path) as src:
    orig_width, orig_height = src.size
    if new_width == 0:
        new_width = orig_width
    # Scale the height by the same factor as the width.
    new_height = int(orig_height * new_width / orig_width)
    img = np.array(src.resize((new_width, new_height), Image.LANCZOS))

if len(img.shape) == 3:
    # Multi-channel image: only the first channel is used.
    img = img[:, :, 0]

# White (255) canvas larger than the image by 2 * mergin + 18 in each
# dimension; the image is pasted at offset (mergin, mergin).
img_new = np.full([img.shape[0] + 2 * mergin + 18, img.shape[1] + 2 * mergin + 18],
                  255, dtype=np.uint8)
img_new[mergin:mergin + new_height, mergin:mergin + new_width] = img
# Scale to float32 in [0, 1].
img = img_new.astype(np.float32) / 255
91*eeb7e5b3SAdam Hornáček
# Mapping from character to its glyph array, loaded from a pickle file.
char_dict_path = "data/char_dict.pkl"
char_dict = pickleload(char_dict_path)

print("len(char_dict)", len(char_dict))

# exist_ok=True replaces the separate isdir() check, so there is no window
# between checking for the directory and creating it.
output_dir = "output/"
os.makedirs(output_dir, exist_ok=True)
100*eeb7e5b3SAdam Hornáček
# Convert the image 18 times. Each pass prepends one blank row to the image,
# shifting the picture down by one pixel relative to the 18-pixel line grid,
# then writes a rendered PNG and the matching text file.
img_width = img.shape[1]
# Blank row (value 1) in the image's own dtype, so concatenating it does not
# upcast the whole image.
new_line = np.ones([1, img_width], dtype=img.dtype)
# File name without its extension, whatever the extension's length.
output_stem = os.path.splitext(os.path.basename(image_path))[0]

for slide in range(18):
    print("converting:", slide)
    # The line count is taken BEFORE the extra row is added, as in the
    # original statement order.
    num_line = (img.shape[0] - input_shape[0]) // 18
    img = np.concatenate([new_line, img], axis=0)
    predicts = []
    text = []
    for h in range(num_line):
        w = 0
        # While set, the "space" class is zeroed out of the prediction. It
        # starts set for each line and is set again after a predicted space,
        # so a line cannot start with a space or have two in a row.
        penalty = True
        predict_line = []
        while w <= img_width - input_shape[1]:
            input_img = img[h * 18:h * 18 + input_shape[0], w:w + input_shape[1]]
            input_img = input_img.reshape([1, input_shape[0], input_shape[1], 1])
            predict = model.predict(input_img)
            if penalty:
                predict[0, space] = 0
            predict = np.argmax(predict[0])
            penalty = (predict == space)
            char = char_list[predict]
            predict_line.append(char)
            # Advance by the width of the chosen glyph.
            w += char_dict[char].shape[1]
        predicts.append(predict_line)
        text.append("".join(predict_line) + '\r\n')

    # Render the predicted characters onto a white canvas.
    img_aa = np.full_like(img, 0xFF, dtype=np.uint8)

    for h in range(num_line):
        w = 0
        for char in predicts[h]:
            glyph = char_dict[char]
            char_width = glyph.shape[1]
            # NOTE(review): assumes glyphs are 16 rows high with values in
            # {0, 1} — confirm against the contents of char_dict.
            img_aa[h * 18:h * 18 + 16, w:w + char_width] = 255 - glyph.astype(np.uint8) * 255
            w += char_width

    img_aa = Image.fromarray(img_aa)
    img_aa = img_aa.crop((0, slide, new_width, new_height + slide))
    save_stem = os.path.join(output_dir,
                             output_stem + '_' + 'w' + str(new_width)
                             + '_slide' + str(slide))
    save_path = save_stem + '.png'
    img_aa.save(save_path)

    # newline='' writes the explicit '\r\n' endings as-is; without it,
    # platforms that translate '\n' would emit '\r\r\n'.
    with open(save_stem + '.txt', 'w', newline='') as f:
        f.writelines(text)
# Print two literal URL strings; the second is triple-quoted and contains
# single quotes and an ampersand.
print('http://example.com?a=')
print('''http://example.com?a='b'&''')
153