forked from hrwhisper/Ucas_course_ppt_auto_download
-
Notifications
You must be signed in to change notification settings - Fork 2
/
MyOCR.py
43 lines (34 loc) · 1.14 KB
/
MyOCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# -*- coding: utf-8 -*-
# @Date : 2017/2/1
# @Author : hrwhisper
import os
from sys import exit
import re
import subprocess
from PIL import Image
# Write handle on the OS null device, opened once at import time; it is
# passed as ``stderr`` to the tesseract subprocess so that output is discarded.
devnull = open(os.devnull, 'w')
# Number of pixels cropped from each edge of the image before OCR.
cut_size = 1
def pre_process(func):
    """Decorate an OCR function so it receives a cleaned-up image file.

    The returned wrapper takes an image path and:

    1. thresholds every pixel value to 0 or 255 and converts the image to
       1-bit mode,
    2. crops ``cut_size`` pixels from each of the four edges,
    3. saves the result back over the original file,
    4. calls ``func`` with that path and returns its result.

    NOTE: the input file is overwritten in place and is always deleted once
    ``func`` has finished, whether it succeeded or raised.

    If ``func`` raises ``FileNotFoundError`` (treated here as "the tesseract
    executable could not be found"), a hint is printed and the process exits
    with status 1.

    :param func: callable taking a single image path argument.
    :return: the wrapping callable, carrying ``func``'s name and docstring.
    """
    from functools import wraps

    @wraps(func)
    def _wrapper(filename):
        # Binarize first (threshold at 127), then drop to 1-bit mode.
        image = Image.open(filename).point(lambda p: 255 if p > 127 else 0).convert("1")
        w, h = image.size
        # Trim a border of cut_size pixels on every side.
        image = image.crop((cut_size, cut_size, w - cut_size, h - cut_size))
        save_name = filename  # the processed image replaces the input file
        image.save(save_name)
        try:
            return func(save_name)
        except FileNotFoundError:
            # Message: "Please check whether tesseract-OCR is installed".
            print('请检查是否安装tesseract-OCR')
            # `pause` is a Windows shell built-in; presumably used to keep the
            # console window open so the message can be read.
            os.system("pause")
            exit(1)
        finally:
            # Always clean up the processed file, including when func fails
            # with an error other than FileNotFoundError.
            os.remove(save_name)

    return _wrapper
@pre_process
def image_to_string(img):
    """Run tesseract on an image file and return the recognized text.

    The text tesseract writes to standard output is decoded, stripped of
    every non-word character (whitespace and punctuation included), and
    lower-cased.

    :param img: path of the image file to recognize.
    :return: the normalized text; an empty string when nothing is recognized.
    :raises subprocess.CalledProcessError: if tesseract exits with a
        non-zero status.
    :raises FileNotFoundError: if the ``tesseract`` executable cannot be
        found.
    """
    # Pass an argument list rather than one command string: without a shell,
    # POSIX would treat the whole string as the program name, and paths
    # containing spaces would be split incorrectly.
    output = subprocess.check_output(['tesseract', img, 'stdout'], stderr=devnull)
    return re.sub(r'\W', '', output.decode()).lower()
if __name__ == '__main__':
    # Manual smoke run: recognize each sample image and print the result.
    for sample_path in ('ucas_code1.jpg', 'ucas_code2.jpg'):
        print(image_to_string(sample_path))