-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultifunction_tools.py
159 lines (134 loc) · 6.03 KB
/
multifunction_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Multifunction Tools Version 2, Author: Marco Anian and Review by Nova Astra
#---------------------Doc--------------------------#
"""
📄 MultiOCR AI Tools – Version 2
Author: Marco Anian | Review & AI Support: Nova Astra
This module provides core functionality for image-based OCR, AI-powered image descriptions,
and summarization tools using Tesseract and Ollama-compatible models like Mistral and LLaVA.
Features:
- Webcam capture
- Web image downloader
- Tesseract OCR text extraction
- AI-powered image analysis and summarization
- Plain text export
Use this as a backend utility inside the GUI shell (ocr_shell_gui.py), or as a standalone helper in future projects.
Dependencies:
pip install ollama
pip install opencv-python
pip install pytesseract
"""
#------------------Imports-------------------------#
import ollama
from tkinter import filedialog, messagebox
import requests # for loading Image from web
from PIL import Image, ImageTk
import pytesseract # Need be also installed on windows
import cv2
#----------------Constants-------------------------#
IMAGE_PATH = "assets/image/"
#--------------Class-------------------------------#
class MultiOCRAiTools:
def __init__(self,master=None):
# Init Section / if come
self.master = master
# ---------------Method / Function------------------------#
#---------------------Get Image From multiple Sources-----#
def read_image_from_web(self, image_link, image_name):
"""Download Image from web and save it in Asset folder """
url = image_link # Image link will be past in the Gui to here
image_name_by_user = image_name
try:
response = requests.get(url)
with open(f"{IMAGE_PATH}{image_name_by_user}.jpg", "wb") as file:
data = file.write(response.content)
print(f"Data Downloaded to: {IMAGE_PATH}")
return data
except Exception as e:
messagebox.showerror("Download Error", f"{e}\nCheck the image link.")
def extract_text_from_image(self, file_path,lang):
"""Using Tesseract OCR to extract text from Image
lang = deu, eng, jpn, esp, chi_sim, chi_tra, fas """
# Very Important for Tesseract, if not inside, will get Error (DO NOT CHANGE)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
image = Image.open(file_path) # To open image path
text = pytesseract.image_to_string(image, lang=lang) # Main Process
return text
def live_web_cam_image(self):
"""Using CV2 for live mode, capture Image and save it"""
cap = cv2.VideoCapture(0) # init / open camera / 0 = main camera
# Error handling
if not cap.isOpened():
print("❌ Cannot open camera.")
return False # Indicate failure
print("🎥 Camera initialized. Press 's' to analyze frame or 'q' to exit.")
messagebox.showinfo(message="🎥 Camera initialized.\n Press 's' to analyze frame\n or 'q' to exit.")
n = 0
ext = "jpg" # image format
while True:
ret, frame = cap.read()
if not ret:
print("⚠️ Can't receive frame (stream ended?). Exiting...")
break
# Display live video feed
cv2.imshow('Live Feed', frame)
key = cv2.waitKey(1) & 0xFF
if key == ord('q'):
break # Quit program
elif key == ord('s'):
print("\n📸 Capturing and processing image with AI... 🧠💡")
cv2.imwrite(f"{IMAGE_PATH}image_{n}.{ext}", frame)
n += 1 # for other image name will be +1
print("saved!!!")
# Cleanup
cap.release()
cv2.destroyAllWindows()
print("✅ Camera closed.")
return True
#-------------------Ai Section-------------------------------------#
def image_to_text_description(self,image_path):
"""This is for Scrap class --> Scrape Image func"""
response = ollama.chat(
model="llava:13b",
messages=[
{
"role": "user",
"content": "Describe this image in detail and extract any readable data.",
"images": [image_path]
}
]
)
description = response['message']['content']
text_format = f"Image analyze: {description}"
return {"role": "assistant", "content": text_format} # ✅ return ready-to-use format
def summarize_text(self, text_description):
"""This is for Scrap class --> Summarize func"""
system_msg = (
"You are an AI summarizer. Read the following text carefully and summarize it using the same language as the input. "
"Do not translate. If the text is in English, the summary must also be in English, If the text is in German, the summary must also be in German. "
"Focus on key points and important content only. Use bullet points or short paragraphs."
)
response = ollama.chat(
model="mistral:latest", # or "gemma3:12b" for softer tone
messages=[
{"role": "system", "content": system_msg},
{"role": "user", "content": text_description}
]
)
return response["message"]["content"]
def save_text(self, data):
"""This function is to write AI respond to text"""
path_file = filedialog.asksaveasfilename(defaultextension="*.txt")
try:
with open(path_file,"w", encoding="utf-8") as f:
f.write(data)
messagebox.showinfo(message=f"file saved{path_file}")
except Exception as e:
messagebox.showinfo(message=f"{e},uppsss!!\nsomething went wrong :/")
# Test Run
if __name__=="__main__":
tools = MultiOCRAiTools()
#tools.live_web_cam_image()
path = filedialog.askopenfilename()
desc = tools.extract_text_from_image(path, "deu")
summa_res = tools.summarize_text(desc)
print(summa_res)