Skip to content

Commit

Permalink
Removed comments
Browse files Browse the repository at this point in the history
  • Loading branch information
aniket22n committed Apr 27, 2022
1 parent 206f5ca commit 2a8f349
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 50 deletions.
49 changes: 17 additions & 32 deletions program.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,25 @@
from flask import Flask, jsonify, render_template, redirect, url_for, request, flash, json
# from flask_mongoengine import MongoEngine
from flask_pymongo import PyMongo
# from numpy import imag, result_type
from pymongo import MongoClient

from werkzeug.utils import secure_filename
import os

import PIL
from PIL import Image, ImageDraw
# import spacy

import pytesseract as pt

from pdf2image import convert_from_path, convert_from_bytes
from pdf2image.exceptions import (
PDFInfoNotInstalledError,
PDFPageCountError,
PDFSyntaxError
)

import transliterate

from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# from thefuzz import fuzz, process

program = Flask(__name__) #program is a variable that contains a website and load in memory
program.secret_key = "key" #If app.secret_key isn't set, Flask will not allow you to set or access the session dictionary.

# program.config['MONGODB_SETTINGS'] = {
# 'db': 'MyDB',
# 'host': 'localhost',
# 'port': 27017
# }
# db = MongoEngine() #Initialization of MongoEngine object
# db.init_app(program)

#Database connection
program.config["MONGO_URI"] = "mongodb://localhost:27017/MyDB"
client = MongoClient()
db = client.MyDB #database MyDB
Expand Down Expand Up @@ -66,44 +49,43 @@ def upload():

if file and allowed_file(file.filename): #file shouldn't be empty and must have an allowed extension
file.save(os.path.join(program.config['UPLOAD_FOLDER'], filename)) #save() method to save file on location
file_address = os.getcwd() + "\\static\\saved_files\\" + file.filename #stored file address

if '.' in filename and filename.rsplit('.', 1)[1].lower() == "pdf": #checking whether the file is pdf or not
images = convert_from_path("C:\\Users\\ap888\\Desktop\\Internship CLIDE\\Transliteration_flask\\static\\saved_files\\" + file.filename)
images = convert_from_path(file_address)
text = " "
for i in range(len(images)):
text = text + pt.image_to_string(images[i], lang="hin")
db.user.insert_one({'file_name': file.filename, 'content' : text})

elif '.' in filename and filename.rsplit('.', 1)[1].lower() == "txt":
f = open("C:\\Users\\ap888\\Desktop\\Internship CLIDE\\Transliteration_flask\\static\\saved_files\\" + file.filename,'r',encoding = 'utf-8') #if the file is text file
f = open(file_address,'r',encoding = 'utf-8') #if the file is text file
text = f.read() #read the entire content; the text should be UTF-8
db.user.insert_one({'file_name': file.filename, 'content' : text}) #insert content into database
f.close()

else: # if file is image file
img = Image.open("C:\\Users\\ap888\\Desktop\\Internship CLIDE\\Transliteration_flask\\static\\saved_files\\" + file.filename)
img = Image.open(file_address)
text = pt.image_to_string(img, lang="hin") #only for hindi language
db.user.insert_one({'file_name': file.filename, 'content' : text})

# db.user.insert(text_file_doc)
flash(file.filename + ' is successfully uploaded to the database!') # flash message
# file_names = db.user.find()
return redirect('/') # redirect of main page

else:
flash('Invalid Uplaod only txt, pdf, png, jpg, jpeg, gif') #flash message,if condition not satisfied

return redirect('/')


# This function is used to get the input value from the user
@program.route("/search", methods=["POST", "GET"])
def search():
    """Transliterate the submitted search text and redirect to the results view.

    On POST, the ``user_search`` form field is read, stripped of surrounding
    whitespace and passed to ``transliterate.transliteration`` together with
    the Hindi language code. The request is then redirected to the
    ``transliteration_search`` endpoint with the transliterated text as its
    ``search`` argument. Any other request method is redirected to ``/``.
    """
    # Language code for transliteration; it can be found by inspecting the
    # HTTP response of the Google Input Tools page.
    lang = {'hindi': "hi-t-i0-und"}

    # Only a submitted form carries a search string; everything else goes home.
    if request.method != "POST":
        return redirect('/')

    user_search = request.form["user_search"]
    # Transliteration of the input string.
    output = transliterate.transliteration(user_search.strip(), lang['hindi'])
    # Hand the transliterated text over to the transliteration_search view.
    return redirect(url_for("transliteration_search", search=output))

Expand All @@ -120,24 +102,27 @@ def transliteration_search(search):
for x in doc_content:
if len(x) < len(search):
doc_content.remove(x)
res = process.extract(search, doc_content,scorer=fuzz.partial_ratio)
res = process.extract(search, doc_content,scorer=fuzz.partial_ratio) #Fuzzywuzzy to extract all matching strings from file

for x in res:
if len(x[0]) < len(search):
res.remove(x)
best_sub_res = {}
for sub_res in res:

for sub_res in res: #Finding best matching string among all
count = 0
for x in search_list:
if x in sub_res[0]:
count += 1
if count >= len(search_list) - (len(search_list) // 2):
if count >= len(search_list) - (len(search_list) // 2): #string matching score should be greater than 50%
best_sub_res[sub_res[0]] = count

if best_sub_res:
sort_best_sub_res = sorted(best_sub_res.items(), key=lambda x: x[1], reverse=True)
accuracy = fuzz.partial_ratio(sort_best_sub_res[0][0],search)
if accuracy >= 25:
result[doc["file_name"]] = accuracy
matching_content[doc["file_name"]] = sort_best_sub_res[0][0]
result[doc["file_name"]] = accuracy #storing file name and matching accuracy in dict
matching_content[doc["file_name"]] = sort_best_sub_res[0][0] #storing file name and matching string in dict
sort_result = sorted(result.items(), key=lambda x: x[1], reverse=True)
return render_template('result.html', file_names = sort_result, searching_for = search, number=len(sort_result), content = matching_content)

Expand Down
18 changes: 0 additions & 18 deletions templates/upload.html
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,5 @@ <h1 class="ins">Please search here...</h1>
<input type="submit" value="Search" class="btn btn-primary"/>
</form>
</div>
<!-- resulted files -->
<!-- {%if searching_for%}
<h1 class="flash">Hindi Transliteration : {{searching_for }}</h1>

{% for key, value in file_names %}
<h1 class="flash"> {{key}} : {{value}}% matching</h1>
{% endfor %}
{%endif %} -->

<!-- on change event for file is removed -->
<!-- <script>
var loadFile = function(event) {
var output = document.getElementById('output');
output.src = URL.createObjectURL(event.target.files[0]);
output.onload = function() {
URL.revokeObjectURL(output.src)
}
};
</script> -->
{% endblock %}

0 comments on commit 2a8f349

Please sign in to comment.