Skip to content

Commit

Permalink
Modifications in Searching
Browse files Browse the repository at this point in the history
  • Loading branch information
aniket22n committed Apr 18, 2022
1 parent 256ab55 commit b3502d9
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 48 deletions.
36 changes: 6 additions & 30 deletions program.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from cgitb import reset
from flask import Flask, jsonify, render_template, redirect, url_for, request, flash, json
# from flask_mongoengine import MongoEngine
from flask_pymongo import PyMongo
from numpy import imag, result_type
# from numpy import imag, result_type
from pymongo import MongoClient

from werkzeug.utils import secure_filename
Expand All @@ -22,19 +21,11 @@
PDFSyntaxError
)

from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate

from elt import translit
import transliterate

import transliterate

from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# from thefuzz import fuzz, process
# import easyocr
# import cv2 as cv
#import urllib.request

program = Flask(__name__) #program is a variable that contains a website and load in memory
program.secret_key = "key" #If app.secret_key isn't set, Flask will not allow you to set or access the session dictionary.
Expand All @@ -46,9 +37,8 @@
# }
# db = MongoEngine() #Initialization of MongoEngine object
# db.init_app(program)

program.config["MONGO_URI"] = "mongodb://localhost:27017/MyDB"
# mongodb_client = PyMongo(program)
# db = mongodb_client.db
client = MongoClient()
db = client.MyDB #database MyDB
collection = db.user #inside MyDB, a collection called user
Expand All @@ -62,9 +52,6 @@
def allowed_file(filename):
    """Tell whether *filename* carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last '.' in the name; it is
    lower-cased before the membership test. A name that contains no '.'
    at all is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    # rpartition yields an empty separator when no '.' is present.
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

# class User(db.Document): #database
# file_name = db.StringField()

# Specify the path to the Tesseract executable; this can also be done using an environment variable.
pt.pytesseract.tesseract_cmd = r'C:\Users\ap888\Desktop\Internship CLIDE\Transliteration_flask\env\Tesseract.exe'

Expand All @@ -76,24 +63,12 @@ def index():
@program.route('/upload', methods=['POST']) #upload page
def upload():
file = request.files['inputFile']
#rs_username = request.form['txtusername']
filename = secure_filename(file.filename) #secure_filename to make sure data can't be forged

if file and allowed_file(file.filename): #file shouldn't empty and shoudn't be any other extension
file.save(os.path.join(program.config['UPLOAD_FOLDER'], filename)) #save() method to save file on location
# usersave = User(file_name=file.filename) #calling user class
# usersave.save() #to save file name in db

if '.' in filename and filename.rsplit('.', 1)[1].lower() == "pdf": #checking whether the file is pdf or not
# f = open("C:\\Users\\ap888\\Desktop\\Internship CLIDE\\Transliteration_flask\\static\\saved_files\\" + file.filename,'rb') #pdf files with images are not allowed
# Pdfreader = PyPDF2.PdfFileReader(f) #pdf reader object
# for i in range(0, Pdfreader.getNumPages()):
# page = Pdfreader.getPage(i)
# image = convert_from_path(page)
# flash(image)
# text = text + page.extractText()
# db.user.insert_one({'file_name': file.filename, 'content' : text})
# f.close()
#poppler_path = r'C:\Program Files\poppler-0.68.0\bin'
images = convert_from_path("C:\\Users\\ap888\\Desktop\\Internship CLIDE\\Transliteration_flask\\static\\saved_files\\" + file.filename)
text = " "
for i in range(len(images)):
Expand All @@ -115,6 +90,7 @@ def upload():
flash(file.filename + ' is successfully uploaded to the database!') # flash message
# file_names = db.user.find()
return redirect('/') # redirect of main page

else:
flash('Invalid Uplaod only txt, pdf, png, jpg, jpeg, gif') #flash message,if condition not satisfied
return redirect('/')
Expand Down Expand Up @@ -160,7 +136,7 @@ def temp(search):
if best_sub_res:
sort_best_sub_res = sorted(best_sub_res.items(), key=lambda x: x[1], reverse=True)
accuracy = fuzz.partial_ratio(sort_best_sub_res[0][0],search)
if accuracy > 40:
if accuracy >= 25:
result[doc["file_name"]] = accuracy
matching_content[doc["file_name"]] = sort_best_sub_res[0][0]
sort_result = sorted(result.items(), key=lambda x: x[1], reverse=True)
Expand Down
6 changes: 5 additions & 1 deletion static/CSS/HomePage.css
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,12 @@ body {

.tran{
color: white;

}

.tran2{
color: rgb(220, 131, 131);
}

.flash{
color: beige;
}
Expand Down
44 changes: 27 additions & 17 deletions templates/result.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,42 @@
{% block title %} Result {% endblock %}
{% block content %}

<p><h1 class="tran">Hindi Transliteration : {{searching_for }}</h1></p>
<p>
<h1 class="tran">Hindi Transliteration : {{searching_for }}</h1>
</p>
<br>
<br>

{% if number%}
<h1 class="tran">{{number}} Files found... </h1>
<br>

<table>
<tr>
<th>Serial number</th>
<th>File Name</th>
<th>Matching</th>
<th>Matching Content</th>
</tr>
{% for key, value in file_names %}
<tr>
<td>{{loop.index}}</td> <!-- Serial Number -->
<td>{{key}}</td> <!-- file name -->
<td>{{value}} %</td> <!-- matching accuracy -->
{% if value >= 50 %}
<td>{{content[key]}}</td> <!-- matching content -->
<th>Serial number</th>
<th>File Name</th>
<th>Matching</th>
<th>Matching Content</th>
</tr>
{% for key, value in file_names %}
<tr>
<td>{{loop.index}}</td> <!-- Serial Number -->
<td>{{key}}</td> <!-- file name -->
<td>{{value}} %</td> <!-- matching accuracy -->
{% if value >= 70 %}
<td>{{content[key]}}</td> <!-- matching content -->
{% else %}
<td>----</td>
<td>----</td>
{% endif %}
</tr>
{% endfor %}
</tr>
{% endfor %}
</table>

{% else %}
<br>
<br>
<br>
<br>
<h1 class="tran2">No files matching the text pattern </h1>
{% endif %}

{% endblock %}

0 comments on commit b3502d9

Please sign in to comment.