-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add image/pdf to text conversion system
- Loading branch information
1 parent
17700cf
commit c2cfcd8
Showing
9 changed files
with
313 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,116 @@ | ||
<?php | ||
namespace App\Controllers; | ||
|
||
use Spatie\PdfToImage\Pdf; | ||
use thiagoalessio\TesseractOCR\TesseractOCR; | ||
|
||
/**
 * Handles the landing page and the image/PDF to text conversion form.
 *
 * Uploaded files are stored temporarily under the storage directory, run
 * through OCR, and removed again whether or not the conversion succeeded.
 */
class HomeController extends Controllers
{
    /** Language codes offered by the upload form; anything else is rejected. */
    private const SUPPORTED_LANGUAGES = ['ben', 'ara', 'eng'];

    /** Accepted upload extensions (lower-case, without the dot). */
    private const SUPPORTED_EXTENSIONS = ['pdf', 'png', 'jpg', 'jpeg'];

    /** Directory, relative to the working directory, for uploads and temporary page images. */
    private const STORAGE_DIR = 'books/';

    /**
     * Render the landing page.
     */
    public function index()
    {
        return view('welcome');
    }

    /**
     * Handle the conversion form: validate the request, store the upload,
     * run OCR on it and render the result.
     *
     * Reads `startPage`, `endPage` and `language` from $_POST and the `book`
     * upload from $_FILES. The view receives the same keys as before:
     * message, text, startPage, endPage, language, book, is_speechable.
     */
    public function process()
    {
        $message = '';
        $text = '';
        $startPage = 20;
        $endPage = 0;
        $language = '';
        $book = '';

        // A blank form field arrives as '' and (int)'' is 0, which would
        // silently replace the default; only accept real numbers.
        if (isset($_POST['startPage']) && is_numeric($_POST['startPage'])) {
            // Pages are selected starting at 1, so never go below that.
            $startPage = max(1, (int) $_POST['startPage']);
        }

        if (isset($_POST['endPage']) && is_numeric($_POST['endPage'])) {
            $endPage = (int) $_POST['endPage'];
        }

        // The language is handed to the OCR wrapper, so only known codes are
        // accepted; an unsupported value is dropped rather than passed on.
        if (
            isset($_POST['language'])
            && in_array($_POST['language'], self::SUPPORTED_LANGUAGES, true)
        ) {
            $language = $_POST['language'];
        }

        if (isset($_FILES['book'])) {
            $book = $_FILES['book'];

            if ($book['error'] !== UPLOAD_ERR_OK) {
                $message = 'No file uploaded or upload error';
            } elseif ($language === '') {
                $message = 'Please choose a supported language';
            } else {
                // Compare case-insensitively so "SCAN.PDF" is still a PDF.
                $fileExtension = strtolower((string) pathinfo($book['name'], PATHINFO_EXTENSION));

                if (!in_array($fileExtension, self::SUPPORTED_EXTENSIONS, true)) {
                    $message = 'Unsupported file type. Please upload a PNG, JPG or PDF file';
                } else {
                    // Never store the file under its client-supplied name:
                    // a generated name cannot collide with another upload
                    // and always carries an allow-listed extension.
                    $filePath = self::STORAGE_DIR . bin2hex(random_bytes(8)) . '.' . $fileExtension;

                    if (move_uploaded_file($book['tmp_name'], $filePath)) {
                        $message = 'File uploaded successfully';

                        if ($fileExtension === 'pdf') {
                            $this->processPdf($filePath, $startPage, $endPage, $language, $text, $message);
                        } else {
                            $this->processImage($filePath, $language, $text, $message);
                        }
                    } else {
                        $message = 'Failed to upload file';
                    }
                }
            }
        }

        return view('welcome', [
            'message' => $message,
            'text' => $text,
            'startPage' => $startPage,
            'endPage' => $endPage,
            'language' => $language,
            'book' => $book,
            'is_speechable' => true
        ]);
    }

    /**
     * Run OCR on a single image and delete the image afterwards.
     *
     * @param string $filePath Path of the stored image.
     * @param string $language Language code passed to the OCR engine.
     * @param string $text     Receives the recognised text, or an error description on failure.
     * @param string $message  Receives the status message shown to the user.
     */
    private function processImage($filePath, $language, &$text, &$message)
    {
        try {
            $ocr = new TesseractOCR($filePath);
            $text = $ocr->lang($language)->run();

            $message = 'Image processing completed successfully.';
        } catch (\Exception $e) {
            error_log('OCR Processing Failed: ' . $e->getMessage());
            $text = 'Error: ' . $e->getMessage();
            $message = 'Image processing failed';
        } finally {
            // Clean up on failure too, so failed uploads do not pile up.
            $this->removeFile($filePath);
        }
    }

    /**
     * Convert the requested page range of a PDF to images, OCR each page and
     * concatenate the results. The PDF and every page image are deleted
     * afterwards, including on failure.
     *
     * @param string $filePath  Path of the stored PDF.
     * @param int    $startPage First page to process (values below 1 are treated as 1).
     * @param int    $endPage   Last page to process; 0, negative or beyond the
     *                          document means "up to the last page".
     * @param string $language  Language code passed to the OCR engine.
     * @param string $text      Receives the recognised text, or an error description on failure.
     * @param string $message   Receives the status message shown to the user.
     */
    private function processPdf($filePath, $startPage, $endPage, $language, &$text, &$message)
    {
        try {
            $pdf = new Pdf($filePath);
            $totalPages = $pdf->pageCount();
            $text = '';

            $firstPage = max(1, (int) $startPage);
            $lastPage = (($endPage > 0) && ($endPage <= $totalPages)) ? $endPage : $totalPages;

            if ($firstPage > $totalPages) {
                $message = "Start page exceeds total pages in the PDF.";
                return;
            }

            if ($firstPage > $lastPage) {
                $message = 'Start page must not be greater than end page.';
                return;
            }

            // A per-call token keeps concurrent requests from overwriting
            // each other's temporary page images.
            $token = bin2hex(random_bytes(4));

            for ($page = $firstPage; $page <= $lastPage; $page++) {
                $imagePath = self::STORAGE_DIR . 'page_' . $token . '_' . $page . '.jpg';

                try {
                    $pdf->selectPage($page)->save($imagePath);

                    $ocr = new TesseractOCR($imagePath);
                    $text .= $ocr->lang($language)->run();
                } finally {
                    $this->removeFile($imagePath);
                }
            }

            $message = 'PDF processing completed successfully.';
        } catch (\Exception $e) {
            error_log('PDF Processing Failed: ' . $e->getMessage());
            $text = 'Error: ' . $e->getMessage();
            $message = 'PDF processing failed';
        } finally {
            // Runs for the early returns above as well.
            $this->removeFile($filePath);
        }
    }

    /**
     * Delete a temporary file if it exists, logging instead of failing when
     * it cannot be removed.
     *
     * @param string $path File to delete.
     */
    private function removeFile($path)
    {
        if (is_file($path) && !unlink($path)) {
            error_log('Could not remove temporary file: ' . $path);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<?php
// Status alert partial. Expects $message from the including view and renders
// it as plain text: the value is HTML-escaped, and a missing value renders as
// an empty alert instead of raising an "undefined variable" warning.
?>
<div class="text-center">
    <div class="flex items-center p-4 mb-4 text-sm text-green-800 border border-green-300 rounded-lg bg-green-50 dark:bg-gray-800 dark:text-green-400 dark:border-green-800" role="alert">
        <svg class="flex-shrink-0 inline w-4 h-4 me-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 20 20">
            <path d="M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM9.5 4a1.5 1.5 0 1 1 0 3 1.5 1.5 0 0 1 0-3ZM12 15H8a1 1 0 0 1 0-2h1v-3H8a1 1 0 0 1 0-2h2a1 1 0 0 1 1 1v4h1a1 1 0 0 1 0 2Z"/>
        </svg>
        <span class="sr-only">Info</span>
        <div class="text-lg">
            <?= htmlspecialchars((string) ($message ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>
        </div>
    </div>
</div>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
<?php
// Upload form (left column) and recognised-text area (right column).
// Posts startPage, endPage, language and the "book" file to "/".
// $text is expected from the including view; it is HTML-escaped before being
// placed in the textarea, and a missing value renders as an empty textarea.
?>
<div class="grid grid-cols-1 md:grid-cols-2 md:gap-10">
    <div>
        <form action="/" method="post" enctype="multipart/form-data">
            <div class="mb-5">
                <label for="startPage" class="block mb-2 text-sm font-medium text-gray-900 dark:text-white">Start Page ( Fill it if the doc is contain pdf image )</label>
                <input name="startPage" type="number" min="1" id="startPage" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500" placeholder="20(Default)" />
            </div>
            <div class="mb-5">
                <label for="endPage" class="block mb-2 text-sm font-medium text-gray-900 dark:text-white">End Page ( Fill it if the doc is contain pdf image )</label>
                <input name="endPage" type="number" min="1" id="endPage" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500" placeholder="Full Pdf (Default)" />
            </div>
            <div class="mb-5">
                <label for="countries" class="block text-sm font-medium text-gray-900 dark:text-white">Select a language</label>
                <select name="language" id="countries" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500">
                    <?php /* Empty value so the placeholder label is never submitted as a language code. */ ?>
                    <option value="" selected>Choose a language</option>
                    <option value="ben">Bangla</option>
                    <option value="ara">Arabic</option>
                    <option value="eng">English</option>
                </select>
            </div>
            <div class="flex items-center justify-center w-full">
                <label for="dropzone-file" class="flex flex-col items-center justify-center w-full h-64 border-2 border-gray-300 border-dashed rounded-lg cursor-pointer bg-gray-50 dark:hover:bg-gray-800 dark:bg-gray-700 hover:bg-gray-100 dark:border-gray-600 dark:hover:border-gray-500 dark:hover:bg-gray-600">
                    <div class="flex flex-col items-center justify-center pt-5 pb-6">
                        <svg class="w-8 h-8 mb-4 text-gray-500 dark:text-gray-400" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 20 16">
                            <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 13h3a3 3 0 0 0 0-6h-.025A5.56 5.56 0 0 0 16 6.5 5.5 5.5 0 0 0 5.207 5.021C5.137 5.017 5.071 5 5 5a4 4 0 0 0 0 8h2.167M10 15V6m0 0L8 8m2-2 2 2"/>
                        </svg>
                        <p class="mb-2 text-sm text-gray-500 dark:text-gray-400"><span class="font-semibold">Click to upload</span> or drag and drop</p>
                        <p class="text-xs text-gray-500 dark:text-gray-400">PNG, JPG or PDF</p>
                    </div>
                    <input id="dropzone-file" type="file" name="book" accept=".png,.jpg,.jpeg,.pdf" />
                </label>
            </div>
            <div class="text-center my-3">
                <input type="submit" value="Upload File" class="hover:cursor-pointer text-white bg-gradient-to-r from-purple-500 to-pink-500 hover:bg-gradient-to-l focus:ring-4 focus:outline-none focus:ring-purple-200 dark:focus:ring-purple-800 font-medium rounded-lg text-sm px-5 py-2.5 text-center me-2 mb-2"/>
            </div>
        </form>
    </div>
    <div>
        <textarea id="inputText" rows="25" class="block p-2.5 w-full text-sm text-gray-900 bg-gray-50 rounded-lg border border-gray-300 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500" placeholder="Write your thoughts here..."><?= htmlspecialchars((string) ($text ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?></textarea>
    </div>
</div>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<?php
// Builds a "Download Speech" link that sends the recognised text to Google
// Translate's text-to-speech endpoint. Expects $text from the including view;
// a missing value is treated as an empty string.
// NOTE(review): the speech language is fixed to "en" regardless of the OCR
// language chosen in the form — confirm whether that is intended.
$lang = "en";
// Not read anywhere in this partial; kept in case an including template
// relies on it — TODO confirm and remove.
$converted = "";
// http_build_query URL-encodes every value, yielding the same query string
// the manual concatenation produced.
$mp3 = 'https://translate.google.com/translate_tts?' . http_build_query(
    [
        'ie' => 'UTF-8',
        'q' => (string) ($text ?? ''),
        'tl' => $lang,
        'total' => 1,
        'idx' => 0,
        'textlen' => 5,
        'prev' => 'input',
    ],
    '',
    '&'
);
?>

<a href="<?= htmlspecialchars($mp3, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>" target="_blank" rel="noopener noreferrer">Download Speech</a>
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters