Skip to content

Commit

Permalink
Add image/pdf to text conversion system
Browse files Browse the repository at this point in the history
  • Loading branch information
Jakiur1234 committed Dec 27, 2024
1 parent 17700cf commit c2cfcd8
Show file tree
Hide file tree
Showing 9 changed files with 313 additions and 4 deletions.
106 changes: 106 additions & 0 deletions app/Controllers/HomeController.php
Original file line number Diff line number Diff line change
@@ -1,10 +1,116 @@
<?php
namespace App\Controllers;

use Spatie\PdfToImage\Pdf;
use thiagoalessio\TesseractOCR\TesseractOCR;

/**
 * Serves the landing page and converts uploaded images / PDFs to text via Tesseract OCR.
 */
class HomeController extends Controllers
{
    /** Start page used when the form field is left empty (the form advertises "20(Default)"). */
    private const DEFAULT_START_PAGE = 20;

    /** Directory (relative to the working directory) that holds uploads and temporary page images. */
    private const UPLOAD_DIR = 'books/';

    /** Lower-case file extensions that may be stored and handed to the OCR pipeline. */
    private const ALLOWED_EXTENSIONS = ['pdf', 'png', 'jpg', 'jpeg'];

    /**
     * Render the landing page.
     */
    public function index()
    {
        return view('welcome');
    }

    /**
     * Handle the upload form: validate the input, store the file under a
     * server-generated name, run OCR on it and re-render the page with the result.
     */
    public function process()
    {
        $message = '';
        $text = '';
        // Page numbers are 1-based, so a start page below 1 is clamped to 1.
        $startPage = max(1, $this->readPageNumber('startPage', self::DEFAULT_START_PAGE));
        // 0 means "up to the last page of the PDF".
        $endPage = $this->readPageNumber('endPage', 0);
        $language = (isset($_POST['language']) && is_string($_POST['language'])) ? $_POST['language'] : '';
        $book = '';

        if (isset($_FILES['book'])) {
            $book = $_FILES['book'];

            if ($book['error'] !== UPLOAD_ERR_OK) {
                $message = 'No file uploaded or upload error';
            } else {
                // The client-supplied name is used only to read the extension; it never becomes a path.
                $fileExtension = strtolower(pathinfo((string) $book['name'], PATHINFO_EXTENSION));

                if (!in_array($fileExtension, self::ALLOWED_EXTENSIONS, true)) {
                    $message = 'Unsupported file type. Please upload a PNG, JPG or PDF file.';
                } elseif (!$this->isValidLanguage($language)) {
                    $message = 'Please choose a language';
                } else {
                    // A random name prevents overwriting other uploads and prevents storing
                    // attacker-chosen file names (e.g. "shell.php") in the upload directory.
                    $filePath = self::UPLOAD_DIR . bin2hex(random_bytes(16)) . '.' . $fileExtension;

                    if (move_uploaded_file($book['tmp_name'], $filePath)) {
                        $message = 'File uploaded successfully';

                        if ($fileExtension === 'pdf') {
                            $this->processPdf($filePath, $startPage, $endPage, $language, $text, $message);
                        } else {
                            $this->processImage($filePath, $language, $text, $message);
                        }
                    } else {
                        $message = 'Failed to upload file';
                    }
                }
            }
        }

        return view('welcome', [
            'message' => $message,
            'text' => $text,
            'startPage' => $startPage,
            'endPage' => $endPage,
            'language' => $language,
            'book' => $book,
            'is_speechable' => true,
        ]);
    }

    /**
     * Read a non-negative integer form field, falling back to $default when the
     * field is missing, empty, negative or not an integer.
     */
    private function readPageNumber(string $field, int $default): int
    {
        $raw = $_POST[$field] ?? '';

        if (!is_string($raw) || trim($raw) === '') {
            return $default;
        }

        $value = filter_var(trim($raw), FILTER_VALIDATE_INT);

        return ($value === false || $value < 0) ? $default : $value;
    }

    /**
     * Accept only language codes made of letters, digits and underscores,
     * optionally joined with "+" (e.g. "eng", "ben+eng"). This rejects the
     * form's placeholder option text and any value containing spaces or shell
     * metacharacters.
     */
    private function isValidLanguage(string $language): bool
    {
        return preg_match('/^[A-Za-z0-9_]+(?:\+[A-Za-z0-9_]+)*$/', $language) === 1;
    }

    /**
     * Delete a file if it exists; used for uploads and temporary page images.
     */
    private function removeFile(string $path): void
    {
        if (is_file($path)) {
            unlink($path);
        }
    }

    /**
     * Run OCR on a single uploaded image.
     *
     * @param string $filePath Path of the stored upload; always deleted before returning.
     * @param string $language Language code passed to the OCR engine.
     * @param string $text     Receives the recognised text, or an error description on failure.
     * @param string $message  Receives the status message shown to the user.
     */
    private function processImage(string $filePath, string $language, &$text, &$message): void
    {
        try {
            $ocr = new TesseractOCR($filePath);
            $text = $ocr->lang($language)->run();

            $message = 'Image processing completed successfully.';
        } catch (\Exception $e) {
            error_log('OCR Processing Failed: ' . $e->getMessage());
            $text = 'Error: ' . $e->getMessage();
            $message = 'Image processing failed';
        } finally {
            // Remove the upload on success AND failure so rejected files never linger on disk.
            $this->removeFile($filePath);
        }
    }

    /**
     * Render the requested page range of a PDF to images and OCR each page.
     *
     * @param string $filePath  Path of the stored upload; always deleted before returning.
     * @param int    $startPage First page to convert (1-based).
     * @param int    $endPage   Last page to convert; 0 or a value beyond the page count means "last page".
     * @param string $language  Language code passed to the OCR engine.
     * @param string $text      Receives the recognised text, or an error description on failure.
     * @param string $message   Receives the status message shown to the user.
     */
    private function processPdf(string $filePath, int $startPage, int $endPage, string $language, &$text, &$message): void
    {
        // Tracks the page image currently on disk so it can be removed if OCR throws.
        $imagePath = null;

        try {
            $pdf = new Pdf($filePath);
            $pages = $pdf->pageCount();
            $text = '';

            if (($endPage > 0) && ($endPage <= $pages)) {
                $pages = $endPage;
            }

            if ($startPage > $pages) {
                $message = "Start page exceeds total pages in the PDF.";
                return;
            }

            // A per-request prefix keeps concurrent conversions from overwriting each other's page images.
            $imagePrefix = self::UPLOAD_DIR . 'page_' . bin2hex(random_bytes(8)) . '_';
            $pageTexts = [];

            for ($page = $startPage; $page <= $pages; $page++) {
                $imagePath = $imagePrefix . $page . '.jpg';
                $pdf->selectPage($page)->save($imagePath);

                $ocr = new TesseractOCR($imagePath);
                $pageTexts[] = $ocr->lang($language)->run();

                $this->removeFile($imagePath);
                $imagePath = null;
            }

            // Separate pages with a newline so the last word of one page does not fuse with the next.
            $text = implode("\n", $pageTexts);
            $message = 'PDF processing completed successfully.';
        } catch (\Exception $e) {
            error_log('PDF Processing Failed: ' . $e->getMessage());
            $text = 'Error: ' . $e->getMessage();
            $message = 'PDF processing failed';
        } finally {
            // Runs on success, on the early return above and on failure.
            if ($imagePath !== null) {
                $this->removeFile($imagePath);
            }
            $this->removeFile($filePath);
        }
    }
}
6 changes: 5 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,9 @@
"name": "Jakiur Rahman",
"email": "70786208+Jakiur1234@users.noreply.github.com"
}
]
],
"require": {
"thiagoalessio/tesseract_ocr": "^2.13",
"spatie/pdf-to-image": "^3.1"
}
}
124 changes: 122 additions & 2 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions resources/views/partials/alert.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?php /* Alert banner: shows the status message passed in as $message, HTML-escaped. */ ?>
<div class="text-center">
    <div class="flex items-center p-4 mb-4 text-sm text-green-800 border border-green-300 rounded-lg bg-green-50 dark:bg-gray-800 dark:text-green-400 dark:border-green-800" role="alert">
        <svg class="flex-shrink-0 inline w-4 h-4 me-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 20 20">
            <path d="M10 .5a9.5 9.5 0 1 0 9.5 9.5A9.51 9.51 0 0 0 10 .5ZM9.5 4a1.5 1.5 0 1 1 0 3 1.5 1.5 0 0 1 0-3ZM12 15H8a1 1 0 0 1 0-2h1v-3H8a1 1 0 0 1 0-2h2a1 1 0 0 1 1 1v4h1a1 1 0 0 1 0 2Z"/>
        </svg>
        <span class="sr-only">Info</span>
        <div class="text-lg">
            <?= htmlspecialchars((string) ($message ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>
        </div>
    </div>
</div>
41 changes: 41 additions & 0 deletions resources/views/partials/form.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php /* Upload form (left column) and OCR result textarea (right column). Expects $text. */ ?>
<div class="grid grid-cols-1 md:grid-cols-2 md:gap-10">
    <div>
        <form action="/" method="post" enctype="multipart/form-data">
            <div class="mb-5">
                <label for="start-page" class="block mb-2 text-sm font-medium text-gray-900 dark:text-white">Start Page ( Fill it if the doc is contain pdf image )</label>
                <input name="startPage" type="number" min="1" id="start-page" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500" placeholder="20(Default)" />
            </div>
            <div class="mb-5">
                <label for="end-page" class="block mb-2 text-sm font-medium text-gray-900 dark:text-white">End Page ( Fill it if the doc is contain pdf image )</label>
                <input name="endPage" type="number" min="1" id="end-page" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500" placeholder="Full Pdf (Default)" />
            </div>
            <div class="mb-5">
                <label for="countries" class="block text-sm font-medium text-gray-900 dark:text-white">Select an language</label>
                <select name="language" id="countries" required class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500">
                    <?php /* Empty value: the placeholder label must never be submitted as a language code. */ ?>
                    <option value="" selected>Choose a language</option>
                    <option value="ben">Bangla</option>
                    <option value="ara">Arabic</option>
                    <option value="eng">English</option>
                </select>
            </div>
            <div class="flex items-center justify-center w-full">
                <label for="dropzone-file" class="flex flex-col items-center justify-center w-full h-64 border-2 border-gray-300 border-dashed rounded-lg cursor-pointer bg-gray-50 dark:hover:bg-gray-800 dark:bg-gray-700 hover:bg-gray-100 dark:border-gray-600 dark:hover:border-gray-500 dark:hover:bg-gray-600">
                    <div class="flex flex-col items-center justify-center pt-5 pb-6">
                        <svg class="w-8 h-8 mb-4 text-gray-500 dark:text-gray-400" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 20 16">
                            <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 13h3a3 3 0 0 0 0-6h-.025A5.56 5.56 0 0 0 16 6.5 5.5 5.5 0 0 0 5.207 5.021C5.137 5.017 5.071 5 5 5a4 4 0 0 0 0 8h2.167M10 15V6m0 0L8 8m2-2 2 2"/>
                        </svg>
                        <p class="mb-2 text-sm text-gray-500 dark:text-gray-400"><span class="font-semibold">Click to upload</span> or drag and drop</p>
                        <p class="text-xs text-gray-500 dark:text-gray-400">PNG, JPG or PDF</p>
                    </div>
                    <input id="dropzone-file" type="file" name="book" accept=".png,.jpg,.jpeg,.pdf" />
                </label>
            </div>
            <div class="text-center my-3">
                <input type="submit" value="Upload File" class="hover:cursor-pointer text-white bg-gradient-to-r from-purple-500 to-pink-500 hover:bg-gradient-to-l focus:ring-4 focus:outline-none focus:ring-purple-200 dark:focus:ring-purple-800 font-medium rounded-lg text-sm px-5 py-2.5 text-center me-2 mb-2"/>
            </div>
        </form>
    </div>
    <div>
        <?php /* OCR output comes from an uploaded document, so it is escaped before being placed in the textarea. */ ?>
        <textarea id="inputText" rows="25" class="block p-2.5 w-full text-sm text-gray-900 bg-gray-50 rounded-lg border border-gray-300 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500" placeholder="Write your thoughts here..."><?= htmlspecialchars((string) ($text ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?></textarea>
    </div>
</div>
9 changes: 9 additions & 0 deletions resources/views/partials/speech.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?php
// Builds a text-to-speech link for the OCR result passed in as $text.
// Language code sent as the "tl" query parameter.
$lang = 'en';
// https keeps the recognised text out of plain-text traffic; urlencode() makes it safe as a query value.
$mp3 = 'https://translate.google.com/translate_tts?ie=UTF-8&q=' . urlencode((string) ($text ?? '')) . '&tl=' . $lang . '&total=1&idx=0&textlen=5&prev=input';
?>

<a href="<?= htmlspecialchars($mp3, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>" target="_blank" rel="noopener noreferrer">Download Speech</a>

11 changes: 10 additions & 1 deletion resources/views/welcome.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,18 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Welcome</title>
<link rel="stylesheet" href="css/style.css">
<script src="https://cdn.tailwindcss.com"></script>
</head>
<body>
<p class="name">Welcome</p>
<div class="p-5 my-12">
    <?php
    // Show the alert only when there is a message to display; an empty string would render a blank banner.
    if (!empty($message)) {
        view('partials.alert', ['message' => $message]);
    }
    ?>
    <?php view('partials.form', ['text' => $text ?? '']); ?>
    <?php
    // The view may be rendered without any data (plain GET), so both variables can be undefined here.
    if (!empty($is_speechable)) {
        view('partials.speech', ['text' => $text ?? '']);
    }
    ?>
</div>
<script src="js/main.js"></script>
</body>
</html>
8 changes: 8 additions & 0 deletions routes/router.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ public static function get( $route, $controller )
}
}

/**
 * Dispatch a POST request to the given controller.
 *
 * Nothing happens unless the request method is POST and the raw request URI
 * is strictly equal to $route; because the raw URI is compared, a query
 * string on the request prevents a match.
 *
 * @param string $route      URI the request must equal exactly.
 * @param mixed  $controller Controller definition handed to callController().
 */
public static function post( $route, $controller )
{
    // Checked in this order so the URI is only read for POST requests.
    if( $_SERVER['REQUEST_METHOD'] !== 'POST' )
    {
        return;
    }

    if( $_SERVER['REQUEST_URI'] !== $route )
    {
        return;
    }

    $router = new static;
    $router->callController( $controller );
}

private function callController($data)
{
$controller = '';
Expand Down
1 change: 1 addition & 0 deletions routes/web.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
use App\Controllers\HomeController;

// GET / is mapped to HomeController::index.
Route::get('/', [ HomeController::class, 'index' ] );
// POST / is mapped to HomeController::process, which handles the upload form submission.
Route::post('/', [ HomeController::class, 'process' ] );

0 comments on commit c2cfcd8

Please sign in to comment.