-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAudiobook_code.py
47 lines (34 loc) · 1.32 KB
/
Audiobook_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from gtts import gTTS
import PyPDF2
def pdf_to_text(pdf_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_path: Location of the PDF file, passed straight to
            ``PyPDF2.PdfReader``.

    Returns:
        One string holding the text of all pages in page order, with
        nothing inserted between pages. Empty if no page yields any text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    # Join once instead of growing a string with ``+=`` per page; ``or ""``
    # keeps a page that yields no text from breaking the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Language codes to synthesize; this list is passed to text_to_audio() as
# its `languages` argument at the bottom of the script.
languages = ['en', 'de', 'fr', 'es']
def text_to_audio(text, languages=None, save_paths=None):
    """Convert text to speech and save one MP3 file per language.

    Args:
        text: The text to synthesize. If it is empty, a message is printed
            and nothing is written.
        languages: Iterable of language codes, each handed to ``gTTS`` as
            ``lang``. Defaults to ``['en']``.
        save_paths: Output file paths, one per language and in the same
            order. Defaults to ``output_<lang>.mp3`` for each language.

    Raises:
        ValueError: If ``save_paths`` and ``languages`` differ in length.
    """
    if not text:
        print("No text to convert to audio.")
        return

    # ``None`` stands in for the default so that no list object is shared
    # between calls (the mutable-default-argument pitfall).
    languages = ['en'] if languages is None else list(languages)

    if save_paths is None:
        save_paths = ['output_{}.mp3'.format(lang) for lang in languages]
    else:
        save_paths = list(save_paths)
        if len(save_paths) != len(languages):
            # zip() below would silently skip the unmatched entries.
            raise ValueError(
                "save_paths must contain exactly one path per language "
                f"(got {len(save_paths)} paths for {len(languages)} languages)"
            )

    for lang, save_path in zip(languages, save_paths):
        tts = gTTS(text=text, lang=lang, slow=False)
        tts.save(save_path)
        print(f'Audiobook saved as {save_path}')
# Raw string: the backslashes in this Windows path must stay literal instead
# of being parsed as (invalid) escape sequences such as "\P" or "\A".
pdf_path = r"D:\Portfolio projects\Audiobook\content.pdf"
# Pull the text out of the PDF, then write one MP3 per configured language.
extracted_text = pdf_to_text(pdf_path)
text_to_audio(extracted_text, languages=languages)