-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconversion_2.py
28 lines (21 loc) · 975 Bytes
/
conversion_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from docling.document_converter import DocumentConverter
from pathlib import Path
# Source document to convert (hard-coded absolute Windows path).
input_path = "C:\\Users\\Nitheesh kumar\\PycharmProjects\\file_conversion_2\\cities.pdf.md"

# Run the docling conversion on the input file.
converter = DocumentConverter()
result = converter.convert(input_path)

# Every export is written into the relative "md_conversion" directory,
# which is created (including parents) if it does not exist yet.
output_dir = Path("md_conversion")
output_dir.mkdir(parents=True, exist_ok=True)

# Base name shared by all outputs. Path.stem drops only the last suffix,
# so a file name like "cities.pdf.md" yields "cities.pdf".
doc_filename = Path(input_path).stem

# Destination paths, one per export.
html_filename = output_dir / f"{doc_filename}.html"
yaml_filename = output_dir / f"{doc_filename}.yaml"
xml_filename = output_dir / f"{doc_filename}.xml"
json_filename = output_dir / f"{doc_filename}.json"

# Write the exports in order: HTML, YAML, document tokens, JSON.
result.document.save_as_html(html_filename)
result.document.save_as_yaml(yaml_filename)
# The document-token export is stored under the ".xml" file name.
result.document.save_as_document_tokens(xml_filename)
result.document.save_as_json(json_filename)

# Echo the conversion result object to stdout.
print(result)