Djnago rest framework html/url в docx
Я создаю Django API, который конвертирует любой URL или HTML файл в pdf и Docx. Реализованный ниже код уже конвертирует в формат pdf с помощью пакета pdfkit. Я использую python-docx для генерации в Docx, но я не знаю, как с этим работать. Я хотел бы получить любую поддержку, пожалуйста. У меня нет глубоких знаний, и любая помощь будет оценена по достоинству.
Вот мой convert.py файл:
import io
from pydoc import doc
from tempfile import NamedTemporaryFile
from typing import IO
from urllib.parse import urlparse
import pdfkit
from docx import Document
class ConvertingError(Exception):
"""
This exception represents an error during converting.
In example, when Host of a url is unreachable.
In other words, this is a wrapper for wkhtmltopdf errors.
"""
pass
def url_to_pdf(url: str) -> IO:
"""Fetch HTML from url and convert the page to pdf,"""
with NamedTemporaryFile('w+b') as tmpf:
try:
pdfkit.from_url(url, tmpf.name)
except OSError as e:
raise ConvertingError from e
pdf = io.BytesIO(tmpf.read())
return pdf
def html_to_pdf(html: str) -> IO:
"""Convert HTML string to pdf."""
with NamedTemporaryFile('w+b') as tmpf:
try:
pdfkit.from_string(html, tmpf.name)
except OSError as e:
raise ConvertingError from e
pdf = io.BytesIO(tmpf.read())
return pdf
def filename_from_url(url: str) -> str:
"""
Generate pdf filename using a hostname of a URL.
If no hostname is provided, return 'default.pdf' as filename.
"""
parsed = urlparse(URL)
return (parsed.hostname or 'default') + '.pdf'
def url_to_docx(url: str) -> IO:
pass
def html_to_docx(html: str) -> IO:
pass
И мой views.py файл
from fileinput import filename
from typing import IO
from django.http import FileResponse
from rest_framework.exceptions import ValidationError
from rest_framework.parsers import MultiPartParser
from rest_framework.viewsets import ViewSet
from .converter import filename_from_url, html_to_pdf, url_to_pdf, ConvertingError,
url_to_docx, html_to_docx
from .serializers import HtmlFileInputSerializer, UrlInputSerializer
def generate_from_html(self, request):
serializer = HtmlFileInputSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
file: IO = serializer.validated_data['file']
content = str(file.read())
try:
pdf = html_to_pdf(content)
except ConvertingError:
raise ValidationError('The file is of inappropriate type or corrupted.')
response = FileResponse(pdf)
response["Content-Type"] = 'application/pdf'
return response
def generate_docx_from_html(self, request):
pass
# class UrlConverterViewSet(ViewSet):
def generate_from_url(self, request):
serializer = UrlInputSerializer(data=request.data)
serializer.is_valid(raise_exception=True)
url: str = serializer.validated_data['url']
try:
pdf = url_to_pdf(URL)
except ConvertingError:
raise ValidationError('The url is invalid or unreachable.')
filename = serializer.validated_data.get('filename') or filename_from_url(URL)
response = FileResponse(pdf, filename=filename)
response["Content-Type"] = 'application/pdf'
return response
def generate_docx_from_url(self, request):
pass
class GeneratePdf(ViewSet):
# generate pdf view from html file and URL
parser_classes = (MultiPartParser,)
def create(self, request):
if request.data.get('file'):
return generate_from_html(self, request)
elif request.data.get('url'):
return generate_from_url(self, request)
else:
raise ValidationError('The file or url is invalid or unreachable.')