Getting Text from epub Files in Python

import ebooklib
from ebooklib import epub
# Parse the EPUB archive located at `file_name`.
# NOTE(review): `file_name` is not defined in the visible snippet — it must be
# assigned before this line runs.
book = epub.read_epub(file_name)
# Materialize the ITEM_DOCUMENT entries into a list so they can be reused.
items = [*book.get_items_of_type(ebooklib.ITEM_DOCUMENT)]
# Reference list of the ebooklib item-type constants. Each line is a bare
# expression: the attribute is looked up and the result discarded, so these
# statements have no effect beyond the lookup itself.
# ITEM_DOCUMENT is the one passed to get_items_of_type() above; presumably the
# others can be passed the same way — confirm against the ebooklib docs.
ebooklib.ITEM_UNKNOWN
ebooklib.ITEM_IMAGE
ebooklib.ITEM_STYLE
ebooklib.ITEM_SCRIPT
ebooklib.ITEM_NAVIGATION
ebooklib.ITEM_VECTOR
ebooklib.ITEM_FONT
ebooklib.ITEM_VIDEO
ebooklib.ITEM_AUDIO
ebooklib.ITEM_DOCUMENT
ebooklib.ITEM_COVER
from bs4 import BeautifulSoup


def chapter_to_str(chapter):
    """Return the paragraph text of one chapter as a single string.

    Args:
        chapter: An object exposing ``get_body_content()`` that returns the
            chapter's HTML body (presumably an ebooklib document item —
            confirm against the caller).

    Returns:
        The text of every ``<p>`` element found in the body, in document
        order, joined with single spaces. Text that is not inside a ``<p>``
        element is not included.
    """
    soup = BeautifulSoup(chapter.get_body_content(), 'html.parser')
    text = [para.get_text() for para in soup.find_all('p')]
    return ' '.join(text)
# Map each chapter's name to its extracted paragraph text.
# NOTE(review): `chapters` is not defined in the visible snippet — presumably
# it is `items` (or a filtered subset of it); confirm before running.
texts = {}
for c in chapters:
    texts[c.get_name()] = chapter_to_str(c)

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store