Hello everyone! In this article, I will share my implementation of a Telegram bot that converts articles from the Internet into mp3 files. For this I will be using Python 3.6 and related libraries. So let's get started.
pip install pyttsx3
pip install langdetect
pip install pydub
pip install bs4
pip install telebot
pip install PyTelegramBotAPI
The project consists of three files:
bot.py
parser.py
voice.py
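bot.py contains the bot itself and its message handlers. parser.py validates the link, downloads the page and extracts the article text, and voice.py turns that text into speech. In short: the user sends the bot a message, the bot checks that it is a valid link (a url to a page that contains an <article> tag, in a supported language), extracts the text with bs4, voices it with pyttsx3, converts the result to mp3 and sends the file back. Let's go through the files one by one...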
bot.py
We import the dependencies, create the bot and register a message_handler that responds to the /start command.
import telebot
from voice import get_mp3_file, get_file_name
from parser import get_article_text, get_article_language, get_link
# Bot client shared by all handlers below; 'TOKEN' is a placeholder string
# that has to be replaced with the real bot API token.
bot = telebot.TeleBot('TOKEN')
@bot.message_handler(commands=['start'])
def forward_message(message):
    """Answer the /start command by sending the greeting text to the sender."""
    greeting = (", ,"
                " , "
                " mp3 .")
    chat_id = message.from_user.id
    bot.send_message(chat_id, greeting)
# Module-level busy flag: True while an article is being converted, so that
# links arriving in the meantime are refused instead of processed in parallel.
is_running = False


@bot.message_handler(content_types=['text'])
def forward_message(message):
    """Handle a text message: convert the linked article to mp3 and send it.

    The message text is validated with get_link. If it is a link, the article
    text and its language are determined; when the language is supported the
    text is synthesized into an mp3 file which is sent back to the sender.
    Otherwise the sender receives the corresponding refusal message.

    Exceptions raised by the helpers are not swallowed, but the busy flag is
    always released so a single failure cannot leave the bot permanently
    "busy".
    """
    global is_running  # the flag is shared between all handler invocations
    chat_id = message.from_user.id

    if is_running:
        # Another article is still being processed.
        bot.send_message(chat_id, " , "
                                  " ...")
        return

    link = get_link(message.text)
    if not link:
        # The message is not a well-formed link.
        bot.send_message(chat_id, ", , "
                                  " .")
        return

    is_running = True
    try:
        article_text = get_article_text(link)
        article_language = get_article_language(article_text)
        if article_language:
            # article_language[0] is the label shown to the user,
            # article_language[1] is what the voice selection is matched on.
            bot.send_message(chat_id, ", .")
            bot.send_message(chat_id, f" - {article_language[0]}.")
            bot.send_message(chat_id, " ...")
            file_name = get_file_name(link)
            get_mp3_file(file_name, article_text, article_language[1])
            # Close the file handle once the upload call has returned.
            with open(file_name, 'rb') as audio_file:
                bot.send_audio(chat_id, audio=audio_file)
        else:
            bot.send_message(chat_id, " ,"
                                      " ...")
    finally:
        # Release the flag even if downloading, synthesis or upload failed.
        is_running = False
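The is_running flag is set while an article is being processed, so that the bot does not start on a new link until the current one is finished; if a message arrives in the meantime, the user is simply asked to wait. The link variable holds the url if the message is a valid url, and article_language holds the detected language of the article, or False if it could not be determined.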
if link: # , , -
is_running = True
article_text = get_article_text(link)
article_language = get_article_language(article_text)
parser.py
import requests
from bs4 import BeautifulSoup
from langdetect import detect
import re
def get_link(message_text):
    """Return the message text if it is a single http(s) link, else False.

    The whole message has to be the link: the pattern is anchored at both
    ends, so surrounding words or inner whitespace make the check fail.
    """
    url_pattern = (r'^https?:\/\/?[\w-]{1,32}'
                   r'\.[\w-]{1,32}[^\s@]*$')
    found = re.match(url_pattern, message_text)
    return found.group(0) if found else False
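First, the message is checked by the get_link function. If the text is a valid url, the url is returned; otherwise the function returns False, and the bot replies: ", , ."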
def get_article_text(link, timeout=10):
    """Download the page at ``link`` and return the text of its <article> tag.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        The text inside the first <article> element, with text fragments
        joined by '. ', or False if the request failed or the page has no
        <article> element.
    """
    try:
        response = requests.get(link, timeout=timeout)
    except requests.exceptions.RequestException:
        # Base class of requests' errors: covers connection failures as well
        # as timeouts, invalid URLs and redirect loops.
        return False

    parser = BeautifulSoup(response.content, 'html.parser')
    article = parser.select_one('article')
    if article is None:
        # No <article> element on the page.
        return False
    return article.get_text(separator='. ')
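Next, we try to fetch the page at the given url. If requests.exceptions.ConnectionError is raised, the function returns False, which is then passed on to get_article_language.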
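BeautifulSoup4 is used to find the <article> tag and extract its text, joining the fragments with '. '. If the page has no such tag, an AttributeError is raised and caught. get_article_text returns the article text on success, and False otherwise.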
def get_article_language(article_text):
    """Detect the article language and map it to the values the bot needs.

    Args:
        article_text: Article text, or a falsy value when no text was
            obtained.

    Returns:
        ``[label, languages]`` for a supported language, where ``label`` is
        the short name reported to the user and ``languages`` is the list
        compared against a voice's ``languages`` when choosing a voice.
        False when there is no text, detection fails, or the language is not
        supported.
    """
    # Imported locally so the block does not depend on a file-level import.
    from langdetect.lang_detect_exception import LangDetectException

    if not article_text:
        # Nothing to analyse (e.g. the download step returned False).
        return False

    try:
        language = detect(article_text)
    except (TypeError, LangDetectException):
        # langdetect raises LangDetectException when the text contains
        # nothing it can base a guess on.
        return False

    # A fresh mapping per call, so callers never share the returned lists.
    supported = {
        'en': ['EN', ['en_GB']],
        'ru': ['RU', ['ru_RU']],
    }
    return supported.get(language, False)
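The language of the article is detected with langdetect. Only 2 languages are supported: English and Russian. For these, the function returns a list whose 0th element is the language name shown to the user and whose 1st element is the value used to pick a pyttsx3 voice; otherwise it returns False, and the bot replies: " , ...".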
voice.py
import pyttsx3
from pydub import AudioSegment
import re
def engine_settings(engine, article_language):
    """Set the speech rate and select the first matching male voice.

    A voice matches when its ``languages`` equals ``article_language`` and
    its ``gender`` is 'VoiceGenderMale'. If none matches, the engine's
    current voice is left untouched.
    """
    available = engine.getProperty('voices')
    engine.setProperty('rate', 185)  # speech rate
    candidates = (
        candidate for candidate in available
        if candidate.languages == article_language
        and candidate.gender == 'VoiceGenderMale'
    )
    chosen = next(candidates, None)
    if chosen is None:
        return None
    return engine.setProperty('voice', chosen.id)
def get_mp3_file(file_name, article_text, article_language):
    """Synthesize ``article_text`` into ``file_name`` and convert it to mp3.

    A new pyttsx3 engine is created and configured through engine_settings,
    the text is queued for saving to ``file_name``, the queue is run, and the
    resulting file is then passed to convert_file_to_mp3.
    """
    tts = pyttsx3.init()
    engine_settings(tts, article_language)  # rate and voice
    tts.save_to_file(article_text, file_name)  # queue the save request
    tts.runAndWait()  # process the queued request
    convert_file_to_mp3(file_name)  # re-encode the saved file as mp3
def convert_file_to_mp3(file_name):
    """Load the audio at ``file_name`` and export it as mp3 to the same path."""
    audio = AudioSegment.from_file(file_name)
    audio.export(file_name, format="mp3")
def get_file_name(link):
    """Build an mp3 file name from an article link.

    The leading ``http://`` / ``https://`` is dropped and every character
    that is not a letter, digit or underscore is replaced with '_', so URL
    characters such as '/', '?', ':', '&' or a backslash cannot end up in the
    file name. A link without a scheme prefix is accepted as well.

    Args:
        link: The article URL.

    Returns:
        The sanitized name with an '.mp3' suffix.
    """
    # Strip the scheme; unlike re.split(...)[1] this does not fail when the
    # prefix is absent.
    stem = re.sub(r'^https?:\/\/?', '', link)
    # Replace everything that is not a word character with '_'.
    stem = re.sub(r'\W', '_', stem)
    return stem + '.mp3'
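pyttsx3 is used to synthesize the speech, and the result is converted to mp3 with pydub. The engine_settings function sets the speech rate and selects a voice: a male voice whose language matches the language of the article.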
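get_mp3_file applies engine_settings and saves the speech to a file (the name from get_file_name already ends in mp3, but the file saved by pyttsx3 still has to be converted, which is done with AudioSegment). After engine.runAndWait finishes, the file is converted to mp3 and is ready to be sent.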
. .
python bot.py
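As a result, we have a bot that takes a link, reads the text inside the <article> tag, voices it with pyttsx3 and sends back an mp3 file.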