Writing a Telegram bot that converts Internet articles into mp3 files

Hello everyone! In this article, I will share my implementation of a Telegram bot that converts articles from the Internet into mp3 audio files. For this I will be using Python 3.6 and a few related libraries. So let's get started.





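First, create a bot in Telegram and obtain its token. The required libraries are installed with pip; run the following commands in a terminal: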





pip install pyttsx3
pip install langdetect
pip install pydub
pip install bs4
# parser.py imports requests, so it has to be installed as well.
pip install requests
# PyTelegramBotAPI is the package that provides the `telebot` module used by bot.py.
# Do not additionally install the unrelated PyPI package named "telebot": it ships a
# module with the same name and hides the TeleBot class.
pip install PyTelegramBotAPI
      
      



:





bot.py





parser.py





voice.py





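The project consists of three files. bot.py contains the bot's message handlers. parser.py checks the link, downloads the article and detects its language, and voice.py turns the text into speech. The flow is as follows: the bot receives a message, checks it (is it a url, does the page contain an <article> tag, is the language supported), extracts the text with bs4, voices it with pyttsx3, converts the result to mp3 and sends it to the user. Now let's look at each file in turn...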





bot.py

First we register a message_handler that replies to the /start command:





import telebot
from voice import get_mp3_file, get_file_name
from parser import get_article_text, get_article_language, get_link


# Bot client used by the @bot.message_handler decorators and the send_* calls in this file.
# NOTE(review): 'TOKEN' looks like a placeholder — presumably it must be replaced with the real bot token.
bot = telebot.TeleBot('TOKEN')

@bot.message_handler(commands=['start'])
def forward_message(message):
    """Reply to the /start command with the bot's greeting text.

    The greeting is sent to the user who issued the command
    (``message.from_user.id``).
    """
    greeting = (",     ,"
                "    , "
                "    mp3 .")
    bot.send_message(message.from_user.id, greeting)
      
      







# True while an article is being converted; new requests are refused meanwhile.
is_running = False


@bot.message_handler(content_types=['text'])
def forward_message(message):
    """Convert the article behind the url in *message* to mp3 and send it back.

    The handler refuses to start while another conversion is in progress,
    rejects messages that are not a url, and reports an error when the
    article text or its language cannot be obtained.  Exceptions raised by
    the conversion steps still propagate to the caller, but the busy flag
    is always cleared first.
    """
    global is_running  # the flag is module-level state shared between calls
    user_id = message.from_user.id

    if is_running:
        bot.send_message(user_id, "   , "
                                  " ...")
        return

    link = get_link(message.text)
    if not link:  # the message is not a url
        bot.send_message(user_id, ",   ,  "
                                  "   .")
        return

    is_running = True
    try:
        article_text = get_article_text(link)
        article_language = get_article_language(article_text)
        if not article_language:  # no text was found or the language is unsupported
            bot.send_message(user_id, "   ,"
                                      "  ...")
            return
        bot.send_message(user_id, ",   .")
        bot.send_message(user_id, f"  - {article_language[0]}.")
        bot.send_message(user_id, " ...")
        file_name = get_file_name(link)
        get_mp3_file(file_name, article_text, article_language[1])
        # Close the file handle once the upload is done instead of leaking it.
        with open(file_name, 'rb') as audio_file:
            bot.send_audio(user_id, audio=audio_file)
    finally:
        # Reset the flag even when a step above raises; otherwise the bot
        # would answer "busy" to every later request.
        is_running = False
      
      



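The is_running flag records whether the bot is already converting an article, so that a new request is refused while one is still being processed. link holds the url taken from the message (or False if the message is not a url), and article_language holds the detected language, or False if it could not be determined or is not supported.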





if link:  # the message was a url: mark the bot as busy, then fetch the article text and detect its language
            is_running = True
            article_text = get_article_text(link)
            article_language = get_article_language(article_text)       
      
      



parser.py

import requests
from bs4 import BeautifulSoup
from langdetect import detect
import re


def get_link(message_text):
    """Return *message_text* as a url if the whole message is one, else False.

    The message must start with ``http:`` or ``https:`` followed by one or
    two slashes, a dotted host name, and no whitespace or ``@`` up to the
    end of the text.
    """
    url_pattern = (r'^https?:\/\/?[\w-]{1,32}'
                   r'\.[\w-]{1,32}[^\s@]*$')
    # The pattern is anchored at both ends, so at most one match exists.
    found = re.match(url_pattern, message_text)
    return found.group(0) if found else False
      
      



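To check the incoming message we use get_link. If the whole message is a url the function returns it; otherwise it returns False, and the bot replies that it needs a link to an article.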





def get_article_text(link):
    """Download the page at *link* and return the text of its <article> tag.

    Returns:
        The article text with its text fragments joined by ``'. '``, or
        False when the page cannot be fetched or contains no <article> tag.
    """
    try:
        # RequestException is the base class of ConnectionError, Timeout,
        # InvalidURL and the other errors requests can raise, so any failed
        # fetch is reported as False.  The timeout keeps an unresponsive
        # server from blocking the caller indefinitely.
        response = requests.get(link, timeout=10)
    except requests.exceptions.RequestException:
        return False
    soup = BeautifulSoup(response.content, 'html.parser')
    # select_one returns None when the page has no <article> tag.
    article = soup.select_one('article')
    if article is None:
        return False
    return article.get_text(separator='. ')
      
      



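Next we request the page at the given url. If the request fails (for example with requests.exceptions.ConnectionError), the function returns False, which is then passed on to get_article_language.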





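BeautifulSoup4 then looks for the <article> tag on the page and extracts its text. If the page has no such tag, get_article_text cannot obtain the text and returns False.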





def get_article_language(article_text):
    """Detect the language of *article_text* and map it to a voice locale.

    Args:
        article_text: The article text, or a falsy value (False, None, '')
            when no text could be obtained.

    Returns:
        ``[label, [locale]]`` for a supported language, i.e.
        ``['EN', ['en_GB']]`` or ``['RU', ['ru_RU']]``; False when there is
        no text, detection fails, or the language is not supported.
    """
    if not article_text:  # nothing to analyse (failed download or empty article)
        return False

    # Imported here so the block does not depend on a change to the
    # module-level import list.
    from langdetect import LangDetectException

    try:
        language = detect(article_text)
    except (TypeError, LangDetectException):
        # LangDetectException is raised for text without detectable
        # features (for example only digits or punctuation).
        return False

    supported = {
        'en': ['EN', ['en_GB']],
        'ru': ['RU', ['ru_RU']],
    }
    return supported.get(language, False)
      
      



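The language of the article is detected with langdetect. Two languages are supported: English and Russian. For a supported language the function returns a list whose 0th element is the language label shown to the user and whose 1st element is the locale list used to choose a pyttsx3 voice; otherwise it returns False, and the bot replies with an error message.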





voice.py

import pyttsx3
from pydub import AudioSegment
import re


def engine_settings(engine, article_language):
    """Set the speech rate and pick a male voice for the article's language.

    The rate is set to 185.  The first voice whose ``languages`` equals
    *article_language* and whose ``gender`` is ``'VoiceGenderMale'`` is
    selected; when no voice qualifies the voice is left unchanged.

    Returns:
        Whatever ``engine.setProperty`` returns for the selected voice, or
        None when no voice matched.
    """
    available = engine.getProperty('voices')
    engine.setProperty('rate', 185)
    matching = (
        candidate for candidate in available
        if candidate.languages == article_language
        and candidate.gender == 'VoiceGenderMale'
    )
    chosen = next(matching, None)
    if chosen is None:
        return None
    return engine.setProperty('voice', chosen.id)


def get_mp3_file(file_name, article_text, article_language):
    """Voice *article_text* into *file_name* and convert that file to mp3.

    Args:
        file_name: Path of the audio file to create.
        article_text: Text to be spoken.
        article_language: Language value handed to ``engine_settings`` for
            voice selection.
    """
    speech_engine = pyttsx3.init()
    engine_settings(speech_engine, article_language)  # rate and voice
    speech_engine.save_to_file(article_text, file_name)  # queue the synthesis
    speech_engine.runAndWait()  # process the queued command before converting
    convert_file_to_mp3(file_name)


def convert_file_to_mp3(file_name):
    """Load the audio file at *file_name* and export it as mp3 to the same path."""
    AudioSegment.from_file(file_name).export(file_name, format="mp3")


def get_file_name(link):
    """Build an mp3 file name from the url *link*.

    The leading ``http://`` / ``https://`` is dropped and every character
    that is not a letter, digit or underscore is replaced with ``'_'``, so
    query strings and other url punctuation (``?``, ``&``, ``=``, ``:``,
    ``#``, ``%`` ...) cannot produce a name the file system rejects.  A
    link without a scheme is sanitised as is instead of raising.

    Returns:
        The sanitised name with the ``.mp3`` extension appended.
    """
    without_scheme = re.sub(r'^https?:\/\/?', '', link)
    safe_name = re.sub(r'[^\w]', '_', without_scheme)
    return safe_name + '.mp3'
      
      



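Speech is synthesized with pyttsx3, and the resulting file is converted to mp3 with pydub. engine_settings sets the speech rate and selects a male voice that matches the language of the article.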





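get_mp3_file first calls engine_settings and then asks the engine to save the spoken text to a file (the name produced by get_file_name already ends in .mp3, but the file written by pyttsx3 is re-encoded afterwards with AudioSegment). Once engine.runAndWait has finished, the file is converted to mp3 and is ready to be sent.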





. .





python bot.py
      
      



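Keep in mind that the bot only works with pages whose text is inside an <article> tag and with languages for which pyttsx3 finds a suitable voice. Thank you for reading.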








All Articles