In this article, I want to share with you the results of my study of the main features of the Web Speech API
(hereafter WSA
).
Introduction
WSA
is an experimental technology consisting of two interfaces: SpeechSynthesis
(interface for text-to-speech) and SpeechRecognition
(interface for speech recognition).
, MDN ( , , - ).
, Can I use:
, SpeechSynthesis
. , SpeechRecognition
. " ", SpeechRecognition
-, . , .
4 , 2 :
- ,
SpeechSynthesis
, () - ""
- , , , ( ),
, .
:
<!-- Speech synthesis player markup. The script reads the elements with ids
     `wrapper`, `textarea`, `select`, `volume`, `rate`, `pitch` and `buttons`
     directly as global variables. -->
<div id="wrapper">
<h1>Speech Synthesis - Player</h1>
<label
>Text:
<textarea id="textarea">! ?</textarea>
</label>
<label
>Voice:
<select id="select"></select>
</label>
<!-- Each range input is immediately followed by a <span>; the change handler
     writes the slider's current value into that sibling. -->
<label
>Volume:
<input id="volume" type="range" min="0" max="1" step="0.1" value="1" />
<span>1</span>
</label>
<label
>Rate:
<input id="rate" type="range" min="0" max="3" step="0.5" value="1" />
<span>1</span>
</label>
<label
>Pitch:
<input id="pitch" type="range" min="0" max="2" step="0.5" value="1" />
<span>1</span>
</label>
<!-- The click handler dispatches on each button's class name. -->
<div id="buttons">
<button class="speak">Speak</button>
<button class="cancel">Cancel</button>
<button class="pause">Pause</button>
<button class="resume">Resume</button>
</div>
</div>
(textarea
), ; (select
) , ; - (volume
), (rate
) (pitch
), (speak
), (cancel
), (pause
) (resume
) . , .
min
, max
, step
value
. , , .. . . id
, .
JavaScript
. SpeechSynthesisUtterance
("utterance" " "):
// Single utterance instance shared by the player: its text, voice and
// parameters are reassigned right before each `speechSynthesis.speak(U)` call.
const U = new SpeechSynthesisUtterance()
( "", , , Chrome ):
// Initial voice list. Declared with `let` because it is reassigned from the
// `voiceschanged` handler.
let voices = speechSynthesis.getVoices()
getVoices()
voiceschanged
. "" :
// Re-read the voice list whenever the browser reports a change and rebuild
// the voice picker from it.
speechSynthesis.onvoiceschanged = function onVoicesChanged() {
  voices = speechSynthesis.getVoices();
  populateVoices(voices);
};
( ) :
0: SpeechSynthesisVoice default: true lang: "de-DE" localService: false name: "Google Deutsch" voiceURI: "Google Deutsch"
:
/**
 * Rebuilds the voice `<select>` from the given voices, preselects one of them
 * and (re)installs the UI handlers.
 *
 * @param {SpeechSynthesisVoice[]} voices - voices as returned by `speechSynthesis.getVoices()`
 */
function populateVoices(voices) {
  // Drop options left over from a previous call so a shorter list does not
  // keep stale entries whose index no longer exists in `voices`.
  select.options.length = 0;

  // One option per voice; the option value is the voice's index in `voices`.
  voices.forEach((voice, index) => {
    select.options[index] = new Option(voice.name, index);
  });

  // NOTE(review): the preferred voice name looks truncated (non-ASCII
  // characters were probably lost) — confirm the intended name.
  let selectedIndex = voices.findIndex((voice) => voice.name === 'Google ');
  // Fall back to the browser's default voice, then to the first voice, so the
  // select never ends up with nothing selected (index -1).
  if (selectedIndex === -1) {
    selectedIndex = voices.findIndex((voice) => voice.default);
  }
  if (selectedIndex === -1 && voices.length > 0) {
    selectedIndex = 0;
  }
  select.selectedIndex = selectedIndex;

  initializeHandlers();
}
:
/**
 * Installs the utterance lifecycle loggers, the slider change handler and the
 * button click handler.
 *
 * Every handler is assigned through an `on*` property, so calling this
 * function again (it runs after each voice-list refresh) replaces the previous
 * handlers instead of stacking duplicate listeners.
 */
function initializeHandlers() {
  // Utterance lifecycle: start, end, error.
  U.onstart = () => console.log('Started');
  U.onend = () => console.log('Finished');
  U.onerror = (err) => console.error(err);
  // Pause / resume notifications.
  U.onpause = () => console.log('Paused');
  U.onresume = () => console.log('Resumed');

  // Only the range sliders have a value label to refresh.
  wrapper.onchange = ({ target }) => {
    if (target.type !== 'range') {
      return;
    }
    handleChange(target);
  };

  // Dispatch on the clicked button's class name.
  buttons.onclick = ({ target: { className } }) => {
    switch (className) {
      case 'speak':
        // Do not queue another utterance while one is being spoken.
        if (!speechSynthesis.speaking) {
          convertTextToSpeech();
        }
        break;
      case 'cancel':
        speechSynthesis.cancel();
        break;
      case 'pause':
        speechSynthesis.pause();
        break;
      case 'resume':
        speechSynthesis.resume();
        break;
      default:
        break;
    }
  };
}
:
/**
 * Mirrors a slider's current value into the element that follows it.
 *
 * @param {HTMLInputElement} el - the input whose value changed
 */
function handleChange(el) {
  const { nextElementSibling: valueLabel, value: currentValue } = el;
  valueLabel.textContent = currentValue;
}
:
/**
 * Speaks the textarea content using the voice and slider settings currently
 * selected in the UI. Does nothing when the text is empty or whitespace only.
 */
function convertTextToSpeech() {
  const trimmed = textarea.value.trim();
  if (!trimmed) {
    return;
  }

  U.text = trimmed;

  // `select.value` holds the index of the chosen voice. It does not resolve to
  // a voice when the list is empty or nothing is selected; in that case the
  // utterance's voice and language are left untouched instead of throwing.
  const voice = voices[select.value];
  if (voice) {
    U.voice = voice;
    U.lang = voice.lang;
  }

  // Slider values are strings; convert them explicitly to numbers.
  U.volume = Number(volume.value);
  U.rate = Number(rate.value);
  U.pitch = Number(pitch.value);

  speechSynthesis.speak(U);
}
SpeechSynthesisUtterance
:
SpeechSynthesisUtterance lang: "ru-RU" onboundary: null onend: () => console.log('Finished') onerror: (err) => console.error(err) onmark: null onpause: () => console.log('Paused') onresume: () => console.log('Resumed') onstart: () => console.log('Started') pitch: 1 rate: 1 text: "! ?" voice: SpeechSynthesisVoice { voiceURI: "Google ", name: "Google ", lang: "ru-RU", localService: false, default: false } volume: 1
:
// Keyboard shortcuts for the player: S = speak, C = cancel, P = pause, R = resume.
window.onkeydown = ({ key, target, ctrlKey, metaKey, altKey }) => {
  // Leave browser/OS shortcuts (Ctrl+C, Cmd+R, ...) alone.
  if (ctrlKey || metaKey || altKey) {
    return;
  }
  // Do not hijack letters while the user is typing the text to be spoken.
  if (target && target.tagName === 'TEXTAREA') {
    return;
  }
  if (typeof key !== 'string') {
    return;
  }

  switch (key.toLowerCase()) {
    case 's':
      // Do not queue another utterance while one is being spoken.
      if (!speechSynthesis.speaking) {
        convertTextToSpeech();
      }
      break;
    case 'c':
      speechSynthesis.cancel();
      break;
    case 'p':
      speechSynthesis.pause();
      break;
    case 'r':
      speechSynthesis.resume();
      break;
    default:
      break;
  }
};
:
SpeechSynthesis
, , - , . .
SpeechSynthesis
. , , , , , - . , , .
:
<!-- Page reader markup: every section pairs a `play` button with the paragraph
     that follows it; the click handler reads the text of the button's next
     sibling element. -->
<div id="wrapper"> <h1>Speech Synthesis - Page Reader</h1> <div> <button class="play" tabindex="1"></button> <p> JavaScript β . -, . ECMAScript ( ECMA-262). </p> </div> <div> <button class="play" tabindex="2"></button> <p> JavaScript . -. </p> </div> <div> <button class="play" tabindex="3"></button> <p> : , , , , . </p> </div> </div>
(div
) (play
) , , , ( JavaScript
). , tabindex
, tab
space
. , , tabindex
, tab
.
, :
// The voice list may still be empty at this point; it is re-read when the
// `voiceschanged` event fires.
let voices = speechSynthesis.getVoices();
let defaultVoice;

// Button class names double as the playback state.
const PLAY = 'play';
const PAUSE = 'pause';
const RESUME = 'resume';

/**
 * Refreshes the voice list, picks the preferred voice and wires up the
 * listeners. Safe to call repeatedly: `addEventListener` ignores a listener
 * that is already registered with the same function reference.
 */
function initialize() {
  voices = speechSynthesis.getVoices();
  // NOTE(review): the voice name looks truncated (non-ASCII characters were
  // probably lost) — confirm the intended name.
  defaultVoice = voices.find((voice) => voice.name === 'Google ');

  wrapper.addEventListener('click', handleClick);
  window.addEventListener('keydown', handleKeydown);
}

speechSynthesis.onvoiceschanged = initialize;
// If the voices were already available synchronously, do not depend on the
// event firing to make the page interactive.
if (voices.length > 0) {
  initialize();
}

/**
 * Handles clicks inside the wrapper, dispatching on the clicked element's
 * class name (`play`, `pause` or `resume`).
 *
 * @param {MouseEvent} event
 */
function handleClick({ target }) {
  switch (target.className) {
    case PLAY: {
      // Stop whatever is currently queued before reading this paragraph.
      speechSynthesis.cancel();

      const { textContent } = target.nextElementSibling;

      // The paragraph is queued sentence by sentence, one utterance each.
      textContent.split('.').forEach((text) => {
        const trimmed = text.trim();
        if (trimmed) {
          speechSynthesis.speak(getUtterance(target, trimmed));
        }
      });
      break;
    }
    case PAUSE:
      // The class name is switched so the next click on the button resumes.
      target.className = RESUME;
      speechSynthesis.pause();
      break;
    case RESUME:
      target.className = PAUSE;
      speechSynthesis.resume();
      break;
    default:
      break;
  }
}

/**
 * Cancels speech when Escape is pressed.
 *
 * @param {KeyboardEvent} event
 */
function handleKeydown({ code }) {
  if (code === 'Escape') {
    speechSynthesis.cancel();
  }
}

/**
 * Builds an utterance for one sentence and ties its start/end to the state
 * (class name) of the button that triggered it.
 *
 * @param {Element} target - the button whose class reflects playback state
 * @param {string} text - the sentence to speak
 * @returns {SpeechSynthesisUtterance}
 */
function getUtterance(target, text) {
  const U = new SpeechSynthesisUtterance(text);

  // The preferred voice may not exist on this machine; the utterance's voice
  // and language are then left at their defaults instead of throwing.
  if (defaultVoice) {
    U.voice = defaultVoice;
    U.lang = defaultVoice.lang;
  }
  U.volume = 1;
  U.rate = 1;
  U.pitch = 1;

  U.onstart = () => {
    console.log('Started');
    target.className = PAUSE;
  };
  U.onend = () => {
    console.log('Finished');
    target.className = PLAY;
  };
  U.onerror = (err) => console.error(err);

  return U;
}
β ( (.
)), , (, ) β textContent.split('.').forEach(...)
. , 220 ( Chrome). text-too-long
( ), , , , SpeechSynthesis
( ). , .
:
""
, SpeechSynthesis
, , , , , , WSA
— SpeechRecognition
.
:
<!-- Dictaphone markup. The script reads `final_text`, `interim_text` and
     `buttons` directly as global variables: final results are written to the
     textarea, interim results to the text input. -->
<div id="wrapper">
<h1>Speech Recognition - Dictaphone</h1>
<textarea id="final_text" cols="30" rows="10"></textarea>
<input type="text" id="interim_text" />
<!-- The click handler dispatches on each button's class name.
     NOTE(review): the button labels are empty here — presumably non-ASCII
     text that was lost; confirm the intended captions. -->
<div id="buttons">
<button class="start"></button>
<button class="stop"></button>
<button class="abort"></button>
<button class="copy"></button>
<button class="clear"></button>
</div>
</div>
() (final_text
) ( ) (interim_text
), (buttons
). textarea
input
, , , . , , , Chrome .
(start
, stop
abort
), Clipboard API
.
:
// Accumulated text of all finalized recognition results.
let final_transcript = '';
// Whether the user currently wants recognition to be running; consulted by
// the `end` handler and toggled by the control buttons.
let recognizing = false;
, SpeechRecognition
:
// Use the standard constructor when available, otherwise the `webkit`-prefixed one.
const speechRecognition =
  window.SpeechRecognition || window.webkitSpeechRecognition;

// The recognizer instance used by all handlers.
const recognition = new speechRecognition();

// Configure it in one go: continuous listening, interim results enabled,
// up to three alternatives per result, Russian as the recognition language.
Object.assign(recognition, {
  continuous: true,
  interimResults: true,
  maxAlternatives: 3,
  lang: 'ru-RU',
});
, :
// Log when recognition starts.
// NOTE(review): the logged messages here and in `onend` are blank — presumably
// non-ASCII text that was lost; confirm the intended wording.
recognition.onstart = () => {
  console.log(' ');
};

// Log the error code reported by the recognizer.
recognition.onerror = ({ error }) => {
  console.error(error);
};

// Restart recognition when it ends while the user still wants it running;
// stay stopped once `recognizing` has been set to false.
recognition.onend = () => {
  console.log(' ');
  if (!recognizing) {
    return;
  }
  recognition.start();
};
SpeechRecognition
, , "" . "" . , :
// Spoken words that are replaced by punctuation / whitespace characters.
// NOTE(review): the keys were missing from this listing (non-ASCII text was
// lost), which made the object literal a syntax error. They are restored here
// as the Russian names of the corresponding characters, matching
// `recognition.lang = 'ru-RU'` — confirm against the original wording.
const DICTIONARY = {
  точка: '.',
  запятая: ',',
  вопрос: '?',
  восклицание: '!',
  двоеточие: ':',
  тире: '-',
  абзац: '\n',
  отступ: '\t',
};
, , , SpeechGrammar
SpeechGrammarList
, JSpeech Grammar Format
, , .
/, . , . , , , , " ", " " .. , : '?'
, .
:
/**
 * Replaces every word that is a `DICTIONARY` key (case-insensitively) with
 * its mapped character; all other words are kept as they are.
 *
 * @param {string} s - recognized text, words separated by single spaces
 * @returns {string} the text with dictionary words substituted
 */
function editInterim(s) {
  return s
    .split(' ')
    .map((rawWord) => {
      const word = rawWord.trim();
      const key = word.toLowerCase();
      // Own-property check: a plain `DICTIONARY[key]` lookup would also match
      // inherited members such as "constructor" or "toString".
      return Object.prototype.hasOwnProperty.call(DICTIONARY, key)
        ? DICTIONARY[key]
        : word;
    })
    .join(' ');
}

/**
 * Removes the whitespace in front of punctuation marks, so "word ." becomes
 * "word.".
 *
 * @param {string} s - text produced by `editInterim`
 * @returns {string} the text with punctuation attached to the preceding word
 */
function editFinal(s) {
  return s.replace(/\s{1,}([\.+,?!:-])/g, '$1');
}
editInterim()
, , . , . word = word.trim()
, "" , .
editFinal()
β editInterim()
. , .
, , β result
. :
// Handles every batch of recognition results: finalized pieces are appended to
// the running transcript, everything else is shown as interim text.
recognition.onresult = ({ resultIndex, results }) => {
  const interimPieces = [];

  // Only the results from `resultIndex` onwards are processed.
  Array.from(results)
    .slice(resultIndex)
    .forEach((result) => {
      // The first alternative of each result is used.
      const { transcript } = result[0];
      if (result.isFinal) {
        // Final text goes through the dictionary substitution first.
        final_transcript += editInterim(transcript);
      } else {
        interimPieces.push(transcript);
      }
    });

  // Interim text goes to the `input`.
  interim_text.value = interimPieces.join('');
  // Tidy up punctuation spacing, then show the transcript in the `textarea`.
  final_transcript = editFinal(final_transcript);
  final_text.value = final_transcript;
};
:
SpeechRecognitionEvent bubbles: false cancelBubble: false cancelable: false composed: false currentTarget: SpeechRecognition {grammars: SpeechGrammarList, lang: "ru-RU", continuous: true, interimResults: true, maxAlternatives: 3, β¦} defaultPrevented: false emma: null eventPhase: 0 interpretation: null isTrusted: true path: [] resultIndex: 1 // results: SpeechRecognitionResultList {0: SpeechRecognitionResult, 1: SpeechRecognitionResult, length: 2} returnValue: true srcElement: SpeechRecognition {grammars: SpeechGrammarList, lang: "ru-RU", continuous: true, interimResults: true, maxAlternatives: 3, β¦} target: SpeechRecognition {grammars: SpeechGrammarList, lang: "ru-RU", continuous: true, interimResults: true, maxAlternatives: 3, β¦} timeStamp: 59862.61999979615 type: "result"
(SpeechRecognitionResultList) :
results: SpeechRecognitionResultList 0: SpeechRecognitionResult 0: SpeechRecognitionAlternative confidence: 0.7990190982818604 transcript: "" isFinal: true length: 1 length: 1
e.results[i][0].transcript
. , maxAlternatives = 3
, SpeechRecognitionAlternative
. ( 0
) .
, , :
// Control panel: dispatches on the clicked button's class name.
buttons.onclick = ({ target }) => {
  switch (target.className) {
    case 'start':
      // Calling `start()` on a recognizer that is already running throws, so
      // a second click on "start" is ignored.
      if (recognizing) {
        break;
      }
      // Begin a fresh session: reset the transcript and both fields.
      final_transcript = '';
      recognition.start();
      recognizing = true;
      final_text.value = '';
      interim_text.value = '';
      break;
    case 'stop':
      // Clearing the flag keeps the `end` handler from restarting recognition.
      recognition.stop();
      recognizing = false;
      break;
    case 'abort':
      recognition.abort();
      recognizing = false;
      break;
    case 'copy':
      // `writeText` returns a promise; report a rejection instead of leaving
      // it unhandled.
      navigator.clipboard
        .writeText(final_text.value)
        .catch((err) => console.error(err));
      // Temporary caption, reset after three seconds.
      // NOTE(review): both captions are empty strings here — presumably
      // non-ASCII text that was lost; confirm the intended wording.
      target.textContent = '';
      setTimeout(() => {
        target.textContent = '';
      }, 3000);
      break;
    case 'clear':
      final_transcript = '';
      final_text.value = '';
      break;
    default:
      break;
  }
};
. "", , ( speechstart
), ( ), , " ". input
- " ", textarea
". ?". , .
:
, , . : () => removeLastWord()
( , typeof DICTIONARY[word] === 'function'
) :
/**
 * Removes the last word from the text shown in the `final_text` field.
 *
 * Trailing whitespace is dropped first; otherwise the "last word" found would
 * be the empty string after the final space and nothing would be removed.
 * NOTE(review): only the field is updated, not `final_transcript` — confirm
 * whether the transcript should be kept in sync.
 */
function removeLastWord() {
  const current = final_text.value.replace(/\s+$/, '');
  const lastSpace = current.lastIndexOf(' ');
  // A single word (no space at all) leaves an empty field.
  final_text.value = lastSpace === -1 ? '' : current.substring(0, lastSpace);
}
, β , , .
SpeechRecognition
, , .
, .
pages
:
// home.js export default /*html*/ ` <div id="wrapper"> <div>Section 1</div> <div>Section 2</div> <div>Section 3</div> <div>Section 4</div> <div>Section 5</div> <div>Section 6</div> <div>Section 7</div> <div>Section 8</div> <div>Section 9</div> </div> ` // product.js export default /*html*/ ` <h1>This is the Product Page</h1> ` // about.js export default /*html*/ ` <h1>This is the About Page</h1> `
:
import HomePage from './pages/home.js';
import ProductPage from './pages/product.js';
import AboutPage from './pages/about.js';

// Page markup is rendered straight into <body>; the home page is shown first.
const body = document.body;
body.innerHTML = HomePage;
:
// Scroll directions.
const DOWN = 'down';
const UP = 'up';
const RIGHT = 'right';
const LEFT = 'left';

// Voice commands: each key is a spoken word, each value the action to run.
const ACTIONS = {
  // Navigation between pages.
  home() {
    return (body.innerHTML = HomePage);
  },
  product() {
    return (body.innerHTML = ProductPage);
  },
  about() {
    return (body.innerHTML = AboutPage);
  },
  // Scrolling.
  down() {
    return scroll(DOWN);
  },
  up() {
    return scroll(UP);
  },
  left() {
    return scroll(LEFT);
  },
  right() {
    return scroll(RIGHT);
  },
  // Theme switching.
  light() {
    return body.removeAttribute('class');
  },
  dark() {
    return (body.className = 'dark');
  },
};
SpeechRecognition
, , ( β : recognition.lang = 'en-US'
).
result
:
// Runs the action of every recognized command word in each finalized result.
recognition.onresult = (e) => {
  for (let i = e.resultIndex; i < e.results.length; i++) {
    if (!e.results[i].isFinal) {
      continue;
    }

    const result = e.results[i][0].transcript.toLowerCase();
    // Logged so the recognized phrase can be inspected while debugging.
    console.log(result);

    // Every word of the phrase is checked against the command table.
    result.split(' ').forEach((word) => {
      const command = word.trim();
      // Own-property check: a plain `ACTIONS[command]` lookup would also match
      // inherited members (e.g. "constructor") and throw on "__proto__".
      if (Object.prototype.hasOwnProperty.call(ACTIONS, command)) {
        ACTIONS[command]();
      }
    });
  }
};
:
/**
 * Smoothly scrolls the page by one viewport in the given direction.
 * Unknown directions are ignored.
 *
 * @param {string} direction - one of `DOWN`, `UP`, `RIGHT`, `LEFT`
 */
function scroll(direction) {
  switch (direction) {
    case DOWN:
      scrollTo({ top: scrollY + innerHeight, behavior: 'smooth' });
      break;
    case UP:
      scrollTo({ top: scrollY - innerHeight, behavior: 'smooth' });
      break;
    case RIGHT:
      scrollTo({ left: scrollX + innerWidth, behavior: 'smooth' });
      break;
    case LEFT:
      scrollTo({ left: scrollX - innerWidth, behavior: 'smooth' });
      break;
    default:
      // Previously an unknown direction still called `scrollTo` with an
      // undefined position; now nothing happens.
      break;
  }
}
, :
// Keyboard control of recognition: Space starts listening, Escape stops it.
window.addEventListener('keydown', (e) => {
  switch (e.code) {
    case 'Space':
      // Only the handled keys have their default action suppressed, so all
      // other keys (Tab, F5, typing, ...) keep working.
      e.preventDefault();
      // Calling `start()` on a recognizer that is already running throws.
      if (recognizing) {
        break;
      }
      recognition.start();
      recognizing = true;
      break;
    case 'Escape':
      e.preventDefault();
      recognition.stop();
      recognizing = false;
      break;
    default:
      break;
  }
});
. , , :
home
,product
,about
βdark
,light
βdown
,up
,left
,right
β ( ,scroll
, ,scroll down
)
:
, WSA
JavaScript
, , "" -. , . , WSA
β , , , , .
, WSA
. , , SpeechSynthesis
SpeechRecognition
. , β , , , , , . : β , , - ""
,
, , ,
. , . .
10% !