Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
E
ESP32-audioI2S
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Analytics
Analytics
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
xpstem
ESP32-audioI2S
Commits
53254f36
Commit
53254f36
authored
Feb 10, 2024
by
v
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Text to speech API provides a speech endpoint based on our TTS (text-to-speech) model.
parent
f2f1f5bc
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
109 additions
and
14 deletions
+109
-14
src/Audio.cpp
src/Audio.cpp
+108
-14
src/Audio.h
src/Audio.h
+1
-0
No files found.
src/Audio.cpp
View file @
53254f36
...
...
@@ -369,6 +369,103 @@ void Audio::setConnectionTimeout(uint16_t timeout_ms, uint16_t timeout_ms_ssl) {
if
(
timeout_ms_ssl
)
m_timeout_ms_ssl
=
timeout_ms_ssl
;
}
/*
Text to speech API provides a speech endpoint based on our TTS (text-to-speech) model.
More info: https://platform.openai.com/docs/guides/text-to-speech/text-to-speech
Request body:
model (string) [Required] - One of the available TTS models: tts-1 or tts-1-hd
input (string) [Required] - The text to generate audio for. The maximum length is 4096 characters.
voice (string) [Required] - The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
response_format (string) [Optional] - Defaults to mp3. The format to audio in. Supported formats are mp3, opus, aac, and flac.
speed (number) [Optional] - Defaults to 1. The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.
Usage: audio.openai_speech(OPENAI_API_KEY, "tts-1", input, "shimmer", "mp3", "1");
*/
bool
Audio
::
openai_speech
(
const
String
&
api_key
,
const
String
&
model
,
const
String
&
input
,
const
String
&
voice
,
const
String
&
response_format
,
const
String
&
speed
)
{
char
host
[]
=
"api.openai.com"
;
char
path
[]
=
"/v1/audio/speech"
;
xSemaphoreTakeRecursive
(
mutex_audio
,
portMAX_DELAY
);
if
(
input
==
""
)
{
AUDIO_INFO
(
"input text is empty"
);
stopSong
();
xSemaphoreGiveRecursive
(
mutex_audio
);
return
false
;
}
AUDIO_INFO
(
"Connect to new host:
\"
%s
\"
"
,
host
);
setDefaults
();
m_f_ssl
=
true
;
String
input_clean
=
""
;
for
(
int
i
=
0
;
i
<
input
.
length
();
i
++
)
{
char
c
=
input
.
charAt
(
i
);
if
(
c
==
'\"'
)
{
input_clean
+=
"
\\\"
"
;
}
else
if
(
c
==
'\n'
)
{
input_clean
+=
"
\\
n"
;
}
else
{
input_clean
+=
c
;
}
}
String
post_body
=
"{"
"
\"
model
\"
:
\"
"
+
model
+
"
\"
,"
+
"
\"
input
\"
:
\"
"
+
input_clean
+
"
\"
,"
+
"
\"
voice
\"
:
\"
"
+
voice
+
"
\"
,"
+
"
\"
response_format
\"
:
\"
"
+
response_format
+
"
\"
,"
+
"
\"
speed
\"
:
\"
"
+
speed
+
"
\"
"
+
"}"
;
String
http_request
=
"POST "
+
String
(
path
)
+
" HTTP/1.0
\r\n
"
// UNKNOWN ERROR CODE (0050) - crashing on HTTP/1.1 need to use HTTP/1.0
+
"Host: "
+
String
(
host
)
+
"
\r\n
"
+
"Authorization: Bearer "
+
api_key
+
"
\r\n
"
+
"Accept-Encoding: identity;q=1,*;q=0
\r\n
"
+
"User-Agent: nArija/1.0
\r\n
"
+
"Content-Type: application/json; charset=utf-8
\r\n
"
+
"Content-Length: "
+
post_body
.
length
()
+
"
\r\n
"
+
"Connection: keep-alive
\r\n
"
+
"
\r\n
"
+
post_body
+
"
\r\n
"
;
bool
res
=
true
;
int
port
=
443
;
_client
=
static_cast
<
WiFiClient
*>
(
&
clientsecure
);
uint32_t
t
=
millis
();
if
(
m_f_Log
)
AUDIO_INFO
(
"connect to %s on port %d path %s"
,
host
,
port
,
path
);
res
=
_client
->
connect
(
host
,
port
,
m_timeout_ms_ssl
);
if
(
res
)
{
uint32_t
dt
=
millis
()
-
t
;
strcpy
(
m_lastHost
,
host
);
AUDIO_INFO
(
"%s has been established in %lu ms, free Heap: %lu bytes"
,
"SSL"
,
(
long
unsigned
int
)
dt
,
(
long
unsigned
int
)
ESP
.
getFreeHeap
());
m_f_running
=
true
;
}
m_expectedCodec
=
CODEC_NONE
;
m_expectedPlsFmt
=
FORMAT_NONE
;
if
(
res
)
{
log_i
(
"connecttohost(): %s"
,
http_request
);
_client
->
print
(
http_request
);
if
(
response_format
==
"mp3"
)
m_expectedCodec
=
CODEC_MP3
;
if
(
response_format
==
"opus"
)
m_expectedCodec
=
CODEC_OPUS
;
if
(
response_format
==
"aac"
)
m_expectedCodec
=
CODEC_AAC
;
if
(
response_format
==
"flac"
)
m_expectedCodec
=
CODEC_FLAC
;
setDatamode
(
HTTP_RESPONSE_HEADER
);
m_streamType
=
ST_WEBSTREAM
;
}
else
{
AUDIO_INFO
(
"Request %s failed!"
,
host
);
m_lastHost
[
0
]
=
0
;
}
xSemaphoreGiveRecursive
(
mutex_audio
);
return
res
;
}
//------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
bool
Audio
::
connecttohost
(
const
char
*
host
,
const
char
*
user
,
const
char
*
pwd
)
{
// user and pwd for authentification only, can be empty
...
...
@@ -506,6 +603,7 @@ bool Audio::connecttohost(const char* host, const char* user, const char* pwd) {
m_expectedPlsFmt
=
FORMAT_NONE
;
if
(
res
)
{
log_i
(
"connecttohost(): %s"
,
rqh
);
_client
->
print
(
rqh
);
if
(
endsWith
(
extension
,
".mp3"
))
m_expectedCodec
=
CODEC_MP3
;
if
(
endsWith
(
extension
,
".aac"
))
m_expectedCodec
=
CODEC_AAC
;
...
...
@@ -3525,7 +3623,7 @@ bool Audio::parseHttpResponseHeader() { // this is the response to a GET / reque
continue
;
}
//
log_i("httpResponseHeader: %s", rhl);
//log_i("httpResponseHeader: %s", rhl);
int16_t
posColon
=
indexOf
(
rhl
,
":"
,
0
);
// lowercase all letters up to the colon
if
(
posColon
>=
0
)
{
...
...
@@ -3599,16 +3697,6 @@ bool Audio::parseHttpResponseHeader() { // this is the response to a GET / reque
AUDIO_INFO
(
"Filename is %s"
,
rhl
+
pos1
);
}
// if(startsWith(rhl, "set-cookie:") ||
// startsWith(rhl, "pragma:") ||
// startsWith(rhl, "expires:") ||
// startsWith(rhl, "cache-control:") ||
// startsWith(rhl, "icy-pub:") ||
// startsWith(rhl, "p3p:") ||
// startsWith(rhl, "accept-ranges:") ){
// ; // do nothing
// }
else
if
(
startsWith
(
rhl
,
"connection:"
))
{
if
(
indexOf
(
rhl
,
"close"
,
0
)
>=
0
)
{
;
/* do nothing */
}
}
...
...
@@ -3670,7 +3758,7 @@ bool Audio::parseHttpResponseHeader() { // this is the response to a GET / reque
if
(
audio_icydescription
)
audio_icydescription
(
c_idesc
);
}
else
if
(
(
startsWith
(
rhl
,
"transfer-encoding:"
)
))
{
else
if
(
startsWith
(
rhl
,
"transfer-encoding:"
))
{
if
(
endsWith
(
rhl
,
"chunked"
)
||
endsWith
(
rhl
,
"Chunked"
))
{
// Station provides chunked transfer
m_f_chunked
=
true
;
if
(
m_f_Log
)
AUDIO_INFO
(
"chunked data transfer"
);
...
...
@@ -5551,8 +5639,14 @@ boolean Audio::streamDetection(uint32_t bytesAvail) {
tmr_lost
=
millis
()
+
1000
;
if
(
cnt_lost
==
5
)
{
// 5s no data?
cnt_lost
=
0
;
AUDIO_INFO
(
"Stream lost -> try new connection"
);
connecttohost
(
m_lastHost
);
if
(
String
(
m_lastHost
)
==
"api.openai.com"
)
{
AUDIO_INFO
(
"End of Stream."
);
m_f_running
=
false
;
setDatamode
(
AUDIO_NONE
);
}
else
{
AUDIO_INFO
(
"Stream lost -> try new connection"
);
connecttohost
(
m_lastHost
);
}
return
true
;
}
}
...
...
src/Audio.h
View file @
53254f36
...
...
@@ -127,6 +127,7 @@ public:
Audio
(
bool
internalDAC
=
false
,
uint8_t
channelEnabled
=
3
,
uint8_t
i2sPort
=
I2S_NUM_0
);
// #99
~
Audio
();
void
setBufsize
(
int
rambuf_sz
,
int
psrambuf_sz
);
bool
openai_speech
(
const
String
&
api_key
,
const
String
&
model
,
const
String
&
input
,
const
String
&
voice
,
const
String
&
response_format
,
const
String
&
speed
);
bool
connecttohost
(
const
char
*
host
,
const
char
*
user
=
""
,
const
char
*
pwd
=
""
);
bool
connecttospeech
(
const
char
*
speech
,
const
char
*
lang
);
bool
connecttoFS
(
fs
::
FS
&
fs
,
const
char
*
path
,
int32_t
resumeFilePos
=
-
1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment