switch to realtime API
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
bc4ebeff8c
commit
90aebf681a
1 changed files with 76 additions and 110 deletions
186
src/App.jsx
186
src/App.jsx
|
@ -1,5 +1,5 @@
|
||||||
// ai-tutor-poc/src/App.jsx
|
// src/App.jsx
|
||||||
import React, { useState, useRef, useEffect } from 'react';
|
import React, { useState, useRef } from 'react';
|
||||||
import './App.css';
|
import './App.css';
|
||||||
|
|
||||||
const OPENAI_API_KEY = import.meta.env.VITE_OPENAI_API_KEY;
|
const OPENAI_API_KEY = import.meta.env.VITE_OPENAI_API_KEY;
|
||||||
|
@ -8,9 +8,10 @@ function App() {
|
||||||
const [transcript, setTranscript] = useState('');
|
const [transcript, setTranscript] = useState('');
|
||||||
const [aiReply, setAiReply] = useState('');
|
const [aiReply, setAiReply] = useState('');
|
||||||
const [isRecording, setIsRecording] = useState(false);
|
const [isRecording, setIsRecording] = useState(false);
|
||||||
|
|
||||||
|
const wsRef = useRef(null);
|
||||||
const mediaRecorderRef = useRef(null);
|
const mediaRecorderRef = useRef(null);
|
||||||
const audioChunksRef = useRef([]);
|
|
||||||
const audioStreamRef = useRef(null);
|
|
||||||
const systemContent = `คุณเป็นครูสอนภาษาอังกฤษที่ใจดี เป็นกันเอง และสอนภาษาอังกฤษผ่านการใช้เรื่องราวหรือบทสนทนาได้เป็นอย่างดีอและสนุกสนาน
|
const systemContent = `คุณเป็นครูสอนภาษาอังกฤษที่ใจดี เป็นกันเอง และสอนภาษาอังกฤษผ่านการใช้เรื่องราวหรือบทสนทนาได้เป็นอย่างดีอและสนุกสนาน
|
||||||
|
|
||||||
คุณได้รับมอบหมายให้พูดคุยกับนักเรียนระดับประถมที่เรียนภาษาอังกฤษเป็นภาษาต่างประเทศ (EFL) เพื่อช่วยให้นักเรียนเรียนรู้ภาษาอังกฤษอย่างเป็นธรรมชาติ
|
คุณได้รับมอบหมายให้พูดคุยกับนักเรียนระดับประถมที่เรียนภาษาอังกฤษเป็นภาษาต่างประเทศ (EFL) เพื่อช่วยให้นักเรียนเรียนรู้ภาษาอังกฤษอย่างเป็นธรรมชาติ
|
||||||
|
@ -81,129 +82,94 @@ Teacher: Good job! How about Monster – a big scary creature.
|
||||||
Student will try to pronounce
|
Student will try to pronounce
|
||||||
|
|
||||||
Teacher: Good job! How about Magic – something special and powerful.
|
Teacher: Good job! How about Magic – something special and powerful.
|
||||||
`;
|
`; // (keep your full system prompt here)
|
||||||
|
|
||||||
useEffect(() => {
|
const connectRealtime = async () => {
|
||||||
const initRecording = async () => {
|
if (wsRef.current) return;
|
||||||
audioStreamRef.current = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
||||||
mediaRecorderRef.current = new MediaRecorder(audioStreamRef.current);
|
|
||||||
|
|
||||||
mediaRecorderRef.current.ondataavailable = (e) => {
|
const ws = new WebSocket(`wss://api.openai.com/v1/realtime?authorization=Bearer ${OPENAI_API_KEY}`);
|
||||||
audioChunksRef.current.push(e.data);
|
wsRef.current = ws;
|
||||||
};
|
|
||||||
|
|
||||||
mediaRecorderRef.current.onstop = async () => {
|
ws.onopen = () => {
|
||||||
const inputAudioBlob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
|
ws.send(JSON.stringify({
|
||||||
audioChunksRef.current = []; // Clear for next recording
|
type: 'start',
|
||||||
const formData = new FormData();
|
model: 'gpt-4o',
|
||||||
formData.append('file', inputAudioBlob, 'input.webm');
|
config: {
|
||||||
formData.append('model', 'whisper-1');
|
audio: {
|
||||||
|
input: { encoding: 'webm-opus' },
|
||||||
const whisperRes = await fetch('https://api.openai.com/v1/audio/transcriptions', {
|
output: { voice: 'nova' }
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
Authorization: `Bearer ${OPENAI_API_KEY}`
|
|
||||||
},
|
|
||||||
body: formData
|
|
||||||
});
|
|
||||||
const { text } = await whisperRes.json();
|
|
||||||
setTranscript(text);
|
|
||||||
|
|
||||||
setAiReply(''); // Clear previous reply
|
|
||||||
let fullText = '';
|
|
||||||
|
|
||||||
const response = await fetch('https://api.openai.com/v1/chat/completions', {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
'Authorization': `Bearer ${OPENAI_API_KEY}`,
|
|
||||||
'Content-Type': 'application/json'
|
|
||||||
},
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: 'gpt-4o',
|
|
||||||
stream: true,
|
|
||||||
messages: [
|
|
||||||
{ role: 'system', content: systemContent },
|
|
||||||
{ role: 'user', content: text }
|
|
||||||
]
|
|
||||||
})
|
|
||||||
});
|
|
||||||
|
|
||||||
const reader = response.body.getReader();
|
|
||||||
const decoder = new TextDecoder("utf-8");
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
const { done, value } = await reader.read();
|
|
||||||
if (done) break;
|
|
||||||
|
|
||||||
const chunk = decoder.decode(value);
|
|
||||||
const lines = chunk.split('\n').filter(line => line.trim() !== '');
|
|
||||||
|
|
||||||
for (const line of lines) {
|
|
||||||
if (line.startsWith('data: ')) {
|
|
||||||
const data = line.replace('data: ', '');
|
|
||||||
if (data === '[DONE]') {
|
|
||||||
// After full reply received, synthesize speech
|
|
||||||
const speechRes = await fetch('https://api.openai.com/v1/audio/speech', {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
'Authorization': `Bearer ${OPENAI_API_KEY}`,
|
|
||||||
'Content-Type': 'application/json'
|
|
||||||
},
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: 'tts-1-hd',
|
|
||||||
voice: 'nova',
|
|
||||||
input: fullText
|
|
||||||
})
|
|
||||||
});
|
|
||||||
const outputAudioBlob = await speechRes.blob();
|
|
||||||
const audioUrl = URL.createObjectURL(outputAudioBlob);
|
|
||||||
const audio = new Audio(audioUrl);
|
|
||||||
audio.play();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const parsed = JSON.parse(data);
|
|
||||||
const delta = parsed.choices?.[0]?.delta?.content;
|
|
||||||
if (delta) {
|
|
||||||
fullText += delta;
|
|
||||||
setAiReply(prev => prev + delta);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
};
|
messages: [{ role: 'system', content: systemContent }]
|
||||||
|
}));
|
||||||
};
|
};
|
||||||
|
|
||||||
initRecording();
|
ws.onmessage = (event) => {
|
||||||
}, []);
|
const msg = JSON.parse(event.data);
|
||||||
|
if (msg.type === 'transcript') {
|
||||||
|
setTranscript(msg.text || '');
|
||||||
|
} else if (msg.type === 'content') {
|
||||||
|
setAiReply(prev => prev + (msg.delta || ''));
|
||||||
|
} else if (msg.type === 'audio') {
|
||||||
|
const audioBlob = new Blob([msg.audio], { type: 'audio/mpeg' });
|
||||||
|
const audioUrl = URL.createObjectURL(audioBlob);
|
||||||
|
const audio = new Audio(audioUrl);
|
||||||
|
audio.play().catch(() => {
|
||||||
|
console.warn("Mobile autoplay may be blocked until user gesture");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const startRecording = () => {
|
ws.onerror = (err) => console.error("WebSocket error", err);
|
||||||
if (!isRecording && mediaRecorderRef.current) {
|
ws.onclose = () => {
|
||||||
audioChunksRef.current = [];
|
console.log("WebSocket closed");
|
||||||
mediaRecorderRef.current.start();
|
wsRef.current = null;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const startRecording = async () => {
|
||||||
|
await connectRealtime();
|
||||||
|
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||||
|
|
||||||
|
const recorder = new MediaRecorder(stream, {
|
||||||
|
mimeType: 'audio/webm;codecs=opus',
|
||||||
|
audioBitsPerSecond: 64000
|
||||||
|
});
|
||||||
|
|
||||||
|
recorder.ondataavailable = (e) => {
|
||||||
|
if (e.data.size > 0 && wsRef.current?.readyState === WebSocket.OPEN) {
|
||||||
|
wsRef.current.send(e.data);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
recorder.start(250); // Send every 250ms
|
||||||
|
mediaRecorderRef.current = recorder;
|
||||||
|
setTranscript('');
|
||||||
|
setAiReply('');
|
||||||
setIsRecording(true);
|
setIsRecording(true);
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const stopRecording = () => {
|
const stopRecording = () => {
|
||||||
if (isRecording && mediaRecorderRef.current) {
|
if (mediaRecorderRef.current) {
|
||||||
mediaRecorderRef.current.stop();
|
mediaRecorderRef.current.stop();
|
||||||
setIsRecording(false);
|
mediaRecorderRef.current.stream.getTracks().forEach(track => track.stop());
|
||||||
|
mediaRecorderRef.current = null;
|
||||||
|
}
|
||||||
|
setIsRecording(false);
|
||||||
|
|
||||||
|
if (wsRef.current?.readyState === WebSocket.OPEN) {
|
||||||
|
wsRef.current.send(JSON.stringify({ type: 'stop' }));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
||||||
<div className="app-container">
|
<div className="app-container">
|
||||||
|
|
||||||
<div className="page-title">
|
<div className="page-title">
|
||||||
<strong>AI English Tutor - Yesterday's movie</strong>
|
<strong>AI English Tutor - Yesterday's movie</strong>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="scene-wrapper">
|
|
||||||
|
|
||||||
|
<div className="scene-wrapper">
|
||||||
<img src="/tutor_f.png" alt="Tutor Avatar" className="avatar" />
|
<img src="/tutor_f.png" alt="Tutor Avatar" className="avatar" />
|
||||||
|
|
||||||
<div className="dialogue-box">
|
<div className="dialogue-box">
|
||||||
|
@ -217,11 +183,11 @@ Teacher: Good job! How about Magic – something special and powerful.
|
||||||
|
|
||||||
<div className="button-container">
|
<div className="button-container">
|
||||||
<button
|
<button
|
||||||
onMouseDown={startRecording}
|
|
||||||
onMouseUp={stopRecording}
|
|
||||||
onMouseLeave={stopRecording} // Ensures it stops if mouse leaves the button
|
|
||||||
onTouchStart={startRecording}
|
onTouchStart={startRecording}
|
||||||
onTouchEnd={stopRecording}
|
onTouchEnd={stopRecording}
|
||||||
|
onMouseDown={startRecording}
|
||||||
|
onMouseUp={stopRecording}
|
||||||
|
onMouseLeave={stopRecording}
|
||||||
className={`control-button ${isRecording ? 'recording' : 'idle'}`}
|
className={`control-button ${isRecording ? 'recording' : 'idle'}`}
|
||||||
>
|
>
|
||||||
{isRecording ? 'กำลังพูด...' : 'กดค้างเพื่อพูด'}
|
{isRecording ? 'กำลังพูด...' : 'กดค้างเพื่อพูด'}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue