Switch to the OpenAI realtime API
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
Late Night Defender 2025-05-27 16:02:03 +07:00
parent bc4ebeff8c
commit 90aebf681a

View file

@ -1,5 +1,5 @@
// ai-tutor-poc/src/App.jsx // src/App.jsx
import React, { useState, useRef, useEffect } from 'react'; import React, { useState, useRef } from 'react';
import './App.css'; import './App.css';
const OPENAI_API_KEY = import.meta.env.VITE_OPENAI_API_KEY; const OPENAI_API_KEY = import.meta.env.VITE_OPENAI_API_KEY;
@ -8,9 +8,10 @@ function App() {
const [transcript, setTranscript] = useState(''); const [transcript, setTranscript] = useState('');
const [aiReply, setAiReply] = useState(''); const [aiReply, setAiReply] = useState('');
const [isRecording, setIsRecording] = useState(false); const [isRecording, setIsRecording] = useState(false);
const wsRef = useRef(null);
const mediaRecorderRef = useRef(null); const mediaRecorderRef = useRef(null);
const audioChunksRef = useRef([]);
const audioStreamRef = useRef(null);
const systemContent = `คุณเป็นครูสอนภาษาอังกฤษที่ใจดี เป็นกันเอง และสอนภาษาอังกฤษผ่านการใช้เรื่องราวหรือบทสนทนาได้เป็นอย่างดีอและสนุกสนาน const systemContent = `คุณเป็นครูสอนภาษาอังกฤษที่ใจดี เป็นกันเอง และสอนภาษาอังกฤษผ่านการใช้เรื่องราวหรือบทสนทนาได้เป็นอย่างดีอและสนุกสนาน
ณไดบมอบหมายใหดคยกบนกเรยนระดบประถมทเรยนภาษาองกฤษเปนภาษาตางประเทศ (EFL) เพอชวยใหกเรยนเรยนรภาษาองกฤษอยางเปนธรรมชาต ณไดบมอบหมายใหดคยกบนกเรยนระดบประถมทเรยนภาษาองกฤษเปนภาษาตางประเทศ (EFL) เพอชวยใหกเรยนเรยนรภาษาองกฤษอยางเปนธรรมชาต
@ -81,129 +82,94 @@ Teacher: Good job! How about Monster a big scary creature.
Student will try to pronounce Student will try to pronounce
Teacher: Good job! How about Magic something special and powerful. Teacher: Good job! How about Magic something special and powerful.
`; `; // (keep your full system prompt here)
// Open (or reuse) the realtime WebSocket session that handles
// speech-in / text+speech-out for the tutor conversation.
// Side effects: stores the socket in wsRef, updates transcript/aiReply
// state as frames arrive, and plays received audio.
const connectRealtime = async () => {
  // Only one realtime socket at a time — reuse an existing connection.
  if (wsRef.current) return;

  // NOTE(review): browsers cannot attach an Authorization header to a
  // WebSocket, hence the key in the query string. The value is
  // URL-encoded because `Bearer ${key}` contains a space, which would
  // otherwise produce a malformed URL. Shipping the API key to the
  // client is only acceptable for a POC — proxy this through a backend
  // before production.
  const ws = new WebSocket(
    `wss://api.openai.com/v1/realtime?authorization=${encodeURIComponent(`Bearer ${OPENAI_API_KEY}`)}`
  );
  wsRef.current = ws;

  ws.onopen = () => {
    // Announce the session: opus audio in, spoken replies with the
    // "nova" voice, and the tutor persona as the system message.
    ws.send(JSON.stringify({
      type: 'start',
      model: 'gpt-4o',
      config: {
        audio: {
          input: { encoding: 'webm-opus' },
          output: { voice: 'nova' }
        }
      },
      messages: [{ role: 'system', content: systemContent }]
    }));
  };

  ws.onmessage = (event) => {
    let msg;
    try {
      msg = JSON.parse(event.data);
    } catch {
      // Non-JSON frames (e.g. binary) would previously throw and kill
      // the handler; ignore them instead.
      console.warn('Ignoring non-JSON realtime frame');
      return;
    }

    if (msg.type === 'transcript') {
      // Server-side speech-to-text of the learner's utterance.
      setTranscript(msg.text || '');
    } else if (msg.type === 'content') {
      // Streaming text delta of the tutor's reply.
      setAiReply(prev => prev + (msg.delta || ''));
    } else if (msg.type === 'audio') {
      // assumes msg.audio is raw MP3 bytes — if the server delivers
      // base64 inside the JSON frame (typical), it must be decoded
      // before wrapping in a Blob; TODO confirm against the API.
      const audioBlob = new Blob([msg.audio], { type: 'audio/mpeg' });
      const audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);
      audio.play().catch(() => {
        console.warn("Mobile autoplay may be blocked until user gesture");
      });
    }
  };

  ws.onerror = (err) => console.error("WebSocket error", err);
  ws.onclose = () => {
    console.log("WebSocket closed");
    // Clear the ref so the next startRecording reconnects.
    wsRef.current = null;
  };
};
// Begin capturing microphone audio and streaming it to the realtime
// socket in 250ms opus chunks. Clears the previous transcript/reply.
// Bound to both mouse and touch press events.
const startRecording = async () => {
  // Guard against double-start: on touch devices both touchstart and
  // mousedown fire for one press, which would leak a second recorder
  // and microphone stream. (The pre-realtime code had this guard.)
  if (mediaRecorderRef.current) return;

  await connectRealtime();

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    // Permission denied / no device: fail gracefully instead of an
    // unhandled rejection that leaves the UI stuck.
    console.error('Microphone access denied or unavailable', err);
    return;
  }

  const recorder = new MediaRecorder(stream, {
    mimeType: 'audio/webm;codecs=opus',
    audioBitsPerSecond: 64000
  });

  recorder.ondataavailable = (e) => {
    // Forward chunks to the realtime socket; silently drop them while
    // the socket is still connecting or already closed.
    if (e.data.size > 0 && wsRef.current?.readyState === WebSocket.OPEN) {
      wsRef.current.send(e.data);
    }
  };

  recorder.start(250); // Emit a chunk every 250ms for low latency.
  mediaRecorderRef.current = recorder;
  setTranscript('');
  setAiReply('');
  setIsRecording(true);
};
// End the current utterance: tear down the recorder, release the
// microphone, and notify the server that input is complete.
// Safe to call when nothing is recording.
const stopRecording = () => {
  const recorder = mediaRecorderRef.current;
  if (recorder) {
    recorder.stop();
    // Stop every track so the browser's mic indicator turns off.
    for (const track of recorder.stream.getTracks()) {
      track.stop();
    }
    mediaRecorderRef.current = null;
  }
  setIsRecording(false);

  // Tell the realtime session the utterance is finished.
  const ws = wsRef.current;
  if (ws?.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: 'stop' }));
  }
};
return ( return (
<div className="app-container"> <div className="app-container">
<div className="page-title"> <div className="page-title">
<strong>AI English Tutor - Yesterday's movie</strong> <strong>AI English Tutor - Yesterday's movie</strong>
</div> </div>
<div className="scene-wrapper">
<div className="scene-wrapper">
<img src="/tutor_f.png" alt="Tutor Avatar" className="avatar" /> <img src="/tutor_f.png" alt="Tutor Avatar" className="avatar" />
<div className="dialogue-box"> <div className="dialogue-box">
@ -217,11 +183,11 @@ Teacher: Good job! How about Magic something special and powerful.
<div className="button-container"> <div className="button-container">
<button <button
onMouseDown={startRecording}
onMouseUp={stopRecording}
onMouseLeave={stopRecording} // Ensures it stops if mouse leaves the button
onTouchStart={startRecording} onTouchStart={startRecording}
onTouchEnd={stopRecording} onTouchEnd={stopRecording}
onMouseDown={startRecording}
onMouseUp={stopRecording}
onMouseLeave={stopRecording}
className={`control-button ${isRecording ? 'recording' : 'idle'}`} className={`control-button ${isRecording ? 'recording' : 'idle'}`}
> >
{isRecording ? 'กำลังพูด...' : 'กดค้างเพื่อพูด'} {isRecording ? 'กำลังพูด...' : 'กดค้างเพื่อพูด'}