// ai-tutor-poc/src/App.jsx
import React, { useState, useRef, useEffect } from 'react';
import './App.css';
const OPENAI_API_KEY = import.meta.env.VITE_OPENAI_API_KEY;
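
// NOTE (dev-setup assumption): Vite exposes only env vars prefixed with
// VITE_, typically read from an .env.local file, e.g.
//   VITE_OPENAI_API_KEY=sk-...
// Vite inlines the value into the client bundle, so the key is visible to
// anyone who loads the page; acceptable for a local PoC, not for production.
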
function App() {
  const [transcript, setTranscript] = useState('');
  const [aiReply, setAiReply] = useState('');
  const [isRecording, setIsRecording] = useState(false);

  const mediaRecorderRef = useRef(null);
  const audioChunksRef = useRef([]);
  const audioStreamRef = useRef(null);
  const silenceTimerRef = useRef(null);
  const canvasRef = useRef(null);
  const analyserRef = useRef(null);
  const dataArrayRef = useRef(null);
  const audioContextRef = useRef(null);
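
  // Conversation loop (the intended flow): record mic audio -> Whisper
  // transcription -> GPT-4o tutor reply -> TTS playback -> resume recording
  // when playback ends, so the student can answer hands-free.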
  useEffect(() => {
    const initRecording = async () => {
      audioStreamRef.current = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaRecorderRef.current = new MediaRecorder(audioStreamRef.current);

      mediaRecorderRef.current.ondataavailable = (e) => {
        audioChunksRef.current.push(e.data);
      };

      mediaRecorderRef.current.onstop = async () => {
        const inputAudioBlob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
        audioChunksRef.current = []; // Clear for next recording
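
        // Step 1 (speech-to-text): upload the captured WebM clip to the
        // Whisper transcription endpoint as multipart form data.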
        const formData = new FormData();
        formData.append('file', inputAudioBlob, 'input.webm');
        formData.append('model', 'whisper-1');

        const whisperRes = await fetch('https://api.openai.com/v1/audio/transcriptions', {
          method: 'POST',
          headers: {
            Authorization: `Bearer ${OPENAI_API_KEY}`
          },
          body: formData
        });
        if (!whisperRes.ok) {
          console.error('Transcription API error:', await whisperRes.text());
          return;
        }
        const { text } = await whisperRes.json();
        setTranscript(text);
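
        // Step 2 (tutor reply): send the transcript to Chat Completions with
        // the tutor persona as the system prompt.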
        const chatRes = await fetch('https://api.openai.com/v1/chat/completions', {
          method: 'POST',
          headers: {
            'Authorization': `Bearer ${OPENAI_API_KEY}`,
            'Content-Type': 'application/json'
          },
          body: JSON.stringify({
            model: 'gpt-4o',
            messages: [
              {
                role: 'system',
                content: 'You are a kind English tutor who is fun to be around and is great at teaching English lessons through adventurous stories. You are assigned to talk with a primary school EFL student about a movie they watched yesterday. The conversation will start in Thai, and the teacher will transition the conversation to English.'
              },
              { role: 'user', content: text }
            ]
          })
        });

        const chatData = await chatRes.json();
        if (!chatData.choices || !chatData.choices[0]) {
          console.error('Chat API response error:', chatData);
          setAiReply('Sorry, something went wrong with the AI response.');
          return;
        }
        const reply = chatData.choices[0].message.content;
        setAiReply(reply);
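
        // Step 3 (text-to-speech): synthesize the reply and play it back.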
        const speechRes = await fetch('https://api.openai.com/v1/audio/speech', {
          method: 'POST',
          headers: {
            'Authorization': `Bearer ${OPENAI_API_KEY}`,
            'Content-Type': 'application/json'
          },
          body: JSON.stringify({
            model: 'tts-1-hd',
            voice: 'nova',
            input: reply
          })
        });
        const outputAudioBlob = await speechRes.blob();
        const audioUrl = URL.createObjectURL(outputAudioBlob);
        const audio = new Audio(audioUrl);

        // When playback finishes, start listening again so the conversation
        // continues without another button press. The handler is attached
        // before play() so a very short clip cannot finish first.
        audio.onended = () => {
          if (mediaRecorderRef.current && audioStreamRef.current) {
            audioChunksRef.current = [];
            mediaRecorderRef.current.start();
            setIsRecording(true);
            monitorSilence();
          }
        };
        audio.play();
      };
    };

    initRecording();
  }, []);
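
  // Silence detection: sample the mic through an AnalyserNode and, on every
  // animation frame, compute the RMS of the time-domain signal,
  //   rms = sqrt((1/N) * sum(x_i^2)),
  // with each byte sample normalized from [0, 255] to roughly [-1, 1].
  // Recording stops once rms stays below SILENCE_THRESHOLD for
  // SILENCE_TIMEOUT ms. The thresholds below are rough PoC values.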
  const monitorSilence = () => {
    if (!audioStreamRef.current) return;

    // Tear down any previous context; closing an already-closed AudioContext
    // throws, so check its state first.
    if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
      audioContextRef.current.close();
    }
    audioContextRef.current = new AudioContext();

    // Resume AudioContext to avoid browser autoplay policy issues
    audioContextRef.current.resume().then(() => {
      const source = audioContextRef.current.createMediaStreamSource(audioStreamRef.current);
      const analyser = audioContextRef.current.createAnalyser();
      analyser.fftSize = 2048;
      analyserRef.current = analyser;

      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);
      dataArrayRef.current = dataArray;
      source.connect(analyser);

      const SILENCE_THRESHOLD = 0.02; // RMS level treated as silence (0 to 1 scale); adjust as needed
      const SILENCE_TIMEOUT = 1500; // ms of continuous silence before recording stops
      let silenceStart = null;
      const checkSilenceAndDraw = () => {
        analyser.getByteTimeDomainData(dataArray);

        // Draw the live waveform. The canvas is rendered conditionally on
        // isRecording, so it may not exist on the first few frames; skip
        // drawing until it mounts.
        const canvas = canvasRef.current;
        if (canvas) {
          const canvasCtx = canvas.getContext('2d');
          canvasCtx.fillStyle = '#000';
          canvasCtx.fillRect(0, 0, canvas.width, canvas.height);
          canvasCtx.lineWidth = 2;
          canvasCtx.strokeStyle = '#00ff00';
          canvasCtx.beginPath();

          const sliceWidth = canvas.width / bufferLength;
          let x = 0;
          for (let i = 0; i < bufferLength; i++) {
            const v = dataArray[i] / 128.0;
            const y = (v * canvas.height) / 2;
            if (i === 0) {
              canvasCtx.moveTo(x, y);
            } else {
              canvasCtx.lineTo(x, y);
            }
            x += sliceWidth;
          }
          canvasCtx.lineTo(canvas.width, canvas.height / 2);
          canvasCtx.stroke();
        }

        // RMS of the normalized time-domain samples
        let sumSquares = 0;
        for (let i = 0; i < bufferLength; i++) {
          const normalized = (dataArray[i] - 128) / 128;
          sumSquares += normalized * normalized;
        }
        const rms = Math.sqrt(sumSquares / bufferLength);

        if (rms < SILENCE_THRESHOLD) {
          // Silence detected
          if (!silenceStart) {
            silenceStart = Date.now();
          } else if (Date.now() - silenceStart > SILENCE_TIMEOUT) {
            // Check the recorder's own state: the isRecording React state is
            // captured in a stale closure here and would always read false.
            if (mediaRecorderRef.current && mediaRecorderRef.current.state === 'recording') {
              mediaRecorderRef.current.stop();
              setIsRecording(false);
              audioContextRef.current.close();
            }
            silenceStart = null; // reset after stopping
            return; // stop the animation loop once recording stops
          }
        } else {
          // Sound detected
          silenceStart = null;
        }

        requestAnimationFrame(checkSilenceAndDraw);
      };

      checkSilenceAndDraw(); // kick off the monitoring/draw loop
    });
  };
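
  // Manual push-to-talk toggle; silence detection will still auto-stop an
  // in-progress recording.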
  const toggleRecording = () => {
    if (!isRecording) {
      audioChunksRef.current = [];
      mediaRecorderRef.current.start();
      setIsRecording(true);
      monitorSilence();
    } else {
      mediaRecorderRef.current.stop();
      setIsRecording(false);
    }
  };

  return (
<div className="app-container">
<div className="scene-wrapper">
<img src="/tutor_f.png" alt="Tutor Avatar" className="avatar" />
<div className="dialogue-box">
<div className="dialogue-text">
<strong>You:</strong> {transcript || <em>Say something</em>}
</div>
<div className="dialogue-text">
<strong>Tutor:</strong> {aiReply || <em>Waiting for your question</em>}
</div>
</div>
{isRecording && (
<canvas
ref={canvasRef}
width={300}
height={60}
className="waveform-canvas"
/>
)}
<div className="button-container">
<button
onClick={toggleRecording}
className={`control-button ${isRecording ? 'recording' : 'idle'}`}
>
            {/* Thai labels: "I'm done speaking" while recording, "Speak" when idle */}
            {isRecording ? 'ฉันพูดเสร็จแล้ว' : 'พูด'}
          </button>
        </div>
      </div>
    </div>
  );
}
export default App;