# Android+STM32语音控制系统的实现(Android端)

## 1、新建Android工程,目录结构如下

## 2、界面设计--res/layout/activity_main.xml

```xml
<?xml version="1.0" encoding="utf-8"?>
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:gravity="center_horizontal"
    android:orientation="vertical"
    android:padding="30dp"
    android:background="#f9f9f9">

    <ImageView
        android:id="@+id/iv_light"
        android:layout_width="160dp"
        android:layout_height="160dp"
        android:src="@drawable/l0" />

    <pl.droidsonroids.gif.GifImageView
        android:id="@+id/iv_fan"
        android:layout_width="160dp"
        android:layout_height="160dp"
        android:src="@drawable/fan2" />

    <TextView
        android:id="@+id/tv_result"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="长按按钮说话"
        android:textSize="24sp"
        android:layout_marginTop="20dp" />

    <!-- Core control: press-and-hold record button -->
    <Button
        android:id="@+id/btn_record"
        android:layout_width="240dp"
        android:layout_height="60dp"
        android:text="按住说话"
        android:backgroundTint="#2196F3"
        android:textColor="#fff"
        android:layout_marginTop="30dp" />
</LinearLayout>
```

## 3、添加权限--AndroidManifest.xml

```xml
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<!-- Required by RecordingForegroundService.startForeground() on API 28+ -->
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
<!-- Required for microphone-type foreground services when targeting API 34+ -->
<uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />

<!-- Inside <application>: the service must be declared or it cannot be started -->
<service
    android:name=".RecordingForegroundService"
    android:exported="false"
    android:foregroundServiceType="microphone" />
```

## 4、添加依赖--build.gradle

```gradle
// TFLite core library
implementation 'org.tensorflow:tensorflow-lite:2.16.1'
// TensorFlow select-ops support
implementation 'org.tensorflow:tensorflow-lite-select-tf-ops:2.16.1'
// GIF support
implementation 'pl.droidsonroids.gif:android-gif-drawable:1.2.29'
```

## 5、Java代码

### (1)--AudioCapture.java

```java
package com.example.voicecontrol;

import static com.example.voicecontrol.Config.SAMPLE_POINTS;
import static com.example.voicecontrol.Config.SAMPLE_RATE;

import android.Manifest;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.util.Log;

import androidx.annotation.RequiresPermission;

/**
 * Captures one fixed-length mono 16-bit PCM clip ({@code SAMPLE_POINTS} samples at
 * {@code SAMPLE_RATE} Hz) from the microphone and converts it to floats.
 *
 * <p>All access to the underlying {@link AudioRecord} is serialized on a private lock.
 */
public class AudioCapture {
    private static final String TAG = "AudioCapture";
    /** Upper bound on how long {@link #readAndStop()} waits for the clip to fill. */
    private static final long READ_TIMEOUT_MS = 3000;

    private AudioRecord audioRecord;
    private final Object lock = new Object();
    private volatile boolean isRecording = false;
    private int bufferSize;

    /**
     * Releases any previous recorder and creates a new one.
     *
     * @return {@code true} if the recorder reached {@code STATE_INITIALIZED}
     */
    @RequiresPermission(Manifest.permission.RECORD_AUDIO)
    public boolean init() {
        synchronized (lock) {
            release();

            int minBufferSize = AudioRecord.getMinBufferSize(
                    SAMPLE_RATE,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT);
            if (minBufferSize <= 0) {
                Log.e(TAG, "getMinBufferSize failed: " + minBufferSize);
                return false;
            }

            // Size is in bytes; SAMPLE_POINTS * 2 holds one full clip of 16-bit samples.
            bufferSize = Math.max(minBufferSize * 2, SAMPLE_POINTS * 2);

            try {
                audioRecord = new AudioRecord(
                        MediaRecorder.AudioSource.MIC, // raw microphone
                        SAMPLE_RATE,
                        AudioFormat.CHANNEL_IN_MONO,
                        AudioFormat.ENCODING_PCM_16BIT,
                        bufferSize);
            } catch (Exception e) {
                Log.e(TAG, "Failed to create AudioRecord", e);
                return false;
            }

            if (audioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
                release();
                return false;
            }
            return true;
        }
    }

    /**
     * Starts capturing.
     *
     * @return {@code false} if {@link #init()} has not succeeded or the recorder refused to start
     */
    public boolean startRecording() {
        synchronized (lock) {
            if (audioRecord == null) {
                return false;
            }
            try {
                audioRecord.startRecording();
                isRecording = true;
                return true;
            } catch (Exception e) {
                Log.e(TAG, "startRecording failed", e);
                return false;
            }
        }
    }

    /**
     * Reads up to {@code SAMPLE_POINTS} samples (blocking for at most
     * {@link #READ_TIMEOUT_MS}), stops the recorder and returns the clip scaled by 1/32768.
     *
     * @return a {@code SAMPLE_POINTS}-long array, zero-padded at the tail, or {@code null}
     *         if not recording or fewer than 90% of the samples were read
     */
    public float[] readAndStop() {
        synchronized (lock) {
            if (!isRecording || audioRecord == null) {
                return null;
            }

            short[] buffer = new short[SAMPLE_POINTS];
            int totalRead = 0;
            long deadline = System.currentTimeMillis() + READ_TIMEOUT_MS;

            // Keep reading until the clip is full or the deadline passes.
            while (totalRead < SAMPLE_POINTS && System.currentTimeMillis() < deadline) {
                int read = audioRecord.read(buffer, totalRead, SAMPLE_POINTS - totalRead);
                if (read > 0) {
                    totalRead += read;
                } else if (read < 0) {
                    Log.e(TAG, "AudioRecord error: " + read);
                    break;
                }
            }

            try {
                audioRecord.stop();
            } catch (Exception e) {
                // Best effort: the recorder is released or re-initialized before reuse.
                Log.w(TAG, "AudioRecord.stop failed", e);
            }
            isRecording = false;

            Log.d(TAG, "Audio read: " + totalRead + " samples");

            if (totalRead < SAMPLE_POINTS * 0.9) { // tolerate up to 10% missing samples
                Log.w(TAG, "Insufficient audio data");
                return null;
            }

            // Scale to floats; elements past totalRead keep the array's default 0.0f.
            float[] audio = new float[SAMPLE_POINTS];
            for (int i = 0; i < totalRead; i++) {
                audio[i] = buffer[i] / 32768.0f;
            }
            return audio;
        }
    }

    /** Releases the recorder, if any, and clears the recording flag. */
    public void release() {
        synchronized (lock) {
            if (audioRecord != null) {
                try {
                    audioRecord.release();
                } catch (Exception e) {
                    Log.w(TAG, "AudioRecord.release failed", e);
                }
                audioRecord = null;
            }
            isRecording = false;
        }
    }
}
```

### (2)--RecordingForegroundService.java

```java
package com.example.voicecontrol;

import android.app.Notification;
import android.app.NotificationChannel;
import android.app.NotificationManager;
import android.app.Service;
import android.content.Intent;
import android.os.Build;
import android.os.IBinder;

import androidx.annotation.Nullable;
import androidx.core.app.NotificationCompat;

/**
 * Unbound service that only shows a low-priority "recording" notification and puts itself
 * in the foreground as soon as it is created; it performs no recording itself.
 */
public class RecordingForegroundService extends Service {
    private static final String CHANNEL_ID = "RecordingServiceChannel";
    private static final int NOTIFICATION_ID = 1001;

    @Override
    public void onCreate() {
        super.onCreate();
        // The channel must exist before the notification is posted on API 26+.
        createNotificationChannel();
        startForeground(NOTIFICATION_ID, getNotification());
    }

    @Nullable
    @Override
    public IBinder onBind(Intent intent) {
        return null; // binding is not supported
    }

    /** Creates the notification channel on API 26+; no-op on older versions. */
    private void createNotificationChannel() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            NotificationChannel channel = new NotificationChannel(
                    CHANNEL_ID,
                    "语音录音服务",
                    NotificationManager.IMPORTANCE_LOW);
            channel.setDescription("用于后台稳定录音");
            NotificationManager manager = getSystemService(NotificationManager.class);
            if (manager != null) {
                manager.createNotificationChannel(channel);
            }
        }
    }

    /** Builds the notification shown while the service is in the foreground. */
    private Notification getNotification() {
        return new NotificationCompat.Builder(this, CHANNEL_ID)
                .setContentTitle("语音控制")
                .setContentText("正在录音...")
                .setSmallIcon(R.mipmap.ic_launcher)
                .setPriority(NotificationCompat.PRIORITY_LOW)
                .build();
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
    }
}
```

### (3)--Config.java

```java
package com.example.voicecontrol;

/** Compile-time constants shared by audio capture and the classifier. */
public final class Config {
    /** Asset file name of the TFLite model. */
    public static final String model_name = "cnn_model.tflite";
    /** Asset file name of the label list. */
    public static final String label_name = "label.txt";

    // Audio parameters
    public static final int SAMPLE_RATE = 16000;
    public static final int SAMPLE_POINTS = 36000; // 2.25 s at 16 kHz
    public static final int N_MFCC = 13;
    public static final int TIME_STEPS = 67;
    public static final float CONFIDENCE_THRESHOLD = 0.5f;

    private Config() {
        // constants holder; not instantiable
    }
}
```

### (4)--TFLiteClassifier.java

```java
package com.example.voicecontrol;

import static com.example.voicecontrol.Config.CONFIDENCE_THRESHOLD;
import static com.example.voicecontrol.Config.N_MFCC;
import static com.example.voicecontrol.Config.SAMPLE_POINTS;
import static com.example.voicecontrol.Config.TIME_STEPS;
import static com.example.voicecontrol.Config.label_name;
import static com.example.voicecontrol.Config.model_name;

import android.content.Context;
import android.content.res.AssetFileDescriptor;
import android.util.Log;

import org.tensorflow.lite.Interpreter;

import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

/**
 * Wraps a TFLite {@link Interpreter} that maps a raw audio clip of {@code SAMPLE_POINTS}
 * floats to a command label loaded from the assets.
 *
 * <p>{@link #predict(float[])} and {@link #close()} are synchronized because they share
 * the interpreter and one preallocated output buffer.
 */
public class TFLiteClassifier {
    private static final String TAG = "TFLiteClassifier";

    private Interpreter interpreter;
    private final Map<Integer, String> labels = new HashMap<>();
    private boolean isLoaded = false;
    private float[][] preAllocatedOutput;

    /**
     * Loads the model and labels from assets. Failures are logged, not thrown; afterwards
     * {@link #predict(float[])} returns {@code "模型未加载"}.
     */
    public TFLiteClassifier(Context context) {
        try {
            Log.d(TAG, "Loading TFLite model...");
            MappedByteBuffer modelBuffer = loadModelFile(context);

            Interpreter.Options options = new Interpreter.Options();
            options.setNumThreads(4);

            interpreter = new Interpreter(modelBuffer, options);
            interpreter.allocateTensors();

            int outputSize = interpreter.getOutputTensor(0).shape()[1];
            preAllocatedOutput = new float[1][outputSize];

            loadLabels(context);
            isLoaded = true;
            printModelInfo();
            Log.d(TAG, "Model loaded successfully");
        } catch (Exception e) {
            Log.e(TAG, "Failed to load model", e);
            isLoaded = false;
        }
    }

    /** Memory-maps the model asset; every handle is closed even when mapping fails. */
    private MappedByteBuffer loadModelFile(Context context) throws Exception {
        try (AssetFileDescriptor afd = context.getAssets().openFd(model_name);
             FileInputStream fis = new FileInputStream(afd.getFileDescriptor());
             FileChannel channel = fis.getChannel()) {
            return channel.map(
                    FileChannel.MapMode.READ_ONLY,
                    afd.getStartOffset(),
                    afd.getDeclaredLength());
        }
    }

    /**
     * Reads the label asset as UTF-8. A line is either {@code "<index> <label>"} or a bare
     * label; bare labels (and lines whose first token is not an integer) are keyed by the
     * running count of non-empty lines.
     */
    private void loadLabels(Context context) throws Exception {
        try (InputStream is = context.getAssets().open(label_name);
             Scanner scanner = new Scanner(is, "UTF-8")) {
            int index = 0;
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.contains(" ")) {
                    // Limit 2 keeps parts[1] present even with repeated separators.
                    String[] parts = line.split("\\s+", 2);
                    try {
                        labels.put(Integer.parseInt(parts[0]), parts[1]);
                    } catch (NumberFormatException e) {
                        labels.put(index, parts[0]);
                    }
                } else {
                    labels.put(index, line);
                }
                index++;
            }
        }
        Log.d(TAG, "Loaded " + labels.size() + " labels");
    }

    /** Logs the shape of every input tensor; diagnostic only. */
    private void printModelInfo() {
        if (interpreter == null) {
            return;
        }
        try {
            Log.d(TAG, "=== Model Information ===");
            for (int i = 0; i < interpreter.getInputTensorCount(); i++) {
                int[] shape = interpreter.getInputTensor(i).shape();
                Log.d(TAG, "Input " + i + " shape: " + Arrays.toString(shape));
            }
        } catch (Exception e) {
            Log.w(TAG, "Could not read model info", e);
        }
    }

    /**
     * Classifies one clip.
     *
     * @param audio exactly {@code SAMPLE_POINTS} samples
     * @return the label; {@code "未识别"} when the top score is below
     *         {@code CONFIDENCE_THRESHOLD}; {@code "未知"} when the index has no label;
     *         {@code "模型未加载"} when loading failed; {@code "识别失败"} on bad input or
     *         an inference error
     */
    public synchronized String predict(float[] audio) {
        if (!isLoaded || interpreter == null) {
            Log.e(TAG, "Interpreter not loaded");
            return "模型未加载";
        }
        if (audio == null || audio.length != SAMPLE_POINTS) {
            Log.e(TAG, "Unexpected audio length: " + (audio == null ? "null" : audio.length));
            return "识别失败";
        }

        try {
            float[][] input = {audio}; // batch of one, no copy
            float[][] output = preAllocatedOutput;

            // Run inference
            interpreter.run(input, output);

            // Pick the class with the highest score
            int maxIndex = 0;
            float maxValue = output[0][0];
            for (int i = 1; i < output[0].length; i++) {
                if (output[0][i] > maxValue) {
                    maxValue = output[0][i];
                    maxIndex = i;
                }
            }

            Log.d(TAG, "Prediction: index=" + maxIndex + ", confidence=" + maxValue);

            // Reject low-confidence results
            if (maxValue < CONFIDENCE_THRESHOLD) {
                return "未识别";
            }

            String label = labels.get(maxIndex);
            return label != null ? label : "未知";
        } catch (Exception e) {
            Log.e(TAG, "Prediction failed", e);
            return "识别失败";
        }
    }

    /** Closes the interpreter; later {@link #predict(float[])} calls report "not loaded". */
    public synchronized void close() {
        if (interpreter != null) {
            interpreter.close();
            interpreter = null;
        }
        isLoaded = false;
    }
}
```

### (5)--TcpClient.java

```java
package com.example.voicecontrol;

import android.os.Handler;
import android.util.Log;

import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

/**
 * TCP client that keeps one persistent connection and reconnects automatically.
 *
 * <p>Connecting and sending run on short-lived worker threads; result callbacks and
 * reconnect attempts are posted to the handler passed to the constructor.
 */
public class TcpClient {
    private static final int CONNECT_TIMEOUT_MS = 3000;
    private static final long RECONNECT_DELAY_MS = 3000;

    // Written by worker threads, read by others: volatile for visibility.
    private volatile Socket socket;
    private volatile OutputStream outputStream;
    private final String host;
    private final int port;
    private final Handler mainHandler;
    private volatile boolean isConnecting = false;
    private volatile boolean isClosing = false;

    TcpClient(String host, int port, Handler mainHandler) {
        this.host = host;
        this.port = port;
        this.mainHandler = mainHandler;
    }

    /** Starts an asynchronous connect unless closed, already connecting, or connected. */
    synchronized void connect() {
        if (isClosing) {
            return;
        }
        if (isConnecting || isConnected()) {
            return;
        }
        isConnecting = true;

        new Thread(() -> {
            Socket candidate = new Socket();
            try {
                closeQuietly();
                // Bounded connect so an unreachable host cannot hang the worker indefinitely.
                candidate.connect(new InetSocketAddress(host, port), CONNECT_TIMEOUT_MS);
                OutputStream out = candidate.getOutputStream();
                synchronized (TcpClient.this) {
                    if (isClosing) {
                        // close() ran while we were connecting; do not leak the new socket.
                        candidate.close();
                        return;
                    }
                    socket = candidate;
                    outputStream = out;
                }
                Log.d("TCP", "连接成功: " + host + ":" + port);
            } catch (Exception e) {
                Log.e("TCP", "连接失败", e);
                try {
                    candidate.close();
                } catch (IOException ignored) {
                    // nothing more to do for a socket that never connected
                }
                scheduleReconnect();
            } finally {
                isConnecting = false;
            }
        }).start();
    }

    /**
     * Sends {@code command} followed by a newline on a worker thread, connecting first if
     * needed (waits up to 1 s for the connection).
     *
     * @param onSuccess posted to the handler after the write is flushed; may be null
     * @param onFail    posted to the handler on any failure or when closed; may be null
     */
    synchronized void send(String command, Runnable onSuccess, Runnable onFail) {
        if (isClosing) {
            if (onFail != null) {
                mainHandler.post(onFail);
            }
            return;
        }

        new Thread(() -> {
            try {
                // Make sure a connection is available
                if (!isConnected()) {
                    connect();
                    // Wait for the connection, at most 1 second
                    int waitCount = 0;
                    while (!isConnected() && waitCount < 10) {
                        Thread.sleep(100);
                        waitCount++;
                    }
                    if (!isConnected()) {
                        throw new IOException("连接失败");
                    }
                }

                OutputStream out = outputStream;
                if (out == null) {
                    throw new IOException("连接失败");
                }
                out.write((command + "\n").getBytes(StandardCharsets.UTF_8));
                out.flush();

                if (onSuccess != null) {
                    mainHandler.post(onSuccess);
                }
            } catch (Exception e) {
                Log.e("TCP", "发送命令失败: " + command, e);
                // Drop the broken socket; otherwise isConnected() keeps returning true and
                // the scheduled reconnect is skipped.
                closeQuietly();
                if (onFail != null) {
                    mainHandler.post(onFail);
                }
                scheduleReconnect();
            }
        }).start();
    }

    private boolean isConnected() {
        Socket s = socket;
        return s != null && s.isConnected() && !s.isClosed() && outputStream != null;
    }

    /** Posts a delayed reconnect attempt to the handler. */
    private void scheduleReconnect() {
        if (isClosing) {
            return;
        }
        mainHandler.postDelayed(() -> {
            if (!isClosing && !isConnected()) {
                connect();
            }
        }, RECONNECT_DELAY_MS);
    }

    /** Permanently closes the client; later connect/send calls are rejected. */
    synchronized void close() {
        isClosing = true;
        closeQuietly();
    }

    private synchronized void closeQuietly() {
        try {
            if (outputStream != null) {
                outputStream.close();
            }
        } catch (Exception ignored) {
            // best-effort close
        }
        try {
            if (socket != null) {
                socket.close();
            }
        } catch (Exception ignored) {
            // best-effort close
        }
        outputStream = null;
        socket = null;
    }
}
```

### (6)--MainActivity.java

```java
package com.example.voicecontrol;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import android.Manifest;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.os.Handler;
import android.os.Looper;
import android.os.Process;
import android.util.Log;
import android.view.MotionEvent;
import android.widget.Button;
import android.widget.ImageView;
import android.widget.TextView;
import android.widget.Toast;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import pl.droidsonroids.gif.GifDrawable;
import pl.droidsonroids.gif.GifImageView;

/**
 * Push-to-talk screen: records while the button is held, classifies the clip on release,
 * updates the lamp/fan images and forwards the command over TCP.
 */
public class MainActivity extends AppCompatActivity {

    private static final int REQUEST_RECORD_AUDIO = 100;
    /** Minimum time between two presses, in milliseconds. */
    private static final long MIN_RECORD_INTERVAL = 300;

    // UI widgets
    private ImageView ivLight;
    private GifImageView ivFan;
    private GifDrawable gifDrawable;
    private TextView tvResult;
    private Button btnRecord;

    // Recording and model
    private AudioCapture audioCapture;
    private TFLiteClassifier classifier;

    // Worker for blocking audio reads and inference
    private final ExecutorService audioExecutor = Executors.newSingleThreadExecutor();
    private final Handler mainHandler = new Handler(Looper.getMainLooper());

    private boolean isRecording = false;
    /** True from button release until the result is shown; blocks a new recording. */
    private boolean isProcessing = false;
    private long lastRecordTime = 0;

    // Networking: persistent-connection client
    private TcpClient tcpClient;
    private String esp8266_ip = "192.168.4.1";
    private int esp8266_port = 8080;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        initViews();
        initComponents();
        initGif();
        checkPermissions();
        initRecordButton();

        // Create the TCP client and open the persistent connection
        tcpClient = new TcpClient(esp8266_ip, esp8266_port, mainHandler);
        tcpClient.connect();
    }

    private void initViews() {
        ivLight = findViewById(R.id.iv_light);
        ivFan = findViewById(R.id.iv_fan);
        tvResult = findViewById(R.id.tv_result);
        btnRecord = findViewById(R.id.btn_record);
    }

    private void initComponents() {
        classifier = new TFLiteClassifier(this);
        audioCapture = new AudioCapture();
    }

    /** Loads the fan animation and shows the static "off" image until the fan is turned on. */
    private void initGif() {
        try {
            gifDrawable = new GifDrawable(getResources(), R.drawable.fan2);
            // NOTE(review): GifDrawable is believed to start in the running state; stop it
            // so the first "打开风扇" passes the !isRunning() check in doAction. Confirm
            // against the library docs.
            gifDrawable.stop();
            ivFan.setImageResource(R.drawable.fan1);
        } catch (IOException e) {
            showToast("GIF加载失败");
            gifDrawable = null;
        }
    }

    private void initRecordButton() {
        btnRecord.setOnTouchListener((v, event) -> {
            switch (event.getAction()) {
                case MotionEvent.ACTION_DOWN:
                    handleDown();
                    return true;
                case MotionEvent.ACTION_UP:
                case MotionEvent.ACTION_CANCEL:
                    handleUp();
                    return true;
                default:
                    return false;
            }
        });
    }

    /** Button pressed: start the foreground service and begin recording. */
    private void handleDown() {
        // The previous clip is still being read/classified; starting again would block the
        // UI thread on AudioCapture's lock.
        if (isProcessing) {
            return;
        }

        long now = System.currentTimeMillis();
        if (now - lastRecordTime < MIN_RECORD_INTERVAL) {
            return;
        }
        lastRecordTime = now;

        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
            showToast("请开启录音权限");
            return;
        }

        // Compat call: Context.startForegroundService only exists on API 26+.
        ContextCompat.startForegroundService(
                this, new Intent(this, RecordingForegroundService.class));

        if (audioCapture.init() && audioCapture.startRecording()) {
            isRecording = true;
            tvResult.setText("正在录音...");
            btnRecord.setText("松开识别");
            btnRecord.setBackgroundTintList(ContextCompat.getColorStateList(this,
                    com.google.android.material.R.color.design_default_color_primary));
        } else {
            showToast("启动录音失败");
            stopService(new Intent(this, RecordingForegroundService.class));
        }
    }

    /** Button released: read the clip and classify it off the UI thread, then act on it. */
    private void handleUp() {
        if (!isRecording) {
            return;
        }
        isRecording = false;
        isProcessing = true;
        btnRecord.setText("识别中...");

        audioExecutor.execute(() -> {
            Process.setThreadPriority(Process.THREAD_PRIORITY_AUDIO);
            float[] data = audioCapture.readAndStop();
            // Inference stays on the worker so the UI thread never waits for the model.
            final String cmd = (data != null) ? classifier.predict(data) : null;

            mainHandler.post(() -> {
                isProcessing = false;
                if (isFinishing() || isDestroyed()) {
                    return; // views are gone; onDestroy already cleaned up
                }

                stopService(new Intent(this, RecordingForegroundService.class));
                btnRecord.setText("按住说话");
                btnRecord.setBackgroundTintList(
                        ContextCompat.getColorStateList(this, R.color.black));

                if (cmd == null) {
                    tvResult.setText("录音失败");
                    showToast("录音失败,请重试");
                    return;
                }

                tvResult.setText("结果:" + cmd);
                doAction(cmd);
            });
        });
    }

    /** Updates the UI for a recognized label and forwards the matching command. */
    private void doAction(String cmd) {
        if ("未识别".equals(cmd) || "识别失败".equals(cmd)) {
            showToast(cmd);
            return;
        }

        switch (cmd) {
            case "关灯":
                ivLight.setImageResource(R.drawable.l0);
                showToast("已关灯");
                sendCommandToSTM32("CMD_LED_OFF");
                break;
            case "开灯":
                ivLight.setImageResource(R.drawable.l1);
                showToast("已开灯");
                sendCommandToSTM32("CMD_LED_ON");
                break;
            case "打开风扇":
                // Command is sent only on an actual state change.
                if (gifDrawable != null && !gifDrawable.isRunning()) {
                    gifDrawable.start();
                    ivFan.setImageDrawable(gifDrawable);
                    sendCommandToSTM32("CMD_FAN_ON");
                }
                showToast("风扇已开启");
                break;
            case "关闭风扇":
                if (gifDrawable != null && gifDrawable.isRunning()) {
                    gifDrawable.stop();
                    ivFan.setImageResource(R.drawable.fan1);
                    sendCommandToSTM32("CMD_FAN_OFF");
                }
                showToast("风扇已关闭");
                break;
            default:
                showToast("未知指令");
        }
    }

    /** Sends a command through the persistent-connection client. */
    private void sendCommandToSTM32(String command) {
        if (tcpClient == null) {
            showToast("网络未初始化");
            return;
        }
        tcpClient.send(command,
                () -> Log.d("TCP", "命令发送成功: " + command),
                () -> showToast("发送失败,请检查网络"));
    }

    private void checkPermissions() {
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
                != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
        }
    }

    private void showToast(String s) {
        Toast.makeText(this, s, Toast.LENGTH_SHORT).show();
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
                                           @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode == REQUEST_RECORD_AUDIO) {
            // An empty result means the request was cancelled, which is also "not granted".
            if (grantResults.length == 0
                    || grantResults[0] != PackageManager.PERMISSION_GRANTED) {
                showToast("必须允许录音权限才能使用语音控制");
            }
        }
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();

        // Close the network connection
        if (tcpClient != null) {
            tcpClient.close();
        }

        // Stop the foreground service
        stopService(new Intent(this, RecordingForegroundService.class));

        // Release the recorder
        if (audioCapture != null) {
            audioCapture.release();
        }

        // Close the model
        if (classifier != null) {
            classifier.close();
        }

        // Release GIF resources
        if (gifDrawable != null) {
            gifDrawable.recycle();
        }

        // Shut down the worker
        audioExecutor.shutdownNow();
    }
}
```

## 6、测试