超简单的婴儿哭声检测实现方案--python版

2024-10-10 09:16:49 浏览数 (2)

超简单的婴儿哭声检测实现方案--python版

1. 构建项目

项目结构

代码语言:bash复制
└─audio_data ## 音频文件
    ├─mp3
    ├─test
    └─wav  ## 训练音频源文件 每个目录代表不同的标签,自己定义和随意增加,cry 目录存放的是婴儿的哭声
        ├─cry
        ├─non_cry
        └─other
└─get-model.py
└─main.py
└─requirements.txt

get-model.py

代码语言:python代码运行次数:0复制
## get-model.py
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump, load  # 使用 joblib 直接导入 dump 和 load
from packaging import version  # 用于版本比较

# Load an audio file
def load_audio_file(file_path):
    """Decode ``file_path`` with librosa and return ``(signal, sample_rate)``.

    ``sr=None`` tells librosa to keep the file's own sampling rate instead
    of resampling, and ``mono=True`` down-mixes the audio to one channel.
    """
    return librosa.load(file_path, sr=None, mono=True)

# Extract MFCC features
def extract_features(file_path, sample_rate):
    """Return the time-averaged MFCC vector (13 coefficients) of an audio file.

    The audio is decoded as mono and resampled to ``sample_rate`` while
    loading, so the rate given to the MFCC computation always matches the
    signal. Loading at the native rate while telling ``librosa.feature.mfcc``
    a different rate would build the mel filterbank for the wrong frequency
    axis and make features from files with different rates incomparable.

    Args:
        file_path: Path of the audio file to read.
        sample_rate: Target sampling rate in Hz. ``None`` keeps the file's
            native rate, which is then also used for the MFCC computation.

    Returns:
        A 1-D NumPy array with one value per MFCC coefficient: the mean of
        that coefficient over all frames.

    Note:
        Features used for training and for prediction must be produced by
        the same extraction settings.
    """
    # librosa.load returns the rate actually used, which equals sample_rate
    # unless sample_rate is None (native rate).
    signal, effective_rate = librosa.load(file_path, sr=sample_rate, mono=True)
    mfccs = librosa.feature.mfcc(y=signal, sr=effective_rate, n_mfcc=13)
    # mfccs is (n_mfcc, n_frames); average over the frame axis.
    return np.mean(mfccs, axis=1)

# Data directory (forward slashes, no backslashes)
data_dir = './audio_data/wav'
# Map each class sub-directory name to its integer label.
# Add more entries here when more class directories are created.
labels = {'cry': 1, 'non_cry': 0, 'other': 2}

# Feature vectors and their labels, filled in by the loop below
X, y = [], []

# Walk the data directory; each sub-directory name is the class of its files
for root, dirs, files in os.walk(data_dir):
    print(f"Processing directory: {root}")
    for file in files:
        print(f"File found: {os.path.join(root, file)}")
        if not file.endswith('.wav'):
            continue

        # Use the full directory name as the label key. Splitting on '_'
        # would turn 'non_cry' into 'non', which is not in `labels`, and
        # every non_cry sample would be skipped silently.
        file_label = os.path.basename(root)
        print(f"File label: {file_label}")
        if file_label not in labels:
            continue

        file_path = os.path.join(root, file)
        print(f"Processing file: {file_path}")
        try:
            feature = extract_features(file_path, sample_rate=22050)
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error processing file {file_path}: {e}")
            continue

        X.append(feature)
        y.append(labels[file_label])
        print("Feature extracted successfully.")
        print(f"Feature: {feature}")
        print(f"Label: {labels[file_label]}")

# Abort early when no sample was collected
if not X or not y:
    raise ValueError("No valid data found in the directory.")

# Convert the collected lists to NumPy arrays
X, y = np.array(X), np.array(y)

# Print shapes and contents to confirm the data was loaded as expected
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")
print(f"X: {X}")
print(f"y: {y}")

# Inspect the label distribution
unique_labels, counts = np.unique(y, return_counts=True)
print(f"Unique labels: {unique_labels}")
print(f"Counts: {counts}")

# A classifier cannot be trained on fewer than two distinct labels
if len(unique_labels) < 2:
    raise ValueError("The dataset must contain at least two different labels.")

# Hold out 25% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize features: fit the scaler on the training split only,
# then apply the same transform to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a linear SVM. Per the scikit-learn docs, decision_function_shape
# selects the shape of decision_function output ('ovr' or 'ovo').
model = SVC(kernel='linear', C=1, decision_function_shape='ovr').fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(report)

# Persist the trained model and the fitted scaler for later prediction.
# `dump` is imported directly from joblib, so no scikit-learn version check
# is needed. The old fallback branch called `joblib.dump`, but the name
# `joblib` is never imported, so it would have raised NameError.
dump(model, 'model.pkl')
dump(scaler, 'scaler.pkl')

print("Model and scaler saved.")

main.py

代码语言:python代码运行次数:0复制
## main.py
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from joblib import load  # 使用 joblib 直接导入 load

# Load an audio file
def load_audio_file(file_path):
    """Return ``(signal, sample_rate)`` for the audio stored at ``file_path``.

    The file is read through librosa at its own sampling rate (``sr=None``)
    and down-mixed to a single channel (``mono=True``).
    """
    return librosa.load(file_path, sr=None, mono=True)

# Extract MFCC features
def extract_features(file_path, sample_rate):
    """Return the time-averaged MFCC vector (13 coefficients) of an audio file.

    The audio is decoded as mono and resampled to ``sample_rate`` while
    loading, so the rate given to the MFCC computation always matches the
    signal. Loading at the native rate while telling ``librosa.feature.mfcc``
    a different rate would build the mel filterbank for the wrong frequency
    axis, which matters here because test files (.mp3 / .wav) need not share
    one sampling rate.

    Args:
        file_path: Path of the audio file to read.
        sample_rate: Target sampling rate in Hz. ``None`` keeps the file's
            native rate, which is then also used for the MFCC computation.

    Returns:
        A 1-D NumPy array with one value per MFCC coefficient: the mean of
        that coefficient over all frames.

    Note:
        The features must be produced with the same extraction settings
        that were used to train the loaded model.
    """
    # librosa.load returns the rate actually used, which equals sample_rate
    # unless sample_rate is None (native rate).
    signal, effective_rate = librosa.load(file_path, sr=sample_rate, mono=True)
    mfccs = librosa.feature.mfcc(y=signal, sr=effective_rate, n_mfcc=13)
    # mfccs is (n_mfcc, n_frames); average over the frame axis.
    return np.mean(mfccs, axis=1)

# Load the trained model and the fitted scaler.
# Both files are read from the current working directory.
# NOTE: joblib.load unpickles its input; only load files you trust.
model = load('model.pkl')
scaler = load('scaler.pkl')

# Directory holding the audio files to classify
test_dir = './audio_data/test'

# Walk the test directory and classify every .mp3 / .wav file
for root, dirs, files in os.walk(test_dir):
    for file in files:
        if not file.endswith(('.mp3', '.wav')):
            continue

        file_path = os.path.join(root, file)
        print(f"Processing file: {file_path}")

        try:
            # Extract the feature vector
            features = extract_features(file_path, sample_rate=22050)

            # Scale it with the loaded scaler (expects a 2-D input)
            features_scaled = scaler.transform([features])

            # Predict the class label
            prediction = model.predict(features_scaled)

            # Report the predicted label for this file
            print(f"文件名: {file} --- 结果值: {prediction[0]}")
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")

requirements.txt

代码语言:txt复制
librosa
numpy
scipy
scikit-learn
joblib
packaging

2. 安装依赖

安装环境

1. 在 cmd 或 PowerShell 中直接执行以下命令;如果尚未安装 Python,Windows 10 会自动跳转到应用商店(Microsoft Store),根据提示安装即可
代码语言:bash复制
Python
2. 安装依赖包
代码语言:bash复制
pip3 install -r requirements.txt

3.训练模型

代码语言:bash复制
python3 get-model.py ## 训练完成会生成两个文件: model.pkl scaler.pkl

4.测试模型

代码语言:bash复制
python3 main.py

0 人点赞