标注工具：parselmouth（歌声合成语音合成标注）

@toc

Parselmouth

# Parselmouth
#
# Installation (shell):
#     $ pip install praat-parselmouth
#
# Plotting

import parselmouth

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


def draw_spectrogram(spectrogram, dynamic_range=70):
    """Draw ``spectrogram`` on the current matplotlib axes as a colour mesh.

    The spectrogram values are converted with ``10 * log10`` before plotting,
    and the colour scale is clipped ``dynamic_range`` below the maximum.

    Args:
        spectrogram: Object providing ``x_grid()``, ``y_grid()``, ``values``,
            ``ymin`` and ``ymax`` (e.g. the result of ``Sound.to_spectrogram()``).
        dynamic_range: Width of the displayed colour range, in the same
            (logarithmic) units as the converted values.
    """
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    sg_db = 10 * np.log10(spectrogram.values)
    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range, cmap='afmhot')
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")


def draw_intensity(intensity):
    """Plot an intensity contour on the current matplotlib axes.

    Args:
        intensity: Object providing ``xs()`` and ``values``.
    """
    # A wide white line is drawn first and a thin line on top of it, so the
    # curve gets a white outline (presumably for contrast over a spectrogram).
    plt.plot(intensity.xs(), intensity.values.T, linewidth=3, color='w')
    plt.plot(intensity.xs(), intensity.values.T, linewidth=1)
    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("intensity [dB]")


def draw_pitch(pitch):
    """Plot the pitch contour of ``pitch`` as dots on the current axes.

    Args:
        pitch: Object providing ``selected_array['frequency']``, ``xs()`` and
            ``ceiling`` (e.g. the result of ``Sound.to_pitch()``).
    """
    # Extract selected pitch contour, and
    # replace unvoiced samples by NaN to not plot
    pitch_values = pitch.selected_array['frequency']
    pitch_values[pitch_values == 0] = np.nan
    # Larger white markers underneath smaller ones give each dot an outline.
    plt.plot(pitch.xs(), pitch_values, 'o', markersize=5, color='w')
    plt.plot(pitch.xs(), pitch_values, 'o', markersize=2)
    plt.grid(False)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel("fundamental frequency [Hz]")


def facet_util(data, **kwargs):
    """Draw one facet: spectrogram plus pitch for a single recording.

    The first row of ``data`` supplies ``digit`` and ``speaker_id``; the sound
    is loaded from ``"<digit>_<speaker_id>.wav"``.

    Args:
        data: DataFrame with ``digit`` and ``speaker_id`` columns.
        **kwargs: Accepted so the function can be used as a
            ``FacetGrid.map_dataframe`` callback; not used here.
    """
    digit, speaker_id = data[['digit', 'speaker_id']].iloc[0]
    sound = parselmouth.Sound("{}_{}.wav".format(digit, speaker_id))
    draw_spectrogram(sound.to_spectrogram())
    # Second y-axis on the right-hand side for the pitch scale.
    plt.twinx()
    draw_pitch(sound.to_pitch())
    # If not the rightmost column, then clear the right side axis
    # NOTE(review): assumes digit 5 is the last column of the grid — confirm
    # against digit_list.csv.
    if digit != 5:
        plt.ylabel("")
        plt.yticks([])


results = pd.read_csv("digit_list.csv")

# One facet per (speaker_id, digit) combination.
grid = sns.FacetGrid(results, row='speaker_id', col='digit')
grid.map_dataframe(facet_util)
grid.set_titles(col_template="{col_name}", row_template="{row_name}")
grid.set_axis_labels("time [s]", "frequency [Hz]")
grid.set(facecolor='white', xlim=(0, None))
plt.show()
Parselmouth 是 Praat 的 Python 接口。
论坛讨论地址：https://groups.google.com/g/parselmouth

install
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-vfEhonAo-1611281659659)(evernotecid://7F9DBD47-611B-471F-AF91-E0154E425709/appyinxiangcom/11012738/ENResource/p2833)]

在这里插入图片描述
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-lLgyT3f1-1611281659662)(evernotecid://7F9DBD47-611B-471F-AF91-E0154E425709/appyinxiangcom/11012738/ENResource/p2834)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-aebagOlc-1611281659664)(evernotecid://7F9DBD47-611B-471F-AF91-E0154E425709/appyinxiangcom/11012738/ENResource/p2831)]

音高处理

代码语言：python

import parselmouth
from parselmouth.praat import call

# Pitch manipulation example: resynthesise a recording one octave higher.

# Load the source recording.
sound = parselmouth.Sound("other/4_b.wav")

# Arguments forwarded to Praat's "To Manipulation" command
# (presumably time step [s], pitch floor [Hz], pitch ceiling [Hz] — see the
# Praat manual to confirm).
manipulation_args = (0.01, 75, 600)
manipulation = call(sound, "To Manipulation", *manipulation_args)
print(type(manipulation))

# Pull the pitch tier out of the manipulation object so it can be edited.
pitch_tier = call(manipulation, "Extract pitch tier")

# Doubling every frequency over the whole duration raises the pitch by an octave.
octave_factor = 2
whole_span = (sound.xmin, sound.xmax)
call(pitch_tier, "Multiply frequencies", *whole_span, octave_factor)

# Put the edited tier back and resynthesise the audio.
call([pitch_tier, manipulation], "Replace pitch tier")
sound_octave_up = call(manipulation, "Get resynthesis (overlap-add)")
print(type(sound_octave_up))

# Write the result to disk as a WAV file.
sound_octave_up.save("4_b_octave_up.wav", "WAV")

draw frequency grid import plot

0 人点赞