[toc]

使用C语言将PCM文件转为对应的音符与频率(毫无准确率)

1.准备FFTW64库

  • 使用vcpkg安装fftw3,例如:vcpkg install fftw3:x64-windows

2.使用C语言将PCM文件转为对应的频率

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <fftw3.h>
#include <math.h>

// Print the magnitude spectrum of a raw PCM file to stdout.
//
// The file is treated as headerless 16-bit mono PCM in the machine's native
// byte order, sampled at 44100 Hz. Both are assumptions: nothing in the file
// is inspected to confirm them. The whole file is transformed with a single
// real-to-complex FFT and one "frequency, magnitude" line is printed for every
// bin below the Nyquist bin.
//
// filename: path of the PCM file to analyse.
//
// On any failure (file cannot be opened or read, file empty or too large,
// out of memory, FFTW plan creation fails) a message is printed and the
// function returns without printing a spectrum.
void analyze_pcm(const char* filename) {
    const double sample_rate = 44100.0; // assumed sampling rate in Hz

    short* raw = NULL;
    double* buffer = NULL;
    fftw_complex* out = NULL;
    fftw_plan plan = NULL;
    size_t num_samples = 0;
    long fileSize = -1;
    int fft_len = 0;

    FILE* file = fopen(filename, "rb");
    if (!file) {
        printf("无法打开文件: %s\n", filename);
        return;
    }

    // Measure the file by seeking to its end, then rewind for reading.
    if (fseek(file, 0, SEEK_END) == 0) {
        fileSize = ftell(file);
    }
    if (fileSize < 0 || fseek(file, 0, SEEK_SET) != 0) {
        printf("无法获取文件大小: %s\n", filename);
        goto cleanup;
    }

    // Samples are 16-bit integers, not doubles, so count them as shorts.
    num_samples = (size_t)fileSize / sizeof(short);

    // FFTW takes the transform length as an int; reject sizes that do not
    // survive the round trip, and empty input (a zero-length FFT is invalid).
    fft_len = (int)num_samples;
    if (fft_len <= 0 || (size_t)fft_len != num_samples) {
        printf("文件为空或过大: %s\n", filename);
        goto cleanup;
    }

    raw = (short*)malloc(num_samples * sizeof(short));
    buffer = (double*)malloc(num_samples * sizeof(double));
    if (!raw || !buffer) {
        printf("内存分配失败\n");
        goto cleanup;
    }

    if (fread(raw, sizeof(short), num_samples, file) != num_samples) {
        printf("读取 PCM 数据失败: %s\n", filename);
        goto cleanup;
    }
    fclose(file);
    file = NULL;

    // Convert the integer samples to doubles normalised to [-1, 1).
    for (size_t i = 0; i < num_samples; i++) {
        buffer[i] = (double)raw[i] / 32768.0;
    }
    free(raw);
    raw = NULL;

    // A real-to-complex transform of n points yields n/2 + 1 complex bins.
    out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * (num_samples / 2 + 1));
    if (!out) {
        printf("内存分配失败\n");
        goto cleanup;
    }

    plan = fftw_plan_dft_r2c_1d(fft_len, buffer, out, FFTW_ESTIMATE);
    if (!plan) {
        printf("无法创建 FFTW 计划\n");
        goto cleanup;
    }

    fftw_execute(plan);

    // Bin i corresponds to the frequency i * sample_rate / num_samples.
    for (size_t i = 0; i < num_samples / 2; i++) {
        double frequency = i * sample_rate / num_samples;
        double magnitude = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]);
        printf("频率: %.2f Hz, 幅度: %.2f\n", frequency, magnitude);
    }

cleanup:
    if (plan) {
        fftw_destroy_plan(plan);
    }
    if (out) {
        fftw_free(out);
    }
    free(buffer);
    free(raw);
    if (file) {
        fclose(file);
    }
}

// Entry point: analyse one hard-coded PCM recording and exit successfully.
int main() {
    // Location of the raw PCM input; make sure this path is correct.
    static const char pcm_path[] = "D:\\Users\\3\\Videos\\落叶的位置.pcm";

    analyze_pcm(pcm_path);
    return 0;
}

3.使用C语言将PCM文件转为对应的音符并保存在文本文件中

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <fftw3.h>
#include <math.h>
#include <string.h>

#define NOTE_COUNT 88 // number of notes from A0 up to C8

// Note names, one per entry, in ascending pitch order. A trailing "s" marks a
// sharp (e.g. "C4s" is the semitone above "C4"). Index i here names the pitch
// stored at frequencies[i].
const char* notes[NOTE_COUNT] = {
    "A0", "A0s", "B0",
    "C1", "C1s", "D1", "D1s", "E1", "F1", "F1s", "G1", "G1s", "A1", "A1s", "B1",
    "C2", "C2s", "D2", "D2s", "E2", "F2", "F2s", "G2", "G2s", "A2", "A2s", "B2",
    "C3", "C3s", "D3", "D3s", "E3", "F3", "F3s", "G3", "G3s", "A3", "A3s", "B3",
    "C4", "C4s", "D4", "D4s", "E4", "F4", "F4s", "G4", "G4s", "A4", "A4s", "B4",
    "C5", "C5s", "D5", "D5s", "E5", "F5", "F5s", "G5", "G5s", "A5", "A5s", "B5",
    "C6", "C6s", "D6", "D6s", "E6", "F6", "F6s", "G6", "G6s", "A6", "A6s", "B6",
    "C7", "C7s", "D7", "D7s", "E7", "F7", "F7s", "G7", "G7s", "A7", "A7s", "B7",
    "C8"
};

// Note frequencies in Hz, parallel to notes[]. Values follow twelve-tone equal
// temperament with A4 = 440 Hz, i.e. entry i is 27.5 * 2^(i / 12), rounded to
// two decimals. Each row below is one octave starting at C.
const double frequencies[NOTE_COUNT] = {
    27.50, 29.14, 30.87,
    32.70, 34.65, 36.71, 38.89, 41.20, 43.65, 46.25, 49.00, 51.91, 55.00, 58.27, 61.74,
    65.41, 69.30, 73.42, 77.78, 82.41, 87.31, 92.50, 98.00, 103.83, 110.00, 116.54, 123.47,
    130.81, 138.59, 146.83, 155.56, 164.81, 174.61, 185.00, 196.00, 207.65, 220.00, 233.08, 246.94,
    261.63, 277.18, 293.66, 311.13, 329.63, 349.23, 369.99, 392.00, 415.30, 440.00, 466.16, 493.88,
    523.25, 554.37, 587.33, 622.25, 659.26, 698.46, 739.99, 783.99, 830.61, 880.00, 932.33, 987.77,
    1046.50, 1108.73, 1174.66, 1244.51, 1318.51, 1396.91, 1479.98, 1567.98, 1661.22, 1760.00, 1864.66, 1975.53,
    2093.00, 2217.46, 2349.32, 2489.02, 2637.02, 2793.83, 2959.96, 3135.96, 3322.44, 3520.00, 3729.31, 3951.07,
    4186.01
};

// Convert a raw PCM file into a list of note names written to a text file.
//
// The input is treated as headerless 16-bit mono PCM in the machine's native
// byte order, sampled at 44100 Hz (assumptions; the file is not inspected to
// confirm them). The whole file is transformed with a single real-to-complex
// FFT. For every bin whose magnitude exceeds a fixed threshold, the name of
// the nearest entry in frequencies[] is appended to the output as "NAME, ";
// a newline is written after every tenth bin regardless of whether that bin
// produced a note.
//
// filename:        path of the PCM file to analyse.
// output_filename: path of the text file to create or overwrite.
//
// On any failure (a file cannot be opened or read, input empty or too large,
// out of memory, FFTW plan creation fails) a message is printed to stdout and
// the function returns after releasing whatever it had acquired.
void analyze_pcm(const char* filename, const char* output_filename) {
    const double sample_rate = 44100.0;       // assumed sampling rate in Hz
    const double magnitude_threshold = 0.01;  // bins at or below this are ignored; tune as needed
    const double max_note_distance = 1000.0;  // bins this far (Hz) or more from every note are ignored

    FILE* output_file = NULL;
    short* temp_buffer = NULL;
    double* buffer = NULL;
    fftw_complex* out = NULL;
    fftw_plan plan = NULL;
    size_t num_samples = 0;
    size_t num_bins = 0;
    long fileSize = -1;
    int fft_len = 0;

    FILE* file = fopen(filename, "rb");
    if (!file) {
        printf("无法打开文件: %s\n", filename);
        return;
    }

    // Measure the file by seeking to its end, then rewind for reading.
    if (fseek(file, 0, SEEK_END) == 0) {
        fileSize = ftell(file);
    }
    if (fileSize < 0 || fseek(file, 0, SEEK_SET) != 0) {
        printf("无法获取文件大小: %s\n", filename);
        goto cleanup;
    }

    num_samples = (size_t)fileSize / sizeof(short);

    // FFTW takes the transform length as an int; reject sizes that do not
    // survive the round trip, and empty input (it would also make the
    // bin-to-frequency conversion below divide by zero).
    fft_len = (int)num_samples;
    if (fft_len <= 0 || (size_t)fft_len != num_samples) {
        printf("文件为空或过大: %s\n", filename);
        goto cleanup;
    }

    temp_buffer = (short*)malloc(num_samples * sizeof(short));
    buffer = (double*)malloc(num_samples * sizeof(double));
    if (!temp_buffer || !buffer) {
        printf("内存分配失败\n");
        goto cleanup;
    }

    if (fread(temp_buffer, sizeof(short), num_samples, file) != num_samples) {
        printf("读取 PCM 数据失败: %s\n", filename);
        goto cleanup;
    }
    fclose(file);
    file = NULL;

    // Convert the integer samples to doubles normalised to [-1, 1).
    for (size_t i = 0; i < num_samples; i++) {
        buffer[i] = (double)temp_buffer[i] / 32768.0;
    }
    free(temp_buffer);
    temp_buffer = NULL;

    // A real-to-complex transform of n points yields n/2 + 1 complex bins.
    num_bins = num_samples / 2 + 1;
    out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * num_bins);
    if (!out) {
        printf("内存分配失败\n");
        goto cleanup;
    }

    plan = fftw_plan_dft_r2c_1d(fft_len, buffer, out, FFTW_ESTIMATE);
    if (!plan) {
        printf("无法创建 FFTW 计划\n");
        goto cleanup;
    }

    fftw_execute(plan);

    output_file = fopen(output_filename, "w");
    if (!output_file) {
        printf("无法打开输出文件: %s\n", output_filename);
        goto cleanup;
    }

    for (size_t i = 0; i < num_bins; i++) {
        double magnitude = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]);

        // Only look up a note for bins that are loud enough to be reported.
        if (magnitude > magnitude_threshold) {
            double frequency = i * sample_rate / num_samples;
            int closest_note_index = -1;
            double min_diff = max_note_distance;

            // Linear scan for the table entry nearest to this bin's frequency.
            for (int j = 0; j < NOTE_COUNT; j++) {
                double diff = fabs(frequency - frequencies[j]);
                if (diff < min_diff) {
                    min_diff = diff;
                    closest_note_index = j;
                }
            }

            if (closest_note_index != -1) {
                fprintf(output_file, "%s, ", notes[closest_note_index]);
            }
        }

        if (i % 10 == 0) {
            fprintf(output_file, "\n");
        }
    }

cleanup:
    // fclose flushes buffered output, so a failure here means lost data.
    if (output_file && fclose(output_file) != 0) {
        printf("写入输出文件失败: %s\n", output_filename);
    }
    if (plan) {
        fftw_destroy_plan(plan);
    }
    if (out) {
        fftw_free(out);
    }
    free(buffer);
    free(temp_buffer);
    if (file) {
        fclose(file);
    }
}

// Entry point: turn one hard-coded PCM recording into a text file of note names.
int main() {
    // Raw PCM input; make sure this path is correct.
    static const char pcm_path[] = "D:\\Users\\3\\Videos\\落叶的位置.pcm";
    // Text file that receives the detected note names.
    static const char notes_path[] = "output_notes.txt";

    analyze_pcm(pcm_path, notes_path);
    return 0;
}

4.使用C语言将PCM文件按帧(每帧1024个采样点)转为对应的频率

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <fftw3.h>
#include <math.h>

// Input audio file path and sampling rate (presumably in Hz; used to convert
// FFT bin indices to frequencies)
#define INPUT_FILENAME "D:\\Users\\3\\Videos\\落叶的位置.pcm"
#define SAMPLE_RATE 44100

// FFTW parameter: number of points in each transform
#define FFT_SIZE 1024

// Read the PCM file frame by frame and print the frequency of every FFT bin.
//
// The input is treated as headerless 16-bit mono PCM in the machine's native
// byte order (an assumption; the file is not inspected to confirm it). It is
// consumed in frames of FFT_SIZE samples; a short final frame is zero-padded.
// Each frame is run through a complex forward FFT and the centre frequency of
// each of the FFT_SIZE bins is printed as "%.2f,", with a newline after every
// bin whose index is a multiple of 20.
//
// Returns 0 on success and 1 if the file cannot be opened or read, memory
// cannot be allocated, or the FFTW plan cannot be created.
int main() {
    short samples[FFT_SIZE];
    fftw_complex* fft_input = NULL;
    fftw_complex* fft_output = NULL;
    fftw_plan plan = NULL;
    size_t read_size = 0;
    int status = 1;

    FILE* input_file = fopen(INPUT_FILENAME, "rb");
    if (!input_file) {
        fprintf(stderr, "无法打开文件: %s\n", INPUT_FILENAME);
        return 1;
    }

    fft_input = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * FFT_SIZE);
    fft_output = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * FFT_SIZE);
    if (!fft_input || !fft_output) {
        fprintf(stderr, "内存分配失败\n");
        goto cleanup;
    }

    // One plan is created up front and reused for every frame.
    plan = fftw_plan_dft_1d(FFT_SIZE, fft_input, fft_output, FFTW_FORWARD, FFTW_ESTIMATE);
    if (!plan) {
        fprintf(stderr, "无法创建 FFTW 计划\n");
        goto cleanup;
    }

    // Loop on fread's return value rather than feof(): feof() only becomes
    // true after a read has already hit end-of-file, which would otherwise
    // process one extra frame of stale data.
    while ((read_size = fread(samples, sizeof samples[0], FFT_SIZE, input_file)) > 0) {
        // Load the samples actually read, normalised to [-1, 1); zero-pad the
        // rest of a short final frame so no stale data enters the FFT.
        for (size_t i = 0; i < FFT_SIZE; i++) {
            fft_input[i][0] = (i < read_size) ? (double)samples[i] / 32768.0 : 0.0;
            fft_input[i][1] = 0.0; // imaginary part is zero for real input
        }

        fftw_execute(plan);

        for (int i = 0; i < FFT_SIZE; i++) {
            double magnitude = sqrt(fft_output[i][0] * fft_output[i][0] + fft_output[i][1] * fft_output[i][1]);
            double frequency = ((double)i / FFT_SIZE) * SAMPLE_RATE;

            // The magnitude is computed for further processing (plotting,
            // output, ...) but only the bin frequency is printed here.
            (void)magnitude;
            printf("%.2f,", frequency);
            if (i % 20 == 0) {
                printf("\n");
            }
        }
    }

    if (ferror(input_file)) {
        fprintf(stderr, "读取 PCM 数据失败: %s\n", INPUT_FILENAME);
        goto cleanup;
    }
    status = 0;

cleanup:
    if (plan) {
        fftw_destroy_plan(plan);
    }
    if (fft_input) {
        fftw_free(fft_input);
    }
    if (fft_output) {
        fftw_free(fft_output);
    }
    fclose(input_file);

    return status;
}