// Simple code example
// C++
//
// SG_Com_simple_example.cpp
// 2019/07/05
// Simple example use of SG Com in C++ (mostly C).
//
// We construct an Engine and a Player to illustrate generation
// and playback of animation. There is an option to make the Player "local"
// (connected directly to the Engine) or "remote" (with a callback function to "send"
// packets to the Player). This example does not illustrate audio sync, idle mode,
// or control features such as mode switching.
//
// Copyright (c) 2019 Speech Graphics Ltd. All rights reserved.
//
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "../src/SG_Com.h"
#include <chrono>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // memcpy
#include <thread>
// Usage text printed by main() when the argument count is wrong.
// The synopsis lists exactly the four positional arguments main() accepts;
// the audio sample rate and sample type are detected from the WAV header
// rather than supplied on the command line.
const char* help_text =
    "Usage: SG_Com_Simple_Example.exe license_string character_file audio_file output_file\n\n"
    "license_string : License string (directory path for RLM builds|license string for other builds)\n"
    "character_file : Path to character .k file.\n"
    "audio_file : Path to input audio file containing speech. Must be a single channel wav file.\n"
    "output_file : Path to the file to save the output animation in.\n";
// Example mode switch. true: the Player is treated as "remote" and receives
// packets through the Engine broadcast callback. false: the Player is attached
// to the Engine as a local player.
static bool remote_player{true};
// Handles shared by every function in this example
static SG_COM_EngineHandle engine_handle;
static SG_COM_PlayerHandle player_handle;
// Input side: maximum duration (seconds) of the input buffer, and the
// duration (milliseconds) of audio fed in on each process tick.
static const float input_buffer_sec = 1.0f;
static const int input_chunk_ms = 10;
// Output side: maximum duration (seconds) of the output buffer, and the
// frame rate at which animation is sampled and saved.
static const float output_buffer_sec = 1.0f;
static const int output_fps = 30;
// Audio payload and format of a WAV file, as filled in by load_wav_file().
struct WAVFile {
char* audio_data; // malloc'd buffer holding the audio bytes; released with free() in run()
sg_size data_size; // size of audio_data in bytes
SG_AudioSampleRate sample_rate; // sample rate taken from the WAV header
SG_AudioSampleType sample_type; // sample type derived from the header's bit depth and format tag
};
// Format tags from the WAV header that this example accepts
enum WAV_Format {
    WAV_PCM_FORMAT = 1,  // mapped to the integer sample types
    WAV_IEEE_FORMAT = 3  // mapped to the 32-bit float sample type
};
// Map a sample frequency in Hz onto the matching SG_AudioSampleRate constant.
// A frequency without a matching constant is fatal: a message is written to
// stderr and the process exits with status 1.
SG_AudioSampleRate int_to_AudioSampleRate(unsigned int sample_frequency) {
    if (sample_frequency == 8000) {
        return SG_AUDIO_8_KHZ;
    }
    if (sample_frequency == 12000) {
        return SG_AUDIO_12_KHZ;
    }
    if (sample_frequency == 16000) {
        return SG_AUDIO_16_KHZ;
    }
    if (sample_frequency == 24000) {
        return SG_AUDIO_24_KHZ;
    }
    if (sample_frequency == 32000) {
        return SG_AUDIO_32_KHZ;
    }
    if (sample_frequency == 44100) {
        return SG_AUDIO_44_1_KHZ;
    }
    if (sample_frequency == 48000) {
        return SG_AUDIO_48_KHZ;
    }
    fprintf(stderr, "Audio file has an unsupported sample rate.");
    exit(1);
}
// Pick the SG_AudioSampleType for a bit depth / WAV format tag pair.
// Accepted pairs: 16-bit PCM, 32-bit PCM and 32-bit IEEE float. Any other
// pair is fatal: a message is written to stderr and the process exits with status 1.
SG_AudioSampleType get_AudioSampleType(unsigned int bit_depth, unsigned short wav_format) {
    const bool is_pcm = (wav_format == WAV_PCM_FORMAT);
    const bool is_ieee = (wav_format == WAV_IEEE_FORMAT);
    switch (bit_depth)
    {
    case 16:
        if (is_pcm) {
            return SG_AUDIO_INT_16;
        }
        break;
    case 32:
        if (is_pcm) {
            return SG_AUDIO_INT_32;
        }
        if (is_ieee) {
            return SG_AUDIO_FLOAT_32;
        }
        break;
    default:
        break;
    }
    fprintf(stderr, "Audio file has an unsupported bit depth and sample format combination.");
    exit(1);
}
// Load a file into a byte buffer
void load_file(const char *file_in, char **bytes_out, sg_size *buffersize) {
FILE *file = fopen(file_in, "rb");
fseek(file, 0, SEEK_END);
*buffersize = ftell(file);
rewind(file);
*bytes_out = (char*) malloc(*buffersize);
fread(*bytes_out, 1, *buffersize, file);
fclose(file);
}
// Decode an unsigned 16-bit little-endian value from a byte buffer
static unsigned short wav_read_u16(const char *bytes) {
    const unsigned char *p = (const unsigned char*) bytes;
    return (unsigned short)(p[0] | (p[1] << 8));
}
// Decode an unsigned 32-bit little-endian value from a byte buffer
static unsigned int wav_read_u32(const char *bytes) {
    const unsigned char *p = (const unsigned char*) bytes;
    return (unsigned int)p[0]
        | ((unsigned int)p[1] << 8)
        | ((unsigned int)p[2] << 16)
        | ((unsigned int)p[3] << 24);
}
// Load a WAV file, detecting the sample rate and sample type from its header.
//   file_in : path to a single-channel WAV file
// Returns a WAVFile whose audio_data is a malloc'd copy of everything after
// the header; the caller releases it with free().
// Unsupported or malformed input is fatal: a message is written to stderr and
// the process exits with status 1.
// NOTE(review): the fixed offsets below assume the canonical 44-byte header
// ("fmt " chunk immediately followed by the "data" chunk). Files carrying extra
// chunks before the audio data would be misread — confirm the expected inputs.
WAVFile load_wav_file(const char* file_in) {
    // Byte offsets into the WAV file header
    const int format_offset = 0x14;
    const int channel_count_offset = 0x16;
    const int sample_rate_offset = 0x18;
    const int bit_depth_offset = 0x22;
    const int data_offset = 0x2C;
    char* file = NULL;
    sg_size buffersize = 0;
    load_file(file_in, &file, &buffersize);
    // Every header field read below lies within the first data_offset bytes,
    // and the payload size is computed by subtracting data_offset.
    if (file == NULL || buffersize < (sg_size)data_offset) {
        fprintf(stderr, "Audio file is too small to contain a WAV header.\n");
        exit(1);
    }
    WAVFile audiofile;
    // Check the wav file is monophonic
    const unsigned short channel_count = wav_read_u16(&file[channel_count_offset]);
    if (channel_count != 1) {
        fprintf(stderr, "Only single channel audio files are supported.");
        exit(1);
    }
    // Get the sample rate
    audiofile.sample_rate = int_to_AudioSampleRate(wav_read_u32(&file[sample_rate_offset]));
    // Get the sample type
    WAV_Format wav_format = (WAV_Format) wav_read_u16(&file[format_offset]);
    if (wav_format != WAV_PCM_FORMAT && wav_format != WAV_IEEE_FORMAT) {
        fprintf(stderr, "Audio file format must be signed PCM or float.");
        exit(1);
    }
    const unsigned short bit_depth = wav_read_u16(&file[bit_depth_offset]);
    if (bit_depth != 16 && bit_depth != 32) {
        fprintf(stderr, "Audio file must have a bit depth of 16 or 32.");
        exit(1);
    }
    audiofile.sample_type = get_AudioSampleType(bit_depth, wav_format);
    // Copy the audio data, starting after the header rather than at the start of the file
    audiofile.data_size = buffersize - data_offset;
    audiofile.audio_data = (char*) malloc(audiofile.data_size > 0 ? audiofile.data_size : 1);
    if (audiofile.audio_data == NULL) {
        fprintf(stderr, "Out of memory while loading audio file.\n");
        exit(1);
    }
    memcpy(audiofile.audio_data, file + data_offset, audiofile.data_size);
    free(file);
    return audiofile;
}
// Stop the program when an SG_Com call did not return SG_COM_ERROR_OK.
// The error code is reported on stderr and reused as the process exit status.
void check_error(SG_COM_Error error) {
    if (error == SG_COM_ERROR_OK) {
        return;
    }
    fprintf(stderr, "Exiting with error code %d", error);
    std::exit(error);
}
// Simulated network send method
void __send__(char *packet, sg_size packet_size) {
// Receive a data packet into the remote Player
SG_COM_Error err = SG_COM_ReceivePacket(player_handle, packet, packet_size);
check_error(err);
}
// Broadcast callback registered with the Engine when the Player is remote.
// Forwards each packet to the simulated network send.
void engine_broadcast_callback(
    SG_COM_EngineHandle engine,
    char *packet,
    sg_size packet_size,
    void *custom_data) {
    // Only the packet is needed; the engine handle and custom data go unused.
    (void)engine;
    (void)custom_data;
    __send__(packet, packet_size);
}
// Status callback registered with the Engine.
// Reports mode, expression and voice activity changes on stderr; every other
// status is ignored.
void engine_status_callback(
    SG_COM_EngineHandle engine,
    SG_COM_Status status,
    const char *message,
    void *custom_data) {
    switch (status)
    {
    case SG_COM_STATUS_MODE_CHANGED:
        fprintf(stderr, "Mode changed: %s\n", message);
        break;
    case SG_COM_STATUS_EXPRESSION_CHANGED:
        fprintf(stderr, "Expression changed: %s\n", message);
        break;
    case SG_COM_STATUS_VOICE_ACTIVITY_CHANGED:
        fprintf(stderr, "Voice activity changed: %s\n", message);
        break;
    default:
        break;
    }
}
// Logging callback: writes the message to stdout.
// The message is passed as an argument instead of being used as the format
// string, so any '%' characters in it are printed literally rather than being
// interpreted as conversion specifications.
void sg_com_log_messages(const char *message) {
    printf("%s", message);
}
// Translate an SG_AudioSampleRate constant into samples per second.
// Any value not listed here yields 16000.
size_t get_audio_samples_per_second(const SG_AudioSampleRate sample_rate) {
    if (sample_rate == SG_AUDIO_8_KHZ) {
        return 8000;
    }
    if (sample_rate == SG_AUDIO_12_KHZ) {
        return 12000;
    }
    if (sample_rate == SG_AUDIO_16_KHZ) {
        return 16000;
    }
    if (sample_rate == SG_AUDIO_24_KHZ) {
        return 24000;
    }
    if (sample_rate == SG_AUDIO_32_KHZ) {
        return 32000;
    }
    if (sample_rate == SG_AUDIO_44_1_KHZ) {
        return 44100;
    }
    if (sample_rate == SG_AUDIO_48_KHZ) {
        return 48000;
    }
    return 16000;
}
// Size in bytes of one sample of the given type.
// Returns 0 for a sample type this example does not know about.
size_t get_audio_bytes_per_sample(const SG_AudioSampleType sample_type) {
    if (sample_type == SG_AUDIO_INT_16) {
        return 2;
    }
    const bool is_four_byte_type =
        (sample_type == SG_AUDIO_INT_32) || (sample_type == SG_AUDIO_FLOAT_32);
    return is_four_byte_type ? 4 : 0;
}
// Write the saved animation output to a file in Speech Graphics .rts file format.
//   output_file_path               : destination path (created or truncated)
//   num_animation_nodes            : number of entries in animation_nodes
//   animation_nodes                : nodes whose names and channel names form the header line
//   results_ptr                    : value on entry is ignored; used as a cursor over results
//   results                        : frame-major values, num_aggregated_output_channels per frame
//   num_output_frames              : number of frames to write; values <= 0 write no frames
//   num_aggregated_output_channels : total channel count across all nodes
// File layout: a frame rate line, a comma-separated "node.channel" line, then
// one comma-separated line of values per frame.
// Failure to open or finish writing the file is fatal (stderr message, exit status 1).
void write_output_file(
    const char *output_file_path,
    const sg_size num_animation_nodes,
    SG_AnimationNode *animation_nodes,
    float *results_ptr,
    float *results,
    int num_output_frames,
    const sg_size num_aggregated_output_channels) {
    FILE *file = fopen(output_file_path, "w");
    if (file == NULL) {
        fprintf(stderr, "Could not open output file: %s\n", output_file_path);
        exit(1);
    }
    // Write frame rate line
    fprintf(file, "%d\n", output_fps);
    // Write channel names. The separator goes before every name except the
    // first and the newline after the loops, so the line is terminated
    // correctly even when the last node has no channels.
    bool first_name = true;
    for (size_t i = 0; i < num_animation_nodes; i++) {
        for (size_t j = 0; j < animation_nodes[i].num_channels; j++) {
            fprintf(file, "%s%s.%s", first_name ? "" : ",", animation_nodes[i].name, animation_nodes[i].channel_names[j]);
            first_name = false;
        }
    }
    fprintf(file, "\n");
    // Write frame data. A negative count is treated as zero instead of being
    // converted to a huge unsigned loop bound.
    const size_t frame_count = num_output_frames > 0 ? (size_t)num_output_frames : 0;
    results_ptr = results;
    for (size_t i = 0; i < frame_count; i++) {
        for (size_t j = 0; j < num_aggregated_output_channels; j++, results_ptr++) {
            fprintf(file, "%f", *results_ptr);
            fputc(j + 1 < num_aggregated_output_channels ? ',' : '\n', file);
        }
    }
    if (fclose(file) != 0) {
        fprintf(stderr, "Could not finish writing output file: %s\n", output_file_path);
        exit(1);
    }
}
void create_player(const char *character_file_in_memory, sg_size character_file_bytes) {
SG_COM_Error err;
SG_COM_PlayerConfig player_config;
player_config.character_file_bytes = character_file_bytes;
player_config.character_file_in_memory = (sg_byte*)character_file_in_memory;;
player_config.buffer_sec = output_buffer_sec;
err = SG_COM_CreatePlayer(&player_config, &player_handle);
check_error(err);
}
// Create the global Engine for the given character file and audio format,
// then assign and activate its auto modes. A creation error is fatal.
void create_engine(
    const char *character_file_in_memory,
    sg_size character_file_bytes,
    SG_AudioSampleRate audio_sample_rate,
    SG_AudioSampleType audio_sample_type) {
    SG_COM_EngineConfig engine_config;
    // Character data
    engine_config.character_file_in_memory = (sg_byte*)character_file_in_memory;
    engine_config.character_file_bytes = character_file_bytes;
    // Audio input format and buffering
    engine_config.audio_sample_rate = audio_sample_rate;
    engine_config.audio_sample_type = audio_sample_type;
    engine_config.buffer_sec = input_buffer_sec;
    // Packet delivery: a remote Player is fed through the broadcast callback,
    // while a local Player is handed to the Engine, which passes packets to it
    // automatically.
    if (remote_player) {
        engine_config.local_player = nullptr;
        engine_config.engine_broadcast_callback = engine_broadcast_callback;
    }
    else {
        engine_config.local_player = player_handle;
        engine_config.engine_broadcast_callback = nullptr;
    }
    engine_config.engine_status_callback = engine_status_callback;
    engine_config.custom_engine_data = nullptr;
    engine_config.flag = SG_COM_ENGINE_CONFIG_NONE;
    check_error(SG_COM_CreateEngine(&engine_config, &engine_handle));
    // Name the auto modes and switch them on
    SG_COM_SetAutoMode(engine_handle, SG_COM_POSITIVE_MODE, "positive");
    SG_COM_SetAutoMode(engine_handle, SG_COM_NEGATIVE_MODE, "negative");
    SG_COM_SetAutoMode(engine_handle, SG_COM_EFFORT_MODE, "effort");
    SG_COM_SetAutoMode(engine_handle, SG_COM_ACKNOWLEDGE_MODE, "acknowledge");
    SG_COM_ActivateAutoModes(engine_handle);
}
// Input audio into the Engine.
// Feeds the audio in chunks of input_chunk_ms length, simulating real-time
// input from a microphone, and calls ProcessTick after every chunk.
//   audio_sample_rate    : sample rate of the audio data
//   audio_sample_type    : sample type of the audio data
//   audio_file_bytes     : pointer to the size of the audio data in bytes
//   audio_file_in_memory : the audio data
// A trailing partial chunk (shorter than input_chunk_ms) is not sent.
// Errors are fatal (stderr message and process exit).
void audio_input(
    SG_AudioSampleRate audio_sample_rate,
    SG_AudioSampleType audio_sample_type,
    const sg_size *audio_file_bytes,
    const char *audio_file_in_memory) {
    // Multiply before dividing so that rates which are not a multiple of 1000
    // (44100 Hz) still give a full input_chunk_ms of audio: 441 samples, not 440.
    const size_t samples_per_second = get_audio_samples_per_second(audio_sample_rate);
    const size_t input_chunk_samples = samples_per_second * (size_t)input_chunk_ms / 1000;
    const size_t input_chunk_bytes = input_chunk_samples * get_audio_bytes_per_sample(audio_sample_type);
    // get_audio_bytes_per_sample returns 0 for unknown sample types; dividing
    // by a zero chunk size below would be undefined.
    if (input_chunk_bytes == 0) {
        fprintf(stderr, "Cannot determine the audio chunk size for this sample type.\n");
        exit(1);
    }
    const size_t num_input_chunks = (size_t)*audio_file_bytes / input_chunk_bytes;
    uint8_t *input_chunk = (uint8_t*)malloc(input_chunk_bytes);
    if (input_chunk == NULL) {
        fprintf(stderr, "Out of memory while allocating the audio input chunk.\n");
        exit(1);
    }
    // Pace the loop against absolute deadlines so that per-iteration overhead
    // and oversleeping do not accumulate into drift over a long file.
    const auto chunk_duration = std::chrono::milliseconds(input_chunk_ms);
    auto next_tick = std::chrono::steady_clock::now();
    for (size_t i = 0; i < num_input_chunks; i++) {
        memcpy(input_chunk, audio_file_in_memory + (i * input_chunk_bytes), input_chunk_bytes);
        SG_COM_Error err = SG_COM_InputAudio(engine_handle, input_chunk, (sg_size)input_chunk_bytes);
        check_error(err);
        // ProcessTick is called from here for simplicity
        // It is recommended this call be offloaded to a background thread, which is then notified using a mechanism like std::condition_variable.
        // This will prevent the processing from blocking the real-time audio thread.
        err = SG_COM_ProcessTick(engine_handle, nullptr, nullptr);
        check_error(err);
        // Sleep until this chunk's slot has elapsed to simulate real-time audio input
        next_tick += chunk_duration;
        std::this_thread::sleep_until(next_tick);
    }
    free(input_chunk);
}
// Playback process
void play_output(const char *output_file_path, float output_sec) {
// Get the animation nodes.
sg_size num_animation_nodes = 0;
sg_size num_aggregated_output_channels = 0;
SG_AnimationNode *animation_nodes = nullptr;
SG_COM_Error err = SG_COM_GetAnimationNodes(player_handle, &animation_nodes, &num_animation_nodes);
// Count the aggregated output channels
for (sg_size i = 0; i < num_animation_nodes; i++) {
num_aggregated_output_channels += animation_nodes[i].num_channels;
}
// Prepare output storage. (Normally you would play the output in real time.
// But in this example we will save it to a file.)
int num_output_frames = (int)roundf(output_sec * output_fps);
float *results = (float*)malloc(sizeof(float) * num_aggregated_output_channels * num_output_frames);
float *results_ptr = results;
//Wait until some animation is in the buffer
double min_time, max_time, current_time;
bool bufferred = false;
while (!bufferred) {
SG_COM_GetPlayableRange(player_handle, &min_time, &max_time);
bufferred = (max_time - min_time > 0);
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
// Enter the real-time playback loop
auto delta_time = std::chrono::microseconds(1000000 / output_fps);
auto start_time = std::chrono::high_resolution_clock::now();
auto frame_time = start_time;
for (size_t i = 0; i < num_output_frames; i++) {
std::chrono::duration<float, std::milli> time_elapsed_ms = frame_time - start_time;
SG_COM_Error err = SG_COM_UpdateAnimation(player_handle, time_elapsed_ms.count(), ¤t_time);
if (err != SG_COM_ERROR_OK) {
break;
}
for (size_t j = 0; j < num_animation_nodes; j++) {
// Copy the animation values out of the node
// This is where animation would be applied to the character
memcpy(results_ptr, animation_nodes[j].channel_values, animation_nodes[j].num_channels * sizeof(float));
results_ptr += animation_nodes[j].num_channels;
}
// Wait for delta to elapse
std::this_thread::sleep_for(delta_time);
frame_time = std::chrono::high_resolution_clock::now();
}
write_output_file(
output_file_path,
num_animation_nodes,
animation_nodes,
results_ptr,
results,
num_output_frames,
num_aggregated_output_channels);
// Clean up
free(results);
}
// Run the example end to end:
// initialize SG_Com, load the character and audio files, create the Player and
// Engine, feed the audio from a background thread while capturing the
// animation on this thread, then tear everything down.
//   license_string      : license string passed to SG_COM_Initialize
//   character_file_path : path to the character file
//   audio_file_path     : path to the input WAV file
//   output_file_path    : path the captured animation is saved to
void run(
    const char *license_string,
    const char *character_file_path,
    const char *audio_file_path,
    const char *output_file_path) {
    SG_COM_Error err = SG_COM_Initialize(SG_LOGLEVEL_ERROR, sg_com_log_messages, license_string, nullptr, nullptr);
    check_error(err);
    printf("SG_Com Version: %s\n", SG_COM_GetVersionString());
    // Load files into memory
    char *character_file_in_memory = nullptr;
    sg_size character_file_bytes = 0;
    load_file(character_file_path, &character_file_in_memory, &character_file_bytes);
    WAVFile audio_file = load_wav_file(audio_file_path);
    create_player(character_file_in_memory, character_file_bytes);
    create_engine(
        character_file_in_memory,
        character_file_bytes,
        audio_file.sample_rate,
        audio_file.sample_type);
    // Start the audio thread
    std::thread input_thread(audio_input, audio_file.sample_rate, audio_file.sample_type, &audio_file.data_size, audio_file.audio_data);
    // "Play" the animation
    float audio_sec = (float)audio_file.data_size / (get_audio_samples_per_second(audio_file.sample_rate) * get_audio_bytes_per_sample(audio_file.sample_type));
    play_output(output_file_path, audio_sec - 0.1f);
    // Make sure asynchronous audio input has been shut down before destroying the Engine.
    // In this example audio delivery will complete by itself when it reaches the end of the audio file.
    input_thread.join();
    SG_COM_DestroyEngine(engine_handle);
    SG_COM_DestroyPlayer(player_handle);
    SG_COM_Shutdown();
    // Release the file buffers only after the Engine and Player are gone.
    // NOTE(review): it is not visible here whether SG_Com copies the character
    // data or keeps the pointer it was given; freeing last is safe either way.
    free(character_file_in_memory);
    free(audio_file.audio_data);
}
// Main function.
// Expects exactly four arguments: license string, character file, audio file
// and output file. Returns 1 after printing the usage text when the argument
// count is wrong, and 0 once the example has run.
int main( int argc, char *argv []) {
    if (argc != 5) {
        // A usage error is a failure: report it on stderr with a non-zero status
        fprintf(stderr, "Incorrect number of arguments\n%s", help_text);
        return 1;
    }
    run(argv[1], argv[2], argv[3], argv[4]);
    return 0;
}