Simple Code Example

CPP
//
// SG_Com_simple_example.cpp
// 2019/07/05
// Simple example use of SG Com in C++ (mostly C).
//
// We construct an Engine and a Player to illustrate generation
// and playback of animation. There is an option to make the Player "local"
// (connected directly to the Engine) or "remote" (with a callback function to "send"
// packets to the Player). This example does not illustrate audio sync, idle mode,
// or control features such as mode switching.
//
// Copyright (c) 2019 Speech Graphics Ltd. All rights reserved.
//

#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif

#include "../src/SG_Com.h"

#include <chrono>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // memcpy
#include <thread>

// Usage text printed by main() when the argument count is wrong.
// The audio sample rate and sample type are not command-line arguments:
// load_wav_file() reads both from the WAV header, and main() accepts exactly
// the four arguments listed here.
const char* help_text =
"Usage: SG_Com_Simple_Example.exe license_string character_file audio_file output_file\n\n"
"license_string     :  License string (directory path for RLM builds|license string for other builds)\n"
"character_file     :  Path to character .k file.\n"
"audio_file         :  Path to input audio file containing speech. Must be a single channel wav file.\n"
"output_file        :  Path to the file to save the output animation in.\n";

// Example mode. Set to true to treat the Player as a "remote" player. Otherwise it will be treated as local.
// When true, create_engine() registers engine_broadcast_callback and passes no local player;
// when false, it passes player_handle as the Engine's local player and registers no broadcast callback.
static bool remote_player = true;

// Global objects
// engine_handle is filled in by SG_COM_CreateEngine() inside create_engine();
// player_handle is filled in by SG_COM_CreatePlayer() inside create_player().
static SG_COM_EngineHandle engine_handle;
static SG_COM_PlayerHandle player_handle;

// Input side: Engine buffer length and the amount of audio fed per tick
static constexpr float input_buffer_sec = 1.0f;   // upper bound (seconds) on the input buffer
static constexpr int input_chunk_ms = 10;         // milliseconds of audio input on each process tick

// Output side: Player buffer length and the playback frame rate
static constexpr float output_buffer_sec = 1.0f;  // upper bound (seconds) on the output buffer
static constexpr int output_fps = 30;             // frames per second used for playback and the output file

// Audio loaded from a WAV file, as returned by load_wav_file().
struct WAVFile {
    char* audio_data;                // malloc'd buffer filled by load_wav_file(); released with free() in run()
    sg_size data_size;               // size of audio_data in bytes
    SG_AudioSampleRate sample_rate;  // sample rate read from the WAV header
    SG_AudioSampleType sample_type;  // sample type derived from the header's bit depth and format field
};

// Values of the WAV header format field that this example accepts
enum WAV_Format {
    WAV_PCM_FORMAT  = 1,  // with bit depth 16 or 32 -> SG_AUDIO_INT_16 / SG_AUDIO_INT_32
    WAV_IEEE_FORMAT = 3   // with bit depth 32 -> SG_AUDIO_FLOAT_32
};

// Map a sample frequency in Hz onto the matching SG_AudioSampleRate value.
// A frequency with no match prints an error to stderr and ends the program with exit(1).
SG_AudioSampleRate int_to_AudioSampleRate(unsigned int sample_frequency) {

    struct RateEntry {
        unsigned int hz;
        SG_AudioSampleRate rate;
    };

    static const RateEntry known_rates[] = {
        { 8000,  SG_AUDIO_8_KHZ },
        { 12000, SG_AUDIO_12_KHZ },
        { 16000, SG_AUDIO_16_KHZ },
        { 24000, SG_AUDIO_24_KHZ },
        { 32000, SG_AUDIO_32_KHZ },
        { 44100, SG_AUDIO_44_1_KHZ },
        { 48000, SG_AUDIO_48_KHZ },
    };

    for (const RateEntry &entry : known_rates) {
        if (entry.hz == sample_frequency) {
            return entry.rate;
        }
    }

    // No table entry matched
    fprintf(stderr, "Audio file has an unsupported sample rate.");
    exit(1);
}

// Pick the SG_AudioSampleType for a WAV bit depth / format combination.
// Supported: 16-bit PCM, 32-bit PCM, 32-bit IEEE float. Anything else prints
// an error to stderr and ends the program with exit(1).
SG_AudioSampleType get_AudioSampleType(unsigned int bit_depth, unsigned short wav_format) {

    const bool is_pcm = (wav_format == WAV_PCM_FORMAT);
    const bool is_float = (wav_format == WAV_IEEE_FORMAT);

    switch (bit_depth)
    {
    case 16:
        if (is_pcm) {
            return SG_AUDIO_INT_16;
        }
        break;
    case 32:
        if (is_pcm) {
            return SG_AUDIO_INT_32;
        }
        if (is_float) {
            return SG_AUDIO_FLOAT_32;
        }
        break;
    default:
        break;
    }

    // Fell through: the combination is not one of the three supported ones
    fprintf(stderr, "Audio file has an unsupported bit depth and sample format combination.");
    exit(1);
}

// Load a file into a byte buffer
void load_file(const char *file_in, char **bytes_out, sg_size *buffersize) {

    FILE *file = fopen(file_in, "rb");
    fseek(file, 0, SEEK_END);
    *buffersize = ftell(file);
    rewind(file);
    *bytes_out = (char*) malloc(*buffersize);
    fread(*bytes_out, 1, *buffersize, file);
    fclose(file);
}


// Load a WAV file detecting the sample rate and sample type
//
// The header is read at fixed byte offsets, so the file is expected to start
// with a 44-byte (0x2C) header followed directly by the sample data; files
// with additional chunks before the sample data are not handled. Header fields
// are interpreted in the host's byte order.
//
// file_in : path of the WAV file
// returns : a WAVFile whose audio_data is a malloc'd copy of the sample bytes
//           (the caller releases it with free())
//
// A file that is too small, not single channel, not PCM/float, or not 16/32
// bit prints an error to stderr and ends the program with exit(1).
WAVFile load_wav_file(const char* file_in) {

    // Byte offsets into the WAV file header
    const int format_offset = 0x14;
    const int channel_count_offset = 0x16;
    const int sample_rate_offset = 0x18;
    const int bit_depth_offset = 0x22;
    const int data_offset = 0x2C;

    char* file = NULL;
    sg_size buffersize = 0;
    load_file(file_in, &file, &buffersize);

    // Every header field read below lies before data_offset, and the data
    // size is computed as buffersize - data_offset, so reject shorter files.
    if (file == NULL || buffersize < (sg_size)data_offset) {
        fprintf(stderr, "Audio file is too small to contain a WAV header.\n");
        exit(1);
    }

    WAVFile audiofile;

    // Header fields are copied out with memcpy instead of reading the char
    // buffer through pointers of another type.

    // Check the wav file is monophonic
    unsigned short channel_count = 0;
    memcpy(&channel_count, file + channel_count_offset, sizeof(channel_count));
    if (channel_count != 1) {
        fprintf(stderr, "Only single channel audio files are supported.");
        exit(1);
    }

    // Get the sample rate
    unsigned int sample_frequency = 0;
    memcpy(&sample_frequency, file + sample_rate_offset, sizeof(sample_frequency));
    audiofile.sample_rate = int_to_AudioSampleRate(sample_frequency);

    // Get the sample type
    unsigned short raw_format = 0;
    memcpy(&raw_format, file + format_offset, sizeof(raw_format));
    WAV_Format wav_format = (WAV_Format) raw_format;
    if (wav_format != WAV_PCM_FORMAT && wav_format != WAV_IEEE_FORMAT) {
        fprintf(stderr, "Audio file format must be signed PCM or float.");
        exit(1);
    }

    unsigned short bit_depth = 0;
    memcpy(&bit_depth, file + bit_depth_offset, sizeof(bit_depth));
    if (bit_depth != 16 && bit_depth != 32) {
        fprintf(stderr, "Audio file must have a bit depth of 16 or 32.");
        exit(1);
    }

    audiofile.sample_type = get_AudioSampleType(bit_depth, wav_format);

    // Copy the audio data. The source must start at data_offset: copying from
    // the start of the file would put the header into the audio buffer and
    // drop the final data_offset bytes of audio.
    audiofile.data_size = buffersize - data_offset;
    audiofile.audio_data = (char*) malloc(audiofile.data_size > 0 ? (size_t)audiofile.data_size : 1);
    if (audiofile.audio_data == NULL) {
        fprintf(stderr, "Could not allocate memory for the audio data.\n");
        exit(1);
    }
    memcpy(audiofile.audio_data, file + data_offset, audiofile.data_size);

    free(file);

    return audiofile;
}

// Stop the program when an SG Com call reports a failure.
// SG_COM_ERROR_OK returns normally; any other value is printed to stderr and
// then used as the process exit status.
void check_error(SG_COM_Error error) {

    if (error == SG_COM_ERROR_OK) {
        return;
    }

    fprintf(stderr, "Exiting with error code %d", error);
    std::exit(error);
}

// Simulated network send method
void __send__(char *packet, sg_size packet_size) {

    // Receive a data packet into the remote Player
    SG_COM_Error err = SG_COM_ReceivePacket(player_handle, packet, packet_size);

    check_error(err);
}

// Broadcast callback registered with the Engine by create_engine() in remote mode.
// Forwards every packet it is given to __send__().
void engine_broadcast_callback(
    SG_COM_EngineHandle engine,
    char *packet,
    sg_size packet_size,
    void *custom_data) {

    // Forwarding a packet needs neither the Engine handle nor the user data
    (void)engine;
    (void)custom_data;

    __send__(packet, packet_size);
}

// Status callback registered with the Engine by create_engine().
// Prints mode, expression and voice-activity changes to stderr; every other
// status value is ignored.
void engine_status_callback(
    SG_COM_EngineHandle engine,
    SG_COM_Status status,
    const char *message,
    void *custom_data) {

    switch (status)
    {
    case SG_COM_STATUS_MODE_CHANGED:
        fprintf(stderr, "Mode changed: %s\n", message);
        break;
    case SG_COM_STATUS_EXPRESSION_CHANGED:
        fprintf(stderr, "Expression changed: %s\n", message);
        break;
    case SG_COM_STATUS_VOICE_ACTIVITY_CHANGED:
        fprintf(stderr, "Voice activity changed: %s\n", message);
        break;
    default:
        break;
    }
}

// Logging callback passed to SG_COM_Initialize() in run().
// Writes the message to stdout exactly as received. The text is never used as
// a printf format string, so a '%' inside a message cannot be interpreted as a
// conversion specifier. A null message is ignored.
void sg_com_log_messages(const char *message) {

    if (message == nullptr) {
        return;
    }
    fputs(message, stdout);
}

// Translate an SG_AudioSampleRate value into samples per second.
// Values not listed below yield 16000.
size_t get_audio_samples_per_second(const SG_AudioSampleRate sample_rate) {

    size_t samples_per_second = 16000;  // result for any value not listed below

    switch (sample_rate)
    {
    case SG_AUDIO_8_KHZ:
        samples_per_second = 8000;
        break;
    case SG_AUDIO_12_KHZ:
        samples_per_second = 12000;
        break;
    case SG_AUDIO_16_KHZ:
        samples_per_second = 16000;
        break;
    case SG_AUDIO_24_KHZ:
        samples_per_second = 24000;
        break;
    case SG_AUDIO_32_KHZ:
        samples_per_second = 32000;
        break;
    case SG_AUDIO_44_1_KHZ:
        samples_per_second = 44100;
        break;
    case SG_AUDIO_48_KHZ:
        samples_per_second = 48000;
        break;
    default:
        break;
    }

    return samples_per_second;
}

// Size in bytes of one sample of the given type.
// 16-bit integer samples are 2 bytes, 32-bit integer and float samples are 4
// bytes; any other value yields 0.
size_t get_audio_bytes_per_sample(const SG_AudioSampleType sample_type) {

    if (sample_type == SG_AUDIO_INT_16) {
        return 2;
    }
    if (sample_type == SG_AUDIO_INT_32 || sample_type == SG_AUDIO_FLOAT_32) {
        return 4;
    }
    return 0;
}

// Write the saved animation output to a file in Speech Graphics .rts file format
//
// Text written: a frame rate line (output_fps), one line of comma-separated
// "node.channel" names, then one comma-separated line of values per frame.
//
// output_file_path               : file to create or overwrite
// num_animation_nodes            : number of entries in animation_nodes
// animation_nodes                : nodes whose names and channel names form the header line
// results_ptr                    : not read; kept so existing callers keep compiling
// results                        : frame-major values, num_aggregated_output_channels per frame
// num_output_frames              : number of frames stored in results (values <= 0 write no frames)
// num_aggregated_output_channels : number of values per frame
//
// If the file cannot be opened or written, an error is printed to stderr and
// the program ends with exit(1).
void write_output_file(
    const char *output_file_path,
    const sg_size num_animation_nodes,
    SG_AnimationNode *animation_nodes,
    float *results_ptr,
    float *results,
    int num_output_frames,
    const sg_size num_aggregated_output_channels) {

    (void)results_ptr;

    FILE *file = fopen(output_file_path, "w");
    if (file == NULL) {
        fprintf(stderr, "Could not open output file '%s' for writing.\n", output_file_path);
        exit(1);
    }

    // Write frame rate line
    fprintf(file, "%d\n", output_fps);

    // Write channel names. The separator goes in front of every name except
    // the first, so the line is terminated correctly even when the last node
    // has no channels.
    bool wrote_name = false;
    for (size_t i = 0; i < num_animation_nodes; i++) {
        for (size_t j = 0; j < animation_nodes[i].num_channels; j++) {
            fprintf(file, "%s%s.%s", wrote_name ? "," : "", animation_nodes[i].name, animation_nodes[i].channel_names[j]);
            wrote_name = true;
        }
    }
    if (wrote_name) {
        fprintf(file, "\n");
    }

    // Write frame data. A negative frame count is treated as zero rather than
    // being converted to a huge unsigned loop bound.
    const size_t frame_count = num_output_frames > 0 ? (size_t)num_output_frames : 0;
    const float *value = results;
    for (size_t i = 0; i < frame_count; i++) {
        for (size_t j = 0; j < num_aggregated_output_channels; j++, value++) {
            fprintf(file, "%f", *value);
            fputc(j + 1 < num_aggregated_output_channels ? ',' : '\n', file);
        }
    }

    // Write errors on a buffered stream may only be reported when it is closed.
    // Read the error indicator first: the stream must not be used after fclose.
    const bool write_failed = (ferror(file) != 0);
    const bool close_failed = (fclose(file) != 0);
    if (write_failed || close_failed) {
        fprintf(stderr, "Could not write output file '%s'.\n", output_file_path);
        exit(1);
    }
}

// Create the global Player from a character file held in memory.
// The Player's buffer length is output_buffer_sec. On success the new handle is
// stored in player_handle; any error ends the program through check_error().
void create_player(const char *character_file_in_memory, sg_size character_file_bytes) {

    SG_COM_PlayerConfig player_config;
    player_config.buffer_sec = output_buffer_sec;
    player_config.character_file_in_memory = (sg_byte*)character_file_in_memory;
    player_config.character_file_bytes = character_file_bytes;

    check_error(SG_COM_CreatePlayer(&player_config, &player_handle));
}

// Create the global Engine for the given character and audio format, then
// configure and activate its auto modes.
// In remote mode the Engine gets the broadcast callback and no local player;
// in local mode it gets player_handle and no broadcast callback. On success the
// new handle is stored in engine_handle; a creation error ends the program
// through check_error().
void create_engine(
    const char *character_file_in_memory,
    sg_size character_file_bytes,
    SG_AudioSampleRate audio_sample_rate,
    SG_AudioSampleType audio_sample_type) {

    SG_COM_EngineConfig engine_config;

    // Character and audio format
    engine_config.character_file_in_memory = (sg_byte*)character_file_in_memory;
    engine_config.character_file_bytes = character_file_bytes;
    engine_config.audio_sample_rate = audio_sample_rate;
    engine_config.audio_sample_type = audio_sample_type;
    engine_config.buffer_sec = input_buffer_sec;

    // Packet routing
    if (remote_player) {
        engine_config.local_player = nullptr;
        engine_config.engine_broadcast_callback = engine_broadcast_callback;
    }
    else {
        // SG_Com will automatically handle passing packets to the local player
        engine_config.local_player = player_handle;
        engine_config.engine_broadcast_callback = nullptr;
    }

    // Remaining options
    engine_config.engine_status_callback = engine_status_callback;
    engine_config.flag = SG_COM_ENGINE_CONFIG_NONE;
    engine_config.custom_engine_data = nullptr;

    check_error(SG_COM_CreateEngine(&engine_config, &engine_handle));

    // Assign a name to each auto mode, then switch auto modes on
    SG_COM_SetAutoMode(engine_handle, SG_COM_POSITIVE_MODE, "positive");
    SG_COM_SetAutoMode(engine_handle, SG_COM_NEGATIVE_MODE, "negative");
    SG_COM_SetAutoMode(engine_handle, SG_COM_EFFORT_MODE, "effort");
    SG_COM_SetAutoMode(engine_handle, SG_COM_ACKNOWLEDGE_MODE, "acknowledge");
    SG_COM_ActivateAutoModes(engine_handle);
}

// Input audio into the Engine
//
// Feeds the audio to the Engine in chunks of input_chunk_ms, paced against a
// monotonic clock so the input rate matches real time, simulating a microphone.
// Runs on its own thread (started from run()).
//
// audio_sample_rate    : sample rate of the audio
// audio_sample_type    : sample type of the audio
// audio_file_bytes     : points to the size of the audio data in bytes
// audio_file_in_memory : the audio sample data
//
// A trailing part of the audio shorter than one chunk is not input. Any SG Com
// error ends the program through check_error().
void audio_input(
    SG_AudioSampleRate audio_sample_rate,
    SG_AudioSampleType audio_sample_type,
    const sg_size *audio_file_bytes,
    const char *audio_file_in_memory) {

    // Chunk size. Multiply before dividing by 1000 so that rates which are not
    // a whole number of samples per millisecond are not truncated first
    // (44100 Hz gives 441 samples per 10 ms, not 440).
    const size_t samples_per_second = get_audio_samples_per_second(audio_sample_rate);
    const size_t input_chunk_samples = samples_per_second * input_chunk_ms / 1000;
    const size_t input_chunk_bytes = input_chunk_samples * get_audio_bytes_per_sample(audio_sample_type);
    if (input_chunk_bytes == 0) {
        // Would otherwise divide by zero below
        fprintf(stderr, "Cannot input audio: the input chunk size is zero bytes.\n");
        exit(1);
    }

    const size_t num_input_chunks = (size_t)*audio_file_bytes / input_chunk_bytes;
    uint8_t *input_chunk = (uint8_t*)malloc(input_chunk_bytes);
    if (input_chunk == nullptr) {
        fprintf(stderr, "Could not allocate the audio input chunk.\n");
        exit(1);
    }

    // Each chunk is due one chunk duration after the previous one. Sleeping
    // until an absolute deadline keeps the average rate at real time:
    // per-iteration rounding and processing time do not accumulate.
    const auto chunk_duration = std::chrono::milliseconds(input_chunk_ms);
    auto next_deadline = std::chrono::steady_clock::now();

    for (size_t i = 0; i < num_input_chunks; i++) {
        next_deadline += chunk_duration;

        memcpy(input_chunk, audio_file_in_memory + (i * input_chunk_bytes), input_chunk_bytes);
        check_error(SG_COM_InputAudio(engine_handle, input_chunk, input_chunk_bytes));

        // ProcessTick is called from here for simplicity
        // It is recommended this call be offloaded to a background thread, which is then notified using a mechanism like std::condition_variable.
        // This will prevent the processing from blocking the real-time audio thread.
        check_error(SG_COM_ProcessTick(engine_handle, nullptr, nullptr));

        // Returns immediately when the deadline has already passed
        std::this_thread::sleep_until(next_deadline);
    }
    free(input_chunk);
}

// Playback process
void play_output(const char *output_file_path, float output_sec) {

    // Get the animation nodes.
    sg_size num_animation_nodes = 0;
    sg_size num_aggregated_output_channels = 0;
    SG_AnimationNode *animation_nodes = nullptr;

    SG_COM_Error err = SG_COM_GetAnimationNodes(player_handle, &animation_nodes, &num_animation_nodes);

    // Count the aggregated output channels
    for (sg_size i = 0; i < num_animation_nodes; i++) {
        num_aggregated_output_channels += animation_nodes[i].num_channels;
    }

    // Prepare output storage. (Normally you would play the output in real time.
    // But in this example we will save it to a file.)
    int num_output_frames = (int)roundf(output_sec * output_fps);
    float *results = (float*)malloc(sizeof(float) * num_aggregated_output_channels * num_output_frames);
    float *results_ptr = results;

    //Wait until some animation is in the buffer
    double min_time, max_time, current_time;
    bool bufferred = false;
    while (!bufferred) {
        SG_COM_GetPlayableRange(player_handle, &min_time, &max_time);
        bufferred = (max_time - min_time > 0);
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }

    // Enter the real-time playback loop
    auto delta_time = std::chrono::microseconds(1000000 / output_fps);
    auto start_time = std::chrono::high_resolution_clock::now();
    auto frame_time = start_time;
    for (size_t i = 0; i < num_output_frames; i++) {
        std::chrono::duration<float, std::milli> time_elapsed_ms = frame_time - start_time;
        SG_COM_Error err = SG_COM_UpdateAnimation(player_handle, time_elapsed_ms.count(), &current_time);
        if (err != SG_COM_ERROR_OK) {
            break;
        }
        for (size_t j = 0; j < num_animation_nodes; j++) {
            // Copy the animation values out of the node
            // This is where animation would be applied to the character
            memcpy(results_ptr, animation_nodes[j].channel_values, animation_nodes[j].num_channels * sizeof(float));
            results_ptr += animation_nodes[j].num_channels;
        }

        // Wait for delta to elapse
        std::this_thread::sleep_for(delta_time);
        frame_time = std::chrono::high_resolution_clock::now();
    }

    write_output_file(
        output_file_path,
        num_animation_nodes,
        animation_nodes,
        results_ptr,
        results,
        num_output_frames,
        num_aggregated_output_channels);

    // Clean up
    free(results);
}

// Run the example
//
// Initializes SG Com, loads the character and audio files, creates the Player
// and the Engine, feeds the audio on a separate thread while the main thread
// plays the animation back into output_file_path, then shuts everything down.
//
// license_string      : passed to SG_COM_Initialize()
// character_file_path : character file loaded into memory for the Player and the Engine
// audio_file_path     : WAV file with the input speech
// output_file_path    : file the output animation is saved in
void run(
    const char *license_string,
    const char *character_file_path,
    const char *audio_file_path,
    const char *output_file_path) {

    SG_COM_Error err = SG_COM_Initialize(SG_LOGLEVEL_ERROR, sg_com_log_messages, license_string, nullptr, nullptr);
    check_error(err);

    printf("SG_Com Version: %s\n", SG_COM_GetVersionString());

    // Load files into memory
    char *character_file_in_memory = nullptr;
    sg_size character_file_bytes = 0;
    load_file(character_file_path, &character_file_in_memory, &character_file_bytes);

    WAVFile audio_file = load_wav_file(audio_file_path);

    create_player(character_file_in_memory, character_file_bytes);

    create_engine(
        character_file_in_memory,
        character_file_bytes,
        audio_file.sample_rate,
        audio_file.sample_type);

    // Start the audio thread
    std::thread input_thread(audio_input, audio_file.sample_rate, audio_file.sample_type, &audio_file.data_size, audio_file.audio_data);

    // "Play" the animation for slightly less than the audio duration.
    // NOTE(review): the 0.1 s margin presumably covers animation that has not
    // been produced yet when the audio runs out - confirm.
    // Audio shorter than the margin must not produce a negative duration.
    const float playback_margin_sec = 0.1f;
    const float audio_sec = (float)audio_file.data_size / (get_audio_samples_per_second(audio_file.sample_rate) * get_audio_bytes_per_sample(audio_file.sample_type));
    const float output_sec = audio_sec > playback_margin_sec ? audio_sec - playback_margin_sec : 0.0f;
    play_output(output_file_path, output_sec);

    // Make sure asynchronous audio input has been shut down before destroying the Engine.
    // In this example audio delivery will complete by itself when it reaches the end of the audio file.
    input_thread.join();

    SG_COM_DestroyEngine(engine_handle);
    SG_COM_DestroyPlayer(player_handle);
    SG_COM_Shutdown();

    // Release the file buffers last: this is safe whether or not the Engine
    // and Player keep pointers into the character data they were created from.
    free(character_file_in_memory);
    free(audio_file.audio_data);
}

// Main function
//
// Expects exactly four arguments: license string, character file, audio file
// and output file (see help_text). With any other argument count, prints the
// usage text to stderr and returns a non-zero exit status so that scripts can
// detect the failure.
int main( int argc, char *argv []) {

    if (argc != 5) {
        fprintf(stderr, "Incorrect number of arguments\n%s", help_text);
        return 1;
    }

    run(argv[1], argv[2], argv[3], argv[4]);
    return 0;
}