// // SG_Com_simple_example.cpp // 2019/07/05 // Simple example use of SG Com in C++ (mostly C). // // We construct an Engine and a Player to illustrate generation // and playback of animation. There is an option to make the Player "local" // (connected directly to the Engine) or "remote" (with a callback function to "send" // packets to the Player). This example does not illustrate audio sync, idle mode, // or control features such as mode switching. // // Copyright (c) 2019 Speech Graphics Ltd. All rights reserved. // #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS #endif #include "../src/SG_Com.h" #include #include #include #include #include // memcpy #include const char* help_text = "Usage: SG_Com_Simple_Example.exe license_string character_file audio_file audio_sample_rate audio_sample_type output_file\n\n" "license_string : License string (directory path for RLM builds|license string for other builds)\n" "character_file : Path to character .k file.\n" "audio_file : Path to input audio file containing speech. Must be a single channel wav file.\n" "output_file : Path to the file to save the output animation in.\n"; // Example mode. Set to true to treat the Player as a "remote" player. Otherwise it will be treated as local. static bool remote_player = true; // Global objects static SG_COM_EngineHandle engine_handle; static SG_COM_PlayerHandle player_handle; // Input settings const static float input_buffer_sec = 1.0; // maximum duration (seconds) of the input buffer const static int input_chunk_ms = 10; // duration of audio to be input on each process tick // Output settings const static float output_buffer_sec = 1.0; // maximum duration (seconds) of the output buffer const static int output_fps = 30; struct WAVFile { char* audio_data; sg_size data_size; SG_AudioSampleRate sample_rate; SG_AudioSampleType sample_type; }; enum WAV_Format { WAV_PCM_FORMAT = 0x01, WAV_IEEE_FORMAT = 0x03 }; // Convert an int to SG_AudioSampleRate SG_AudioSampleRate int_to_AudioSampleRate(unsigned int sample_frequency) { switch (sample_frequency) { case 8000: return SG_AUDIO_8_KHZ; case 12000: return SG_AUDIO_12_KHZ; case 16000: return SG_AUDIO_16_KHZ; case 24000: return SG_AUDIO_24_KHZ; case 32000: return SG_AUDIO_32_KHZ; case 44100: return SG_AUDIO_44_1_KHZ; case 48000: return SG_AUDIO_48_KHZ; default: fprintf(stderr, "Audio file has an unsupported sample rate."); exit(1); } } // Get SG_AudioSampleType based on the bit depth and wav sample format SG_AudioSampleType get_AudioSampleType(unsigned int bit_depth, unsigned short wav_format) { if (bit_depth == 16 && wav_format == WAV_PCM_FORMAT) { return SG_AUDIO_INT_16; } else if (bit_depth == 32 && wav_format == WAV_PCM_FORMAT) { return SG_AUDIO_INT_32; } else if (bit_depth == 32 && wav_format == WAV_IEEE_FORMAT) { return SG_AUDIO_FLOAT_32; } else { fprintf(stderr, "Audio file has an unsupported bit depth and sample format combination."); exit(1); } } // Load a file into a byte buffer void load_file(const char *file_in, char **bytes_out, sg_size *buffersize) { FILE *file = fopen(file_in, "rb"); fseek(file, 0, SEEK_END); *buffersize = ftell(file); rewind(file); *bytes_out = (char*) malloc(*buffersize); fread(*bytes_out, 1, *buffersize, file); fclose(file); } // Load a WAV file detecting the sample rate and sample type WAVFile load_wav_file(const char* file_in) { // Byte offsets into the WAV file header const int format_offset = 0x14; const int channel_count_offset = 0x16; const int sample_rate_offset = 0x18; const int bit_depth_offset = 0x22; const int data_offset = 0x2C; char* file = NULL; sg_size buffersize = 0; load_file(file_in, &file, &buffersize); WAVFile audiofile; // Check the wav file is monophonic unsigned short channel_count = *((unsigned short*) &file[channel_count_offset]); if (channel_count != 1) { fprintf(stderr, "Only single channel audio files are supported."); exit(1); } // Get the sample rate audiofile.sample_rate = int_to_AudioSampleRate(*((unsigned int*) &file[sample_rate_offset])); // Get the sample type WAV_Format wav_format = (WAV_Format) *((unsigned short*) &file[format_offset]); if (wav_format != WAV_PCM_FORMAT && wav_format != WAV_IEEE_FORMAT) { fprintf(stderr, "Audio file format must be signed PCM or float."); exit(1); } unsigned short bit_depth = *((unsigned short*) &file[bit_depth_offset]); if (bit_depth != 16 && bit_depth != 32) { fprintf(stderr, "Audio file must have a bit depth of 16 or 32."); exit(1); } audiofile.sample_type = get_AudioSampleType(bit_depth, wav_format); // Copy the audio data audiofile.data_size = buffersize - data_offset; audiofile.audio_data = (char*) malloc(audiofile.data_size); memcpy(audiofile.audio_data, file, audiofile.data_size); free(file); return audiofile; } // Error handler void check_error(SG_COM_Error error) { if (error != SG_COM_ERROR_OK) { fprintf(stderr, "Exiting with error code %d", error); std::exit(error); } } // Simulated network send method void __send__(char *packet, sg_size packet_size) { // Receive a data packet into the remote Player SG_COM_Error err = SG_COM_ReceivePacket(player_handle, packet, packet_size); check_error(err); } // Engine broadcast callback void engine_broadcast_callback( SG_COM_EngineHandle engine, char *packet, sg_size packet_size, void *custom_data) { __send__(packet, packet_size); } // Engine status callback void engine_status_callback( SG_COM_EngineHandle engine, SG_COM_Status status, const char *message, void *custom_data) { if (status == SG_COM_STATUS_MODE_CHANGED) { fprintf(stderr, "Mode changed: %s\n", message); } if (status == SG_COM_STATUS_EXPRESSION_CHANGED) { fprintf(stderr, "Expression changed: %s\n", message); } if (status == SG_COM_STATUS_VOICE_ACTIVITY_CHANGED) { fprintf(stderr, "Voice activity changed: %s\n", message); } } // Logging callback void sg_com_log_messages(const char *message) { printf(message); } // Get samples per second values size_t get_audio_samples_per_second(const SG_AudioSampleRate sample_rate) { switch (sample_rate) { case SG_AUDIO_8_KHZ: return 8000; case SG_AUDIO_12_KHZ: return 12000; case SG_AUDIO_16_KHZ: return 16000; case SG_AUDIO_24_KHZ: return 24000; case SG_AUDIO_32_KHZ: return 32000; case SG_AUDIO_44_1_KHZ: return 44100; case SG_AUDIO_48_KHZ: return 48000; default: return 16000; } } // Get sample type sizes size_t get_audio_bytes_per_sample(const SG_AudioSampleType sample_type) { switch (sample_type) { case SG_AUDIO_INT_16: return 2; case SG_AUDIO_INT_32: case SG_AUDIO_FLOAT_32: return 4; default: return 0; } } // Write the saved animation output to a file in Speech Graphics .rts file format void write_output_file( const char *output_file_path, const sg_size num_animation_nodes, SG_AnimationNode *animation_nodes, float *results_ptr, float *results, int num_output_frames, const sg_size num_aggregated_output_channels) { FILE *file = fopen(output_file_path, "w"); // Write frame rate line fprintf(file, "%d\n", output_fps); // Write channel names for (size_t i = 0; i < num_animation_nodes; i++) { for (size_t j = 0; j < animation_nodes[i].num_channels; j++) { fprintf(file, "%s.%s", animation_nodes[i].name, animation_nodes[i].channel_names[j]); if (i < num_animation_nodes - 1 || j < animation_nodes[i].num_channels - 1) { fprintf(file, ","); } else { fprintf(file, "\n"); } } } // Write frame data results_ptr = results; for (size_t i = 0; i < num_output_frames; i++) { for (size_t j = 0; j < num_aggregated_output_channels; j++, results_ptr++) { fprintf(file, "%f", *results_ptr); if (j < num_aggregated_output_channels - 1) { fprintf(file, ","); } else { fprintf(file, "\n"); } } } fclose(file); } void create_player(const char *character_file_in_memory, sg_size character_file_bytes) { SG_COM_Error err; SG_COM_PlayerConfig player_config; player_config.character_file_bytes = character_file_bytes; player_config.character_file_in_memory = (sg_byte*)character_file_in_memory;; player_config.buffer_sec = output_buffer_sec; err = SG_COM_CreatePlayer(&player_config, &player_handle); check_error(err); } void create_engine( const char *character_file_in_memory, sg_size character_file_bytes, SG_AudioSampleRate audio_sample_rate, SG_AudioSampleType audio_sample_type) { SG_COM_Error err; SG_COM_EngineConfig engine_config; engine_config.audio_sample_type = audio_sample_type; engine_config.audio_sample_rate = audio_sample_rate; engine_config.buffer_sec = input_buffer_sec; engine_config.character_file_bytes = character_file_bytes; engine_config.character_file_in_memory = (sg_byte*)character_file_in_memory; engine_config.local_player = remote_player ? nullptr : player_handle; // SG_Com will automatically handle passing packets to the local player engine_config.engine_broadcast_callback = remote_player ? engine_broadcast_callback : nullptr; engine_config.engine_status_callback = engine_status_callback; engine_config.flag = SG_COM_ENGINE_CONFIG_NONE; engine_config.custom_engine_data = nullptr; err = SG_COM_CreateEngine(&engine_config, &engine_handle); check_error(err); SG_COM_SetAutoMode(engine_handle, SG_COM_POSITIVE_MODE, "positive"); SG_COM_SetAutoMode(engine_handle, SG_COM_NEGATIVE_MODE, "negative"); SG_COM_SetAutoMode(engine_handle, SG_COM_EFFORT_MODE, "effort"); SG_COM_SetAutoMode(engine_handle, SG_COM_ACKNOWLEDGE_MODE, "acknowledge"); SG_COM_ActivateAutoModes(engine_handle); } // Input audio into the Engine void audio_input( SG_AudioSampleRate audio_sample_rate, SG_AudioSampleType audio_sample_type, const sg_size *audio_file_bytes, const char *audio_file_in_memory) { SG_COM_Error err; // Feed audio into the Engine in chunks of input_chunk_ms length, simulating real-time input from a microphone. int samples_per_ms = (int)get_audio_samples_per_second(audio_sample_rate) / 1000; int input_chunk_samples = input_chunk_ms * samples_per_ms; int input_chunk_bytes = input_chunk_samples * (int)get_audio_bytes_per_sample(audio_sample_type); int num_input_chunks = *audio_file_bytes / input_chunk_bytes; uint8_t *input_chunk = (uint8_t*)malloc(input_chunk_bytes); for (size_t i = 0; i < num_input_chunks; i++) { auto start_time = std::chrono::high_resolution_clock::now(); memcpy(input_chunk, audio_file_in_memory + (i * input_chunk_bytes), input_chunk_bytes); err = SG_COM_InputAudio(engine_handle, input_chunk, input_chunk_bytes); check_error(err); // ProcessTick is called from here for simplicity // It is recommended this call be offloaded to a background thread, which is then notified using a mechanism like std::condition_variable. // This will prevent the processing from blocking the real-time audio thread. err = SG_COM_ProcessTick(engine_handle, nullptr, nullptr); check_error(err); // Sleep thread until the duration of the loop equals input_chunk_ms to simulate real-time audio input auto duration = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::this_thread::sleep_for(std::chrono::milliseconds(input_chunk_ms - duration.count())); } free(input_chunk); } // Playback process void play_output(const char *output_file_path, float output_sec) { // Get the animation nodes. sg_size num_animation_nodes = 0; sg_size num_aggregated_output_channels = 0; SG_AnimationNode *animation_nodes = nullptr; SG_COM_Error err = SG_COM_GetAnimationNodes(player_handle, &animation_nodes, &num_animation_nodes); // Count the aggregated output channels for (sg_size i = 0; i < num_animation_nodes; i++) { num_aggregated_output_channels += animation_nodes[i].num_channels; } // Prepare output storage. (Normally you would play the output in real time. // But in this example we will save it to a file.) int num_output_frames = (int)roundf(output_sec * output_fps); float *results = (float*)malloc(sizeof(float) * num_aggregated_output_channels * num_output_frames); float *results_ptr = results; //Wait until some animation is in the buffer double min_time, max_time, current_time; bool bufferred = false; while (!bufferred) { SG_COM_GetPlayableRange(player_handle, &min_time, &max_time); bufferred = (max_time - min_time > 0); std::this_thread::sleep_for(std::chrono::milliseconds(10)); } // Enter the real-time playback loop auto delta_time = std::chrono::microseconds(1000000 / output_fps); auto start_time = std::chrono::high_resolution_clock::now(); auto frame_time = start_time; for (size_t i = 0; i < num_output_frames; i++) { std::chrono::duration time_elapsed_ms = frame_time - start_time; SG_COM_Error err = SG_COM_UpdateAnimation(player_handle, time_elapsed_ms.count(), ¤t_time); if (err != SG_COM_ERROR_OK) { break; } for (size_t j = 0; j < num_animation_nodes; j++) { // Copy the animation values out of the node // This is where animation would be applied to the character memcpy(results_ptr, animation_nodes[j].channel_values, animation_nodes[j].num_channels * sizeof(float)); results_ptr += animation_nodes[j].num_channels; } // Wait for delta to elapse std::this_thread::sleep_for(delta_time); frame_time = std::chrono::high_resolution_clock::now(); } write_output_file( output_file_path, num_animation_nodes, animation_nodes, results_ptr, results, num_output_frames, num_aggregated_output_channels); // Clean up free(results); } // Run the example void run( const char *license_string, const char *character_file_path, const char *audio_file_path, const char *output_file_path) { SG_COM_Error err = SG_COM_Initialize(SG_LOGLEVEL_ERROR, sg_com_log_messages, license_string, nullptr, nullptr); check_error(err); printf("SG_Com Version: %s\n", SG_COM_GetVersionString()); // Load files into memory char *character_file_in_memory; sg_size character_file_bytes; load_file(character_file_path, &character_file_in_memory, &character_file_bytes); WAVFile audio_file = load_wav_file(audio_file_path); create_player(character_file_in_memory, character_file_bytes); create_engine( character_file_in_memory, character_file_bytes, audio_file.sample_rate, audio_file.sample_type); // Start the audio thread std::thread input_thread(audio_input, audio_file.sample_rate, audio_file.sample_type, &audio_file.data_size, audio_file.audio_data); // "Play" the animation float audio_sec = (float)audio_file.data_size / (get_audio_samples_per_second(audio_file.sample_rate) * get_audio_bytes_per_sample(audio_file.sample_type)); play_output(output_file_path, audio_sec - 0.1f); // Clean up input_thread.join(); free(character_file_in_memory); free(audio_file.audio_data); // Make sure asynchronous audio input has been shut down before destroying the Engine. // In this example audio delivery will complete by itself when it reaches the end of the audio file. SG_COM_DestroyEngine(engine_handle); SG_COM_DestroyPlayer(player_handle); SG_COM_Shutdown(); } // Main function int main( int argc, char *argv []) { if (argc != 5) { printf("Incorrect number of arguments\n%s", help_text); exit(0); } run(argv[1], argv[2], argv[3], argv[4]); exit(0); }