@alicloud-panxi/dhuman-sdk

v1.0.22

Published

9 months ago

## 1. 安装

0High
0Medium
0Low

磐曦 2D、3D 数字人 Runtime SDK 使用文档

1. 安装

安装前置依赖

npm i @alifd/next @b-design/fusion moment react react-dom --save

html 模板中引入组件库 css 依赖

<link href="//gw.alipayobjects.com/os/lib/alifd/next/1.25.45/dist/next-noreset.var.css" rel="stylesheet" />
<link href="//gw.alipayobjects.com/os/lib/b-design/fusion/3.0.3/dist/index.css" rel="stylesheet" />

请使用 npm 包管理工具安装 SDK。

npm install @alicloud-panxi/dhuman-sdk --save

2. 快速接入

1. 引入 SDK

在 JavaScript 或 TypeScript 文件中使用 SDK：

import AIGCPreviewRuntimeSDK from '@alicloud-panxi/dhuman-sdk';

// 创建容器 DOM 元素
const container = document.getElementById('sdk-container') as HTMLElement;

// 配置 SDK 参数
const sdkOptions: SDKOptions = {
  // 服务端对接后，服务端会透出一个 api 接口，这个接口地址就是 apiPath
  apiPath: 'https://xxx.com/path/to/server',
  // 要渲染的容器dom节点
  container: container,
  // 项目 id
  projectId: 'your-project-id',
  // 场景类型
  type: '2d',
};

// 初始化并渲染 SDK 实例
const sdkInstance = new AIGCPreviewRuntimeSDK(sdkOptions);

// 当不再需要时，可以调用 destroy 方法来销毁 SDK
sdkInstance.destroy();

除了上述必要参数，SDK 支持更灵活的配置，全量示例如下：

import AIGCPreviewRuntimeSDK from '@alicloud-panxi/dhuman-sdk';

// 创建容器 DOM 元素
const container = document.getElementById('sdk-container') as HTMLElement;

// 配置 SDK 参数
const sdkOptions: SDKOptions = {
  // 服务端对接后，服务端会透出一个 api 接口，这个接口地址就是 apiPath
  apiPath: 'https://xxx.com/path/to/server',
  // 要渲染的容器dom节点
  container: container,
  // 项目 id
  projectId: 'your-project-id',
  // 场景类型
  type: '2d',
  // 自定义参数 - 按需
  options: {
    topPane: {
      isHideTitle: false,
      isHideCloseButton: false,
      isHideCountDown: true,
      ComponentView: {
        CustomInteractSwitch: MyCustomInteractSwitch,
      },
    },
    chatPane: {
      useTextOnly: true,
      useAudioOnly: false,
      ComponentView: {
        CustomChatInput: MyCustomChatInput,
        CustomAudioInput: MyCustomAudioInput,
        CustomMessageList: MyCustomMessageList,
      },
    },
    hooks: {
      chat: {
        useCustomModelWithCompleteInput: async (question) => {
          console.log('Processing question:', question);
          // 在自定义服务中处理问题
          const responseText = await externalService(question);

          return {
            TotalResponse: {
              content: responseText,
              messageId: 'unique-id',
              finish: true,
            },
            processAudioStream: (callback) => {
              // 如果需要处理音频数据块，可以在这里实现逻辑。
              const audioChunks = mockAudioChunks(responseText);
              audioChunks.forEach((chunk, index) => {
                const isEnd = index === audioChunks.length - 1;
                callback(chunk, isEnd);
              });
            },
          };
        },
      },
      audio: {
        useCustomModelWithCompleteInput: async (question) => {
          console.log('Processing audio question:', question);
          const responseText = await externalService(question);

          return {
            TotalResponse: {
              content: responseText,
              messageId: 'unique-id',
              finish: true,
            },
            processAudioStream: (callback) => {
              const audioChunks = createAudioChunks(responseText);
              audioChunks.forEach((chunk, index) => {
                const isEnd = index === audioChunks.length - 1;
                callback(chunk, isEnd);
              });
            },
          };
        },
        useCustomModelWithStreamASRInput: async (asrResult) => {
          console.log('ASR result:', asrResult);
          const processedText = await externalService(asrResult);

          const messageId = 'streaming-text-response-id';
          return {
            processTextStream: (callback) => {
              const textChunks = [
                { content: '这是第一部分', messageId, finish: false },
                { content: '继续第二部分', messageId, finish: false },
                { content: '最后的结尾', messageId, finish: true },
              ];
              textChunks.forEach((chunk, index) => {
                const isEnd = chunk.finish;
                callback(chunk, isEnd);
              });
            },
          };
        },
      },
      asr: {
        useAudioData: async ({ audio, taskStatus, taskId }) => {
          if (taskStatus === 'taskStart') {
            console.log('Task started');
            return {
              processASRStream: (callback) => {
                callback(
                  {
                    header: { name: 'TranscriptionStarted', task_id: taskId.current },
                    payload: {},
                  },
                  false
                );
              },
            };
          }
          if (taskStatus === 'taskEnd') {
            console.log('Task ended');
            return {
              processASRStream: (callback) => {
                callback(
                  {
                    header: { name: 'TranscriptionCompleted', task_id: taskId.current },
                    payload: {},
                  },
                  true
                );
              },
            };
          }

          // 默认返回
          return {
            processASRStream: (callback) => {
              callback(
                {
                  header: { name: 'TranscriptionResultChanged', task_id: taskId.current },
                  payload: { result: '动态的识别结果' },
                },
                false
              );
            },
          };
        },
      },
    },
  },
  onJoinChannel: ({ success }) => {
    console.log('Join channel success:', success);
  },
  onError: (error) => {
    console.error('Error occurred:', error);
  },
  onStart: () => {
    console.log('SDK started');
  },
  onEnd: () => {
    console.log('SDK ended');
  },
};

// 初始化并渲染 SDK 实例
const sdkInstance = new AIGCPreviewRuntimeSDK(sdkOptions);

// 当不再需要时，可以调用 destroy 方法来销毁 SDK
sdkInstance.destroy();

2. 参数说明

| 参数名 | 描述 | 类型 | 是否必填 | 默认值 |
| --- | --- | --- | --- | --- |
| apiPath | 后台服务端 API 接口地址 | string | 是 | - |
| apiPathOptions | apiPath 接口请求的配置（如 timeout、headers、method、withCredentials、customRequestData 等） | `Record<string, any>` | 否 | - |
| container | 要渲染的容器 DOM 节点 | HTMLElement | 是 | - |
| projectId | 项目标识 ID | string | 是 | - |
| type | 渲染的类型，可选值有 '2d'、'hyperrealism' 等 | string | 否 | '2d' |
| options | UI 自定义配置和处理钩子（按需配置），详见下文 | object | 否 | - |
| onJoinChannel | 加入频道后的回调函数，入参为包含 success 状态的对象 | function | 否 | - |
| onError | 出错时的回调函数 | function | 否 | - |
| onStart | SDK 启动成功后的回调函数 | function | 否 | - |
| onRender | （仅 3D）SDK 渲染完成后的回调函数 | function | 否 | - |
| onEnd | SDK 结束时的回调函数 | function | 否 | - |

3. 方法说明

| 方法名 | 描述 | 类型 | 是否必填 | 默认值 |
| --- | --- | --- | --- | --- |
| destroy | 销毁当前实例 | function | 否 | - |
| speak | （仅 3D）主动播报一段语音，用法：`speak(text, { onStart, onEnd })` | function | 否 | - |
| stopSpeak | （仅 3D）停止播报当前语音 | function | 否 | - |
| createSpeakStream | （仅 3D）播放文本流，用法：`x = createSpeakStream({ onStart, onEnd }); x.next('text'); x.last('text')` | function | 否 | - |

4. Options 配置

options 对象允许自定义 SDK 的 UI 组件和功能钩子，支持以下自定义配置：

TopPane: 定制顶部面板的外观和功能。

| 参数名 | 描述 | 类型 | 是否必填 | 默认值 |
| --- | --- | --- | --- | --- |
| isHideTitle | 是否隐藏标题 | boolean | 否 | false |
| isHideCloseButton | 是否隐藏关闭按钮 | boolean | 否 | false |
| isHideCountDown | 是否隐藏倒计时 | boolean | 否 | false |
| ComponentView | 自定义顶部面板的组件，比如 CustomInteractSwitch（交互切换按钮）。 | react/vue component | 否 | - |

ChatPane: 定制聊天面板的外观和功能。

- `useSettingsButton`: 自定义侧边按钮，默认全部展示。按钮类型包括 `text`（文本输入）、`longPress`（长按收音）、`call`（通话模式）、`continuous`（沉浸式）。例如传入 `['text', 'longPress']` 时，只展示文本输入和长按收音按钮。
- `ComponentView`: 自定义聊天组件，例如 CustomChatInput（聊天输入框）、CustomAudioInput（音频输入框）、CustomMessageList（消息列表）。

Hooks: 用于自定义交互逻辑。

chat

- **`useCustomModelWithCompleteInput`**

  - **类型**: `(question: string) => Promise<CustomModelResponse>`
  - **描述**: 文字输入场景使用，用户可以通过 hook 入参获取用户当前输入的对话内容，用户侧自由调用服务，按照 `CustomModelResponse` 定义返回处理结果，交由前端 SDK 进行渲染。
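  - **示例**:

    ```typescript
    hooks: {
      chat: {
        useCustomModelWithCompleteInput: async (question) => {
          console.log('Processing question:', question);
          // 在自定义服务中处理问题
          const responseText = await externalService(question);

          return {
            TotalResponse: {
              content: responseText,
              messageId: 'unique-id',
              finish: true,
            },
            processAudioStream: (callback) => {
              // 如果需要处理音频数据块，可以在这里实现逻辑。
              const audioChunks = mockAudioChunks(responseText);
              audioChunks.forEach((chunk, index) => {
                const isEnd = index === audioChunks.length - 1;
                callback(chunk, isEnd);
              });
            },
          };
        },
      },
    }
    ```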

- **`getUserInput`**

  - **类型**: `(question: string) => void`
  - **描述**: 文字输入场景使用，用户可以通过 hook 入参获取用户当前输入的对话内容。
  - **示例**:

    ```typescript
    hooks: {
      chat: {
        getUserInput: async (question) => {
          console.log('Processing question:', question);
        },
      },
    }
    ```

audio

useCustomModelWithCompleteInput

类型: (question: string) => Promise<CustomModelResponse>
描述: 语音输入场景使用，用户可以通过 hook 入参获取用户语音输入完整的 ASR 识别结果，用户侧自由调用服务，按照 CustomModelResponse 定义返回处理结果，交由前端 SDK 进行渲染。

示例:

hooks: {
  audio: {
    useCustomModelWithCompleteInput: async (question) => {
      console.log('Processing audio question:', question);
      const responseText = await externalService(question);

      return {
        TotalResponse: {
          content: responseText,
          messageId: 'audio-message-id',
          finish: true
        },
        processAudioStream: (callback) => {
          const audioChunks = createAudioChunks(responseText);
          audioChunks.forEach((chunk, index) => {
            const isEnd = index === audioChunks.length - 1;
            callback(chunk, isEnd);
          });
        },
      };
    },
  }
}

useCustomModelWithStreamASRInput

类型: (asrResult: string) => Promise<CustomModelResponse>
描述: 语音输入场景使用，用户可以通过 hook 入参获取用户语音输入流式的 ASR 识别结果，用户侧自由调用服务，按照 CustomModelResponse 定义返回处理结果，交由前端 SDK 进行渲染。

示例:

hooks: {
  audio: {
    useCustomModelWithStreamASRInput: async (asrResult) => {
      console.log('ASR result:', asrResult);
      const processedText = await externalService(asrResult);

      return {
        processTextStream: (callback) => {
          const textChunks = [
            { content: '流式文本部分1', messageId: 'stream-id', finish: false },
            { content: '流式文本部分2', messageId: 'stream-id', finish: true },
          ];
          textChunks.forEach((chunk) => {
            callback(chunk, chunk.finish);
          });
        },
      };
    },
  }
}

getASRResultInStream

- 类型: `(asrResult: string) => void`
- 描述: 语音输入场景使用，用户可以通过 hook 入参获取用户语音输入流式的 ASR 识别结果。

getASRResultInTotal

- 类型: `(asrResult: string) => void`
- 描述: 语音输入场景使用，用户可以通过 hook 入参获取用户语音输入完整的 ASR 识别结果。

asr

useAudioData

类型: ({ audio, taskStatus, taskId }) => Promise<CustomASRModelResponse>
描述: 用户自定义 ASR 服务使用，用户可以通过 hook 入参获取用户输入语音的二进制数据、当前语音识别任务的状态（'taskStart'：任务开始，'taskRecording'：任务进行中，'taskEnd'：任务结束）、任务 ID（当前语音识别任务的标识，一段语音识别为一个任务），用户侧自由调用服务，按照 CustomASRModelResponse 定义返回处理结果，交由前端 SDK 进行渲染。

示例:

hooks: {
  asr: {
    useAudioData: async ({ audio, taskStatus, taskId }) => {
      if (taskStatus === 'taskStart') {
        console.log('任务开始');
        return {
          processASRStream: (callback) => {
            callback(
              {
                header: { name: 'TranscriptionStarted', task_id: taskId.current },
                payload: {},
              },
              false
            );
          },
        };
      }

      if (taskStatus === 'taskEnd') {
        console.log('任务结束');
        return {
          processASRStream: (callback) => {
            callback(
              {
                header: { name: 'TranscriptionCompleted', task_id: taskId.current },
                payload: {},
              },
              true
            );
          },
        };
      }

      // 处理中状态
      return {
        processASRStream: (callback) => {
          callback(
            {
              header: { name: 'TranscriptionResultChanged', task_id: taskId.current },
              payload: { result: '流动态识别结果' },
            },
            false
          );
        },
      };
    },
  }
}

CustomModelResponse

类型: 自定义大模型 hooks 返回类型

描述: 包含大模型的完整文字回复、流式音频回复和流式文字回复

interface IModelMessage {
  content: string; // 大模型完整回复
  messageId: string; // uuid
  finish: boolean; // 全量返回的时候直接传 true
  relatedImages?: string[]; // 相关图片资源
  relatedVideos?: string[]; // 相关视频资源
}

interface CustomModelResponse {
  TotalResponse?: IModelMessage;
  processAudioStream?: (callback: (audioChunk: ArrayBuffer, isEnd: boolean) => void) => void;
  processTextStream?: (callback: (textChunk: IModelMessage, isEnd: boolean) => void) => void;
}

CustomASRModelResponse

类型: 自定义 ASR hooks 返回类型

描述: 包含 ASR 完整识别结果和流式识别结果。持续收音交互模式只支持 processASRStream，不支持 totalASRResult。

interface IASRMessage {
  header: {
    name: 'TranscriptionStarted' | 'SentenceBegin' | 'TranscriptionResultChanged' | 'SentenceEnd' | 'TranscriptionCompleted';
    task_id: string;
  };
  payload: {
    result?: string;
  };
}

interface CustomASRModelResponse {
  totalASRResult?: string; // 完整返回识别结果
  processASRStream?: (callback: (asrChunk: IASRMessage, isEnd: boolean) => void) => void; // 流式返回识别结果
}

5. 版本更新

对于最新的版本更改日志，请参阅 changelog.md 文件。

通过 AIGC Preview Runtime SDK，您可以灵活定制数字人的预览会话，并将其集成到现有系统中。请根据具体需求调整 options 设置，以实现符合您期望的 UI 和功能。通过 hooks，您可以在 SDK 执行关键逻辑时插入自定义处理，让应用扩展和业务集成更加灵活高效。