MAXScript: Creating Facial Animation from Audio

Option 1: Use Python + pydub (or Audacity) together with phoneme recognition to extract the phonemes from the speech and output them as JSON for 3ds Max.
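A minimal sketch of how option 1 could be wired up, assuming a hypothetical recognize_phonemes placeholder (any phoneme recognition tool can be plugged in there; pydub is only used to load the audio) and an example speech.wav input. The output uses the same JSON layout as option 2 below:

import json
from pydub import AudioSegment

def recognize_phonemes(wav_path):
    """Hypothetical placeholder: run any phoneme recognizer here and
    return a list of (time_in_seconds, phoneme) tuples."""
    raise NotImplementedError("plug in a real phoneme recognition step")

audio = AudioSegment.from_wav("speech.wav")       # pydub only loads the audio
print("duration:", audio.duration_seconds, "s")   # handy for sanity-checking the timings

data = [{"time": round(t, 3), "phoneme": p} for t, p in recognize_phonemes("speech.wav")]
with open("phonemes.json", "w") as out:
    json.dump(data, out, indent=2)                # same JSON layout as option 2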


Option 2: Export a .pgo file from Papagayo, parse it with a Python script, and convert it to JSON.

Option 2 in detail:
1. Use the Papagayo application to break the audio down into phonemes and export the result as a .pgo file. The Python script below (pgo2json.py) then converts the .pgo into phonemes.json.
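The parser below only keys on lines of the form "<frame> <PHONEME>" (after stripping whitespace). Inside a .pgo file those phoneme records look roughly like this simplified excerpt (the real file also contains header, voice, phrase and word entries):

37 E
50 AI
61 L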

import json
import re

def parse_ng_pgo_file(path):
    phonemes = []
    fps = 24  # frame rate; should match the Papagayo project and the 3ds Max scene

    with open(path, 'r') as file:
        lines = file.readlines()

    for line in lines:
        line = line.strip()
        # Check whether this is a phoneme line, e.g. "37 E"
        match = re.match(r'^(\d+)\s+([A-Z]+)$', line)
        if match:
            frame = int(match.group(1))
            phoneme = match.group(2)
            time = frame / fps
            phonemes.append({"time": round(time, 3), "phoneme": phoneme})

    return phonemes

# Usage
pgo_path = "Papagayo.pgo"
phoneme_data = parse_ng_pgo_file(pgo_path)

# Save as JSON (this is the phonemes.json that the MAXScript below reads)
with open("phonemes.json", "w") as out:
    json.dump(phoneme_data, out, indent=2)

Running pgo2json.py (the script above) on the .pgo file produces phonemes.json, for example:

[
  {
    "time": 0.333,
    "phoneme": "E"
  },
  {
    "time": 0.542,
    "phoneme": "AI"
  }
]

2. On the 3ds Max side, a JSON parser is needed: reference Newtonsoft.Json.dll (loaded via dotNet.loadAssembly in the script below).
3. Define the pose library for the bones, i.e. the position offsets each phoneme applies to the jaw and lip controllers. In this example the library is written directly as a MAXScript array; the same data could also be kept in an external phonemesPose.json.

-- Phoneme pose library (position offsets per phoneme for each controller)
#(
    #("E", #(
        #( "jaw", [0.886,-10,15.13] ),
        #( "lipUpper", [-14.243,3.0,-0.604] ),
        #( "lipLower", [16.014,-2.0,0.001] )
    )),
    #("AI", #(
        #( "jaw", [0,0,0] ),
        #( "lipUpper", [0,0,0] ),
        #( "lipLower", [0,0,0] )
    )),
    #("L", #(
        #( "jaw", [0,-6,0] ),
        #( "lipUpper", [0,1,0] ),
        #( "lipLower", [0,-1,0] )
    )),
    #("FV", #(
        #( "jaw", [0,-3,0] ),
        #( "lipUpper", [0,2,0] ),
        #( "lipLower", [0,-2,0] )
    ))
)
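If you would rather keep this table in an external phonemesPose.json file (as the step name suggests) instead of hard-coding it, an equivalent JSON layout could look like the following. Note that the full script below does not read such a file; it defines the poses inline:

{
  "E":  { "jaw": [0.886, -10, 15.13], "lipUpper": [-14.243, 3.0, -0.604], "lipLower": [16.014, -2.0, 0.001] },
  "AI": { "jaw": [0, 0, 0],  "lipUpper": [0, 0, 0], "lipLower": [0, 0, 0] },
  "L":  { "jaw": [0, -6, 0], "lipUpper": [0, 1, 0], "lipLower": [0, -1, 0] },
  "FV": { "jaw": [0, -3, 0], "lipUpper": [0, 2, 0], "lipLower": [0, -2, 0] }
}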

A MAXScript script then reads phonemes.json, looks up each phoneme in the pose library, and keys the animation:

-- Step 1: set up file paths and load the Newtonsoft.Json.dll JSON parser
scriptDir = getFilenamePath (getSourceFileName())
JsonDllPath = scriptDir + @"Newtonsoft.Json.dll"
JsonFilePath = scriptDir + @"phonemes.json"
dotNet.loadAssembly JsonDllPath  -- required before the Newtonsoft.Json.Linq classes can be used

-- 1. Read and parse the JSON file in one call
fn ReadandProcess JsonFilePath =
(
    -- UTF-8 encoding object
    encoding = dotNetObject "System.Text.UTF8Encoding"
    -- Read all bytes of the file
    fileBytes = (dotnetClass "System.IO.File").ReadAllBytes(JsonFilePath)
    -- Convert the byte array to a string
    jsonText = encoding.GetString(fileBytes)

    -- Decide whether the JSON root is an array or an object
    local jsonType
    if jsonText[1] == "[" then
        jsonType = "Newtonsoft.Json.Linq.JArray"
    else
        jsonType = "Newtonsoft.Json.Linq.JObject"

    -- Parse the JSON text
    local jsonStruct = (dotNetClass jsonType).Parse jsonText
    return jsonStruct
)

resetMaxFile #noPrompt

-- Path to the audio file (adjust to your own project)
audioFilePath = @"E:\捕鱼\--捕鱼3D资源--\25.4月工作\根据音频做表情\2Papagayo提取和输出音素\recently_short.wav"
prosound.append audioFilePath
trackview.open "Track View - Curve Editor"

-- Create and name the control dummies
dummy name:"CTRL_Jaw" pos:[0, 0, 0]
dummy name:"CTRL_Lip_Upper" pos:[0, 0, 10]
dummy name:"CTRL_Lip_Lower" pos:[0, 0, 20]

-- Refresh the viewports
redrawViews()

-- 2. Bind the bone controllers
global ctrl_jaw = $CTRL_Jaw
global ctrl_lipUpper = $CTRL_Lip_Upper
global ctrl_lipLower = $CTRL_Lip_Lower

-- 3. Phoneme poses (position offsets per phoneme for each controller)
global phonemePoses = #(
    #("E", #(
        #( "jaw", [0.886,-10,15.13] ),
        #( "lipUpper", [-14.243,3.0,-0.604] ),
        #( "lipLower", [16.014,-2.0,0.001] )
    )),
    #("AI", #(
        #( "jaw", [0,0,0] ),
        #( "lipUpper", [0,0,0] ),
        #( "lipLower", [0,0,0] )
    )),
    #("L", #(
        #( "jaw", [0,-6,0] ),
        #( "lipUpper", [0,1,0] ),
        #( "lipLower", [0,-1,0] )
    )),
    #("FV", #(
        #( "jaw", [0,-3,0] ),
        #( "lipUpper", [0,2,0] ),
        #( "lipLower", [0,-2,0] )
    ))
)

-- 4. Apply the poses and set keyframes
fn applyPhonemePoses phonemeData =
(
    if phonemeData == undefined then (
        format "No valid phoneme data provided.\n"
        return false
    )
    for i = 0 to phonemeData.Count - 1 do
    (
        local element = phonemeData.Item[i]
        local timeSec = element.Item["time"].Value as float
        local phoneme = element.Item["phoneme"].Value as string
        local frameNum = timeSec * frameRate

        set animate on
        -- Look up the pose for this phoneme
        for p in phonemePoses do
        (
            if p[1] == phoneme do
            (
                local pose = p[2]

                -- Assigning positions inside the animate / at time context creates the keys,
                -- so no explicit addNewKey call is needed
                at time frameNum
                (
                    for subPose in pose do
                    (
                        if subPose[1] == "jaw" and (isValidNode ctrl_jaw) do ctrl_jaw.position = subPose[2]
                        if subPose[1] == "lipUpper" and (isValidNode ctrl_lipUpper) do ctrl_lipUpper.position = subPose[2]
                        if subPose[1] == "lipLower" and (isValidNode ctrl_lipLower) do ctrl_lipLower.position = subPose[2]
                    )
                )
            )
        )
        set animate off
    )
)

-- 5. Main entry point
phonemeData = ReadandProcess JsonFilePath
applyPhonemePoses phonemeData
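
Optionally, phonemes.json can be sanity-checked in Python before running the MAXScript; a minimal sketch, assuming 24 fps (adjust to your scene frame rate):

import json

with open("phonemes.json") as f:
    entries = json.load(f)

fps = 24  # assumption: should match the 3ds Max scene frame rate
times = [e["time"] for e in entries]
assert times == sorted(times), "phoneme times should be ascending"
for e in entries:
    print(f'{e["phoneme"]:>3}  t={e["time"]:.3f}s  frame={round(e["time"] * fps)}')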
