无Proto源文件?解锁ProtoBuf反序列化新姿势:Python3 & PHP双版本实现直接解码

无Proto源文件?解锁ProtoBuf反序列化新姿势:Python3 & PHP双版本实现直接解码

引言

在处理数据交换格式时,Protobuf(Protocol Buffers)以其高效性和灵活性而闻名。然而,在某些场合,我们可能会面临一个挑战:只有二进制数据而没有原始的.proto定义文件。这种情况下,如何从这些二进制文件中恢复出有用的信息呢?本文将探讨这一问题,并通过Python 3和PHP两种语言的具体实现来展示解决方案。

Protobuf简介

Protobuf是一种高效的序列化方式,由Google开发,用于结构化数据的存储和通信。它允许开发者定义数据结构,并自动生成用于该结构的数据访问类。相比于XML等格式,Protobuf提供了更好的性能和更小的文件大小。

反序列化概述

反序列化是指将序列化的数据转换回其原始形式的过程。当缺少.proto文件时,反序列化变得复杂,因为我们需要手动解析二进制数据以理解其结构。下面给出基于Python 3和PHP的Protobuf反序列化实现代码。

python3实现代码

# -*- coding: utf-8 -*-
import sys
import codecs
import struct
import json
import base64

def GetDynamicWireFormat(data, start, end):
    """Decode the field tag (key) that begins at ``data[start]``.

    The tag is a base-128 varint whose low three bits are the wire type and
    whose remaining bits are the field number.

    Args:
        data: Indexable byte buffer (``bytes`` / ``bytearray``).
        start: Offset of the first tag byte.
        end: Exclusive upper bound for reading.

    Returns:
        ``(next_offset, wire_type, field_number)``, or
        ``(None, None, None)`` when a multi-byte tag runs past ``end``.
    """
    lead = data[start]
    kind = lead & 0x7

    # Fast path: a tag without continuation bit fits in this single byte.
    if not lead & 0x80:
        return (start + 1, kind, lead >> 3)

    # Multi-byte tag: accumulate 7-bit groups, least significant group first.
    raw = 0
    shift = 0
    cursor = start
    while True:
        if cursor >= end:
            return (None, None, None)
        octet = data[cursor]
        raw |= (octet & 0x7F) << shift
        shift += 7
        cursor += 1
        if not octet & 0x80:
            break

    return (cursor, kind, raw >> 3)

def RetrieveInt(data, start, end):
    """Read one base-128 varint starting at ``data[start]``.

    Args:
        data: Indexable byte buffer (``bytes`` / ``bytearray``).
        start: Offset of the first varint byte.
        end: Exclusive upper bound for reading.

    Returns:
        ``(value, next_offset, True)`` on success, or
        ``(None, None, False)`` when ``end`` is reached before the
        terminating byte (the one without the continuation bit).
    """
    value = 0
    shift = 0
    cursor = start
    while cursor < end:
        octet = data[cursor]
        cursor += 1
        # Each byte contributes seven payload bits, low group first.
        value |= (octet & 0x7F) << shift
        shift += 7
        if not octet & 0x80:
            return (value, cursor, True)
    return (None, None, False)


def ParseRepeatedField(data, start, end, message, depth = 0):
    """Interpret ``data[start:end]`` as a run of back-to-back varints.

    Every decoded value is appended to ``message`` (a list). ``depth`` is
    accepted for signature symmetry with ``ParseData`` and is not used.

    Returns:
        True when the whole range was consumed, False as soon as a varint is
        cut off by ``end`` (values decoded before that stay in ``message``).
    """
    cursor = start
    while cursor < end:
        value, cursor, ok = RetrieveInt(data, cursor, end)
        if not ok:
            return False
        message.append(value)
    return True

def ParseData(data, start, end, messages, depth = 0):
    """Decode the protobuf fields found in ``data[start:end]`` without a schema.

    Each field is stored in ``messages`` under the key ``"FF:OO:Type"`` where
    ``FF`` is the field number, ``OO`` the position of the field inside this
    message (both formatted with ``%02d``) and ``Type`` is one of ``Varint``,
    ``64-bit``, ``32-bit``, ``embedded message``, ``string``, ``repeated`` or
    ``bytes``.

    Length-delimited fields carry no type information, so they are guessed in
    this order: nested message, UTF-8 string, run of varints ("repeated"),
    and finally raw bytes rendered as ``0x..:0x..`` text.

    Fixed 64-bit / 32-bit fields are always reported as little-endian IEEE-754
    floats, including values whose sign bit is set.

    Args:
        data: The raw message as ``bytes``.
        start: Offset of the first byte to decode.
        end: Exclusive end offset.
        messages: Dict that receives the decoded fields (mutated in place).
        depth: Nesting level, forwarded to recursive calls.

    Returns:
        True if the whole range decoded cleanly, otherwise False. On failure
        the fields decoded before the error remain in ``messages``.
    """
    ordinary = 0  # index of the current field within this message
    while start < end:
        (start, wire_type, field_number) = GetDynamicWireFormat(data, start, end)
        if start is None:
            return False

        if wire_type == 0x00:  # varint
            (num, start, success) = RetrieveInt(data, start, end)
            if not success:
                return False
            messages['%02d:%02d:Varint' % (field_number, ordinary)] = num

        elif wire_type == 0x01:  # fixed 64-bit
            if start + 8 > end:
                return False
            floatNum = struct.unpack_from('<d', data, start)[0]
            messages['%02d:%02d:64-bit' % (field_number, ordinary)] = floatNum
            start += 8

        elif wire_type == 0x02:  # length-delimited
            (stringLen, start, success) = RetrieveInt(data, start, end)
            if not success or start + stringLen > end:
                return False
            payloadEnd = start + stringLen

            # Decode into a scratch dict so a failed guess leaves no trace.
            nested = {}
            if ParseData(data, start, payloadEnd, nested, depth + 1):
                messages['%02d:%02d:embedded message' % (field_number, ordinary)] = nested
            else:
                chunk = data[start:payloadEnd]
                try:
                    messages['%02d:%02d:string' % (field_number, ordinary)] = chunk.decode('utf-8')
                except UnicodeDecodeError:
                    values = []
                    if ParseRepeatedField(data, start, payloadEnd, values, depth + 1):
                        messages['%02d:%02d:repeated' % (field_number, ordinary)] = values
                    else:
                        hexStr = ':'.join('0x%x' % x for x in chunk)
                        messages['%02d:%02d:bytes' % (field_number, ordinary)] = hexStr
            start = payloadEnd

        elif wire_type == 0x05:  # fixed 32-bit
            if start + 4 > end:
                return False
            floatNum = struct.unpack_from('<f', data, start)[0]
            messages['%02d:%02d:32-bit' % (field_number, ordinary)] = floatNum
            start += 4

        else:
            # Groups (3/4) and undefined wire types are not supported.
            return False

        ordinary += 1

    return True

def ParseProto(fileName):
    """Decode the serialized protobuf message stored in ``fileName``.

    Args:
        fileName: Path of a file containing the raw binary message.

    Returns:
        Dict of decoded fields as produced by ``ParseData``. The result of
        ``ParseData`` is not checked, so a partially decoded dict is returned
        when the data is malformed.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(fileName, "rb") as f:
        data = f.read()

    messages = {}
    ParseData(data, 0, len(data), messages)
    return messages

def ParseProtoFromBase64(base64_content):
    """Decode a Base64-encoded serialized protobuf message.

    Args:
        base64_content: Base64 text (or bytes) of the raw message.

    Returns:
        Dict of decoded fields as produced by ``ParseData``; its success flag
        is ignored, so malformed input yields a partially filled dict.
    """
    raw = base64.b64decode(base64_content)
    decoded = {}
    ParseData(raw, 0, len(raw), decoded)
    return decoded

def GenValueList(value):
    """Return the base-128 varint encoding of ``value`` as a list of ints.

    Bytes are listed in wire order (least significant group first). A
    negative ``value`` yields an empty list.
    """
    encoded = []
    if not value >= 0:
        return encoded

    remaining = value
    while True:
        low, remaining = remaining & 0x7F, remaining >> 0x7
        if remaining > 0:
            # More groups follow: set the continuation bit.
            encoded.append(low | 0x80)
        else:
            encoded.append(low)
            return encoded


def WriteValue(value, output):
    """Append the base-128 varint encoding of ``value`` to ``output``.

    Args:
        value: Integer to encode; a negative value writes nothing.
        output: List of byte values, extended in place.

    Returns:
        Number of bytes appended.
    """
    count = 0
    remaining = value
    more = remaining >= 0
    while more:
        low = remaining & 0x7F
        remaining >>= 0x7
        more = remaining > 0
        # The continuation bit marks every byte except the last one.
        output.append(low | 0x80 if more else low)
        count += 1
    return count

def WriteVarint(field_number, value, output):
    """Append a varint field (tag followed by value) to ``output``.

    Args:
        field_number: Protobuf field number.
        value: Integer value. Negative numbers are written as their 64-bit
            two's-complement form, which is how protobuf encodes negative
            int32/int64 values; previously only the tag was written for them,
            producing a corrupt message.
        output: List of byte values, extended in place.

    Returns:
        Number of bytes appended.
    """
    if value < 0:
        value &= 0xFFFFFFFFFFFFFFFF

    wireFormat = (field_number << 3) | 0x00
    byteWritten = WriteValue(wireFormat, output)
    byteWritten += WriteValue(value, output)
    return byteWritten

def Write64bitFloat(field_number, value, output):
    """Append a fixed 64-bit field holding ``value`` as an IEEE-754 double.

    The previous implementation called ``bytes.encode('hex')``, which only
    exists on Python 2 and raises ``AttributeError`` on Python 3.

    Args:
        field_number: Protobuf field number.
        value: Float to encode.
        output: List of byte values, extended in place.

    Returns:
        Number of bytes appended (tag bytes plus 8).
    """
    wireFormat = (field_number << 3) | 0x01
    byteWritten = WriteValue(wireFormat, output)

    # Fixed-width protobuf values are little-endian on the wire.
    payload = struct.pack('<d', value)
    output.extend(payload)
    return byteWritten + len(payload)

def Write64bit(field_number, value, output):
    """Append a fixed 64-bit field holding the integer ``value``.

    The eight low-order bytes of ``value`` are written least significant
    byte first.

    Returns:
        Number of bytes appended (tag bytes plus 8).
    """
    tag_len = WriteValue((field_number << 3) | 0x01, output)
    output.extend((value >> shift) & 0xFF for shift in range(0, 64, 8))
    return tag_len + 8

def Write32bitFloat(field_number, value, output):
    """Append a fixed 32-bit field holding ``value`` as an IEEE-754 float.

    The previous implementation called ``bytes.encode('hex')``, which only
    exists on Python 2 and raises ``AttributeError`` on Python 3.

    Args:
        field_number: Protobuf field number.
        value: Float to encode; ``struct.pack`` raises ``OverflowError`` if it
            does not fit in single precision.
        output: List of byte values, extended in place.

    Returns:
        Number of bytes appended (tag bytes plus 4).
    """
    wireFormat = (field_number << 3) | 0x05
    byteWritten = WriteValue(wireFormat, output)

    # Fixed-width protobuf values are little-endian on the wire.
    payload = struct.pack('<f', value)
    output.extend(payload)
    return byteWritten + len(payload)

def Write32bit(field_number, value, output):
    """Append a fixed 32-bit field holding the integer ``value``.

    The four low-order bytes of ``value`` are written least significant
    byte first.

    Returns:
        Number of bytes appended (tag bytes plus 4).
    """
    tag_len = WriteValue((field_number << 3) | 0x05, output)
    output.extend((value >> shift) & 0xFF for shift in range(0, 32, 8))
    return tag_len + 4

def WriteRepeatedField(message, output):
    """Append every integer in ``message`` to ``output`` as a varint.

    Returns:
        Total number of bytes appended.
    """
    return sum(WriteValue(item, output) for item in message)


def Decode(binary):
    """Decode a raw protobuf message held in memory.

    Returns:
        The dict of decoded fields, or ``False`` when ``ParseData`` reports
        that the buffer could not be decoded completely.
    """
    parsed = {}
    if ParseData(binary, 0, len(binary), parsed):
        return parsed
    return False


def ReEncode(messages, output):
    """Serialize a decoded message dict back into protobuf wire bytes.

    ``messages`` uses the ``"FF:OO:Type"`` keys produced by ``ParseData``.
    Fields are written in ascending ``OO`` order, i.e. the order in which they
    appeared in the original message. Keys with an unknown type are skipped.

    Args:
        messages: Dict of decoded fields (possibly edited by the caller).
        output: List of byte values, extended in place.

    Returns:
        Number of bytes appended to ``output``.
    """
    byteWritten = 0
    # Iterating the dict directly replaces the Python 2 only ``iterkeys()``.
    for key in sorted(messages, key=lambda k: int(k.split(':')[1])):
        keyList = key.split(':')
        field_number = int(keyList[0])
        wire_type = keyList[2]
        value = messages[key]

        if wire_type == 'Varint':
            byteWritten += WriteVarint(field_number, value, output)
        elif wire_type == '32-bit':
            if isinstance(value, float):
                byteWritten += Write32bitFloat(field_number, value, output)
            else:
                byteWritten += Write32bit(field_number, value, output)
        elif wire_type == '64-bit':
            if isinstance(value, float):
                byteWritten += Write64bitFloat(field_number, value, output)
            else:
                byteWritten += Write64bit(field_number, value, output)
        elif wire_type in ('embedded message', 'repeated', 'string', 'bytes'):
            # Build the payload first so its length prefix can be written
            # before it, instead of inserting into the middle of ``output``.
            payload = []
            if wire_type == 'embedded message':
                ReEncode(value, payload)
            elif wire_type == 'repeated':
                WriteRepeatedField(value, payload)
            elif wire_type == 'string':
                # Iterating bytes yields ints on Python 3.
                payload.extend(value.encode('utf-8'))
            else:
                # "0x..:0x.." text; empty parts are skipped so '' means no bytes.
                payload.extend(int(part, 16) for part in value.split(':') if part)

            byteWritten += WriteValue((field_number << 3) | 0x02, output)
            byteWritten += WriteValue(len(payload), output)
            output.extend(payload)
            byteWritten += len(payload)
    return byteWritten
    

def SaveModification(messages, fileName):
    """Re-encode ``messages`` and write the resulting bytes to ``fileName``.

    Args:
        messages: Dict of decoded fields in the ``ParseData`` key format.
        fileName: Destination path; an existing file is overwritten.
    """
    output = []
    ReEncode(messages, output)
    # Context manager so the file is closed even if the write fails.
    with open(fileName, 'wb') as f:
        f.write(bytearray(output))
    

php实现代码


/**
 * Schema-less Protocol Buffers decoder and re-encoder.
 *
 * Decoded messages are associative arrays keyed "FF:OO:Type", where FF is the
 * field number, OO the position of the field inside its message (both
 * formatted with %02d) and Type is one of: Varint, 64-bit, 32-bit,
 * embedded message, string, repeated, bytes.
 *
 * Length-delimited fields carry no type information, so they are guessed in
 * this order: nested message, UTF-8 string, run of varints ("repeated"),
 * raw bytes rendered as "0x..:0x.." text.
 */
class ProtobufDecodeUtil
{
    /**
     * Decode the raw protobuf message stored in a file.
     *
     * @param string $fileName Path of the binary file.
     * @return array Decoded fields; empty when the file cannot be read,
     *               partially filled when the data is malformed.
     */
    public function ParseProto($fileName) {
        $messages = [];
        $data = file_get_contents($fileName);
        if ($data === false) {
            return $messages;
        }
        $this->ParseData($data, 0, strlen($data), $messages);
        return $messages;
    }

    /**
     * Decode a Base64-encoded raw protobuf message.
     *
     * @param string $base64_content Base64 text of the message.
     * @return array Decoded fields (partially filled when the data is malformed).
     */
    public function ParseProtoFromBase64($base64_content) {
        $data = base64_decode($base64_content);
        $messages = [];
        $this->ParseData($data, 0, strlen($data), $messages);
        return $messages;
    }

    /**
     * Decode the field tag that starts at $data[$start].
     *
     * The tag is a varint whose low three bits are the wire type and whose
     * remaining bits are the field number.
     *
     * @return array [nextOffset, wireType, fieldNumber], or [null, null, null]
     *               when the tag runs past $end.
     */
    public function GetDynamicWireFormat($data, $start, $end) {
        list($tag, $newStart, $success) = $this->RetrieveInt($data, $start, $end);
        if (!$success) {
            return [null, null, null];
        }
        return [$newStart, $tag & 0x7, $tag >> 3];
    }

    /**
     * Read one base-128 varint starting at $data[$start].
     *
     * @return array [value, nextOffset, true] on success, or
     *               [null, null, false] when $end is reached first.
     */
    public function RetrieveInt($data, $start, $end) {
        $num = 0;
        $shift = 0;
        $intBits = PHP_INT_SIZE * 8;
        for ($pos = $start; $pos < $end; $pos++) {
            $oneByte = ord($data[$pos]);
            // Groups beyond the native integer width cannot be represented.
            if ($shift < $intBits) {
                $num |= ($oneByte & 0x7F) << $shift;
            }
            $shift += 7;
            if (($oneByte & 0x80) === 0) {
                return [$num, $pos + 1, true];
            }
        }
        return [null, null, false];
    }

    /**
     * Interpret $data[$start..$end) as a run of back-to-back varints and
     * append each value to $message.
     *
     * @return bool False as soon as a varint is cut off by $end.
     */
    public function ParseRepeatedField($data, $start, $end, &$message, $depth = 0) {
        while ($start < $end) {
            list($num, $start, $success) = $this->RetrieveInt($data, $start, $end);
            if (!$success) {
                return false;
            }
            $message[] = $num;
        }
        return true;
    }

    /**
     * Decode the fields found in $data[$start..$end) into $messages.
     *
     * @param string $data     Raw message bytes.
     * @param int    $start    Offset of the first byte to decode.
     * @param int    $end      Exclusive end offset.
     * @param array  $messages Receives the decoded fields (by reference).
     * @param int    $depth    Nesting level, forwarded to recursive calls.
     * @return bool True when the whole range decoded cleanly; on failure the
     *              fields decoded before the error remain in $messages.
     */
    public function ParseData($data, $start, $end, &$messages, $depth = 0) {
        $ordinary = 0; // index of the current field within this message
        while ($start < $end) {
            list($start, $wire_type, $field_number) = $this->GetDynamicWireFormat($data, $start, $end);
            if ($start === null) {
                return false;
            }

            if ($wire_type == 0x00) { // varint
                list($num, $start, $success) = $this->RetrieveInt($data, $start, $end);
                if (!$success) {
                    return false;
                }
                $messages[sprintf('%02d:%02d:Varint', $field_number, $ordinary)] = $num;
            } elseif ($wire_type == 0x01) { // fixed 64-bit
                $chunk = ($start + 8 <= $end) ? substr($data, $start, 8) : '';
                if (strlen((string) $chunk) !== 8) {
                    return false;
                }
                // 'e' = little-endian double, independent of the host CPU.
                $messages[sprintf('%02d:%02d:64-bit', $field_number, $ordinary)] = unpack('e', $chunk)[1];
                $start += 8;
            } elseif ($wire_type == 0x02) { // length-delimited
                list($stringLen, $start, $success) = $this->RetrieveInt($data, $start, $end);
                // A huge varint can wrap to a negative length; reject it so
                // the cursor can never move backwards.
                if (!$success || $stringLen < 0 || $start + $stringLen > $end) {
                    return false;
                }
                $payloadEnd = $start + $stringLen;

                // Decode into a scratch array so a failed guess leaves no trace.
                $nested = [];
                if ($this->ParseData($data, $start, $payloadEnd, $nested, $depth + 1)) {
                    $messages[sprintf('%02d:%02d:embedded message', $field_number, $ordinary)] = $nested;
                } else {
                    $chunk = substr($data, $start, $stringLen);
                    // mb_convert_encoding() never fails on invalid input (it
                    // substitutes characters), so validity is tested explicitly.
                    if (mb_check_encoding($chunk, 'UTF-8')) {
                        $messages[sprintf('%02d:%02d:string', $field_number, $ordinary)] = $chunk;
                    } else {
                        $values = [];
                        if ($this->ParseRepeatedField($data, $start, $payloadEnd, $values, $depth + 1)) {
                            $messages[sprintf('%02d:%02d:repeated', $field_number, $ordinary)] = $values;
                        } else {
                            $hexParts = [];
                            foreach (unpack('C*', $chunk) as $byte) {
                                $hexParts[] = sprintf('0x%x', $byte);
                            }
                            $messages[sprintf('%02d:%02d:bytes', $field_number, $ordinary)] = implode(':', $hexParts);
                        }
                    }
                }
                $start = $payloadEnd;
            } elseif ($wire_type == 0x05) { // fixed 32-bit
                $chunk = ($start + 4 <= $end) ? substr($data, $start, 4) : '';
                if (strlen((string) $chunk) !== 4) {
                    return false;
                }
                // 'g' = little-endian float, independent of the host CPU.
                $messages[sprintf('%02d:%02d:32-bit', $field_number, $ordinary)] = unpack('g', $chunk)[1];
                $start += 4;
            } else {
                // Groups (3/4) and undefined wire types are not supported.
                return false;
            }

            $ordinary++;
        }

        return true;
    }

    /**
     * Return the base-128 varint encoding of $value as a list of byte values.
     *
     * Negative integers are encoded from their two's-complement bit pattern
     * (ten bytes on 64-bit PHP), matching how they are decoded.
     *
     * @param int $value
     * @return int[]
     */
    public function GenValueList($value) {
        $value = (int) $value;
        // PHP's >> is arithmetic; this mask clears the sign bits it drags in,
        // turning ">> 7" into a logical shift.
        $mask = PHP_INT_MAX >> 6;
        $valueList = [];
        do {
            $oneByte = $value & 0x7F;
            $value = ($value >> 7) & $mask;
            if ($value !== 0) {
                $oneByte |= 0x80;
            }
            $valueList[] = $oneByte;
        } while ($value !== 0);

        return $valueList;
    }

    /**
     * Append the varint encoding of $value to $output.
     *
     * @return int Number of bytes appended.
     */
    public function WriteValue($value, &$output) {
        $bytes = $this->GenValueList($value);
        foreach ($bytes as $byte) {
            $output[] = $byte;
        }
        return count($bytes);
    }

    /**
     * Append a varint field (tag followed by value) to $output.
     *
     * @return int Number of bytes appended.
     */
    public function WriteVarint($field_number, $value, &$output) {
        $byteWritten = $this->WriteValue(($field_number << 3) | 0x00, $output);
        $byteWritten += $this->WriteValue($value, $output);
        return $byteWritten;
    }

    /**
     * Append a fixed 64-bit field holding $value as a little-endian double.
     *
     * @return int Number of bytes appended.
     */
    public function Write64bitFloat($field_number, $value, &$output) {
        $byteWritten = $this->WriteValue(($field_number << 3) | 0x01, $output);
        foreach (unpack('C*', pack('e', $value)) as $byte) {
            $output[] = $byte;
            $byteWritten++;
        }
        return $byteWritten;
    }

    /**
     * Append a fixed 64-bit field holding the integer $value,
     * least significant byte first.
     *
     * @return int Number of bytes appended.
     */
    public function Write64bit($field_number, $value, &$output) {
        $byteWritten = $this->WriteValue(($field_number << 3) | 0x01, $output);
        for ($i = 0; $i < 8; $i++) {
            $output[] = $value & 0xFF;
            $value = ($value >> 8);
            $byteWritten++;
        }
        return $byteWritten;
    }

    /**
     * Append a fixed 32-bit field holding $value as a little-endian float.
     *
     * @return int Number of bytes appended.
     */
    public function Write32bitFloat($field_number, $value, &$output) {
        $byteWritten = $this->WriteValue(($field_number << 3) | 0x05, $output);
        foreach (unpack('C*', pack('g', $value)) as $byte) {
            $output[] = $byte;
            $byteWritten++;
        }
        return $byteWritten;
    }

    /**
     * Append a fixed 32-bit field holding the integer $value,
     * least significant byte first.
     *
     * @return int Number of bytes appended.
     */
    public function Write32bit($field_number, $value, &$output) {
        $byteWritten = $this->WriteValue(($field_number << 3) | 0x05, $output);
        for ($i = 0; $i < 4; $i++) {
            $output[] = $value & 0xFF;
            $value = ($value >> 8);
            $byteWritten++;
        }
        return $byteWritten;
    }

    /**
     * Append every integer of $message to $output as a varint.
     *
     * @return int Number of bytes appended.
     */
    public function WriteRepeatedField($message, &$output) {
        $byteWritten = 0;
        foreach ($message as $v) {
            $byteWritten += $this->WriteValue($v, $output);
        }
        return $byteWritten;
    }

    /**
     * Decode a raw protobuf message held in memory.
     *
     * @param string $binary Raw message bytes.
     * @return array|false Decoded fields, or false when decoding failed.
     */
    public function Decode($binary) {
        $messages = [];
        if (!$this->ParseData($binary, 0, strlen($binary), $messages)) {
            return false;
        }
        return $messages;
    }

    /**
     * Serialize a decoded message array back into wire bytes.
     *
     * Fields are written in array order; keys with an unknown type are skipped.
     *
     * @param array $messages Decoded fields in the "FF:OO:Type" key format.
     * @param array $output   List of byte values, extended in place.
     * @return int Number of bytes appended.
     */
    public function ReEncode($messages, &$output) {
        $byteWritten = 0;
        foreach ($messages as $key => $value) {
            $keyList = explode(':', $key);
            $field_number = intval($keyList[0]);
            $wire_type = $keyList[2];

            if ($wire_type == 'Varint') {
                $byteWritten += $this->WriteVarint($field_number, $value, $output);
            } elseif ($wire_type == '32-bit') {
                if (is_float($value)) {
                    $byteWritten += $this->Write32bitFloat($field_number, $value, $output);
                } else {
                    $byteWritten += $this->Write32bit($field_number, $value, $output);
                }
            } elseif ($wire_type == '64-bit') {
                if (is_float($value)) {
                    $byteWritten += $this->Write64bitFloat($field_number, $value, $output);
                } else {
                    $byteWritten += $this->Write64bit($field_number, $value, $output);
                }
            } elseif ($wire_type == 'embedded message') {
                $payload = [];
                $this->ReEncode($value, $payload);
                $byteWritten += $this->writeLengthDelimited($field_number, $payload, $output);
            } elseif ($wire_type == 'repeated') {
                $payload = [];
                $this->WriteRepeatedField($value, $payload);
                $byteWritten += $this->writeLengthDelimited($field_number, $payload, $output);
            } elseif ($wire_type == 'string') {
                // unpack() is skipped for '' so an empty string yields no bytes.
                $payload = ($value === '') ? [] : array_values(unpack('C*', $value));
                $byteWritten += $this->writeLengthDelimited($field_number, $payload, $output);
            } elseif ($wire_type == 'bytes') {
                // explode() on '' would yield one empty element, i.e. a bogus 0 byte.
                $payload = ($value === '') ? [] : array_map('hexdec', explode(':', $value));
                $byteWritten += $this->writeLengthDelimited($field_number, $payload, $output);
            }
        }

        return $byteWritten;
    }

    /**
     * Append tag, length prefix and payload of one length-delimited field.
     *
     * @return int Number of bytes appended.
     */
    private function writeLengthDelimited($field_number, array $payload, &$output) {
        $byteWritten = $this->WriteValue(($field_number << 3) | 0x02, $output);
        $byteWritten += $this->WriteValue(count($payload), $output);
        foreach ($payload as $byte) {
            $output[] = $byte;
        }
        return $byteWritten + count($payload);
    }

    /**
     * Re-encode $messages and write the resulting bytes to $fileName.
     */
    public function SaveModification($messages, $fileName) {
        $output = [];
        $this->ReEncode($messages, $output);
        // Built byte by byte: spreading a large array into pack() would pass
        // one argument per byte.
        file_put_contents($fileName, implode('', array_map('chr', $output)));
    }

    /**
     * Print a decoded message as an indented, JSON-like tree.
     *
     * @param array $dict   Decoded fields.
     * @param int   $indent Current nesting level.
     */
    public function pxprint(array $dict, int $indent = 0): void {
        $pad = str_repeat("    ", $indent + 1);
        foreach ($dict as $k => $v) {
            if (is_array($v)) {
                echo $pad . "\"$k\": {\n";
                $this->pxprint($v, $indent + 1);
                echo $pad . "}\n";
            } else {
                echo $pad . "\"$k\":\"$v\"\n";
            }
        }
    }

}

案例测试

给定的base64编码的protobuf数据

CsUJChRXZWJjYXN0TWVtYmVyTWVzc2FnZRCA1I7S5s3T5GYYspaXpLf90eRmMAFChwkKF2ludmFsaWRfa2V5X3BsYWNlaG9sZGVyEg97MDp1c2VyfSDmnaXkuoYaDgoJI2I4ZmZmZmZmIJADIsoICAsSDAoHIzhDRTdGRiCQA6oBtggKswgIh+QGGgbkupEqKiowAUrJAQpPaHR0cHM6Ly9wMy13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3Qvc21hbGxfRGVmQXZhdGFyLnBuZ350cGx2LW9iai5pbWFnZQpQaHR0cHM6Ly9wMTEtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L3NtYWxsX0RlZkF2YXRhci5wbmd+dHBsdi1vYmouaW1hZ2USG3dlYmNhc3Qvc21hbGxfRGVmQXZhdGFyLnBuZyoHI0EzN0M5NqoBfQpZaHR0cHM6Ly9wMy13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3QvbmV3X3VzZXJfZ3JhZGVfbGV2ZWxfdjFfNC5wbmd+dHBsdi1vYmouaW1hZ2UYECAgMAFCGhgEIhbojaPoqonnrYnnuqc057qn5YuL56ugsgEMCDYQNjICNTQ6AjU0ugHIATAEmgFhCllodHRwczovL3AzLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9uZXdfdXNlcl9ncmFkZV9sZXZlbF92MV80LnBuZ350cGx2LW9iai5pbWFnZRgQICAwAaIBXwpXaHR0cHM6Ly9wMTEtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L2F3ZW1lX3BheV9ncmFkZV8yeF8xXzQucG5nfnRwbHYtb2JqLmltYWdlGAwgDDABwgEKCggiBgoECAASALADA+oDfQpZaHR0cHM6Ly9wMy13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3QvbmV3X3VzZXJfZ3JhZGVfbGV2ZWxfdjFfNC5wbmd+dHBsdi1vYmouaW1hZ2UYECAgMAFCGhgEIhbojaPoqonnrYnnuqc057qn5YuL56ugkAQBogQG5LqRKioqygRHTVM0d0xqUGJva2tsSE1fd0p5UDBnb3ZvLTU3MVpLRGMwcEhHSV9qQXo0V0RTMGtNYXRYYTAtSkpnSlJBamphM2lXR0RKTE3yBIsCCoUCCAYSgAIKWWh0dHBzOi8vcDMtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L25ld191c2VyX2dyYWRlX2xldmVsX3YxXzQucG5nfnRwbHYtb2JqLmltYWdlClpodHRwczovL3AxMS13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3QvbmV3X3VzZXJfZ3JhZGVfbGV2ZWxfdjFfNC5wbmd+dHBsdi1vYmouaW1hZ2USJXdlYmNhc3QvbmV3X3VzZXJfZ3JhZGVfbGV2ZWxfdjFfNC5wbmcYECAgMAFCGhgEIhbojaPoqonnrYnnuqc057qn5YuL56ugEgEGokAGMTExMTExSAFQAViQyAKIAQPAAegIErMICIfkBhoG5LqRKioqMAFKyQEKUGh0dHBzOi8vcDExLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9zbWFsbF9EZWZBdmF0YXIucG5nfnRwbHYtb2JqLmltYWdlCk9odHRwczovL3AzLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9zbWFsbF9EZWZBdmF0YXIucG5nfnRwbHYtb2JqLmltYWdlEht3ZWJjYXN0L3NtYWxsX0RlZkF2YXRhci5wbmcqByM0MDUyMzeqAX0KWWh0dHBzOi8vcDMtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L25ld191
c2VyX2dyYWRlX2xldmVsX3YxXzQucG5nfnRwbHYtb2JqLmltYWdlGBAgIDABQhoYBCIW6I2j6KqJ562J57qnNOe6p+WLi+eroLIBDAg2EDYyAjU0OgI1NLoByAEwBJoBYQpZaHR0cHM6Ly9wMy13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3QvbmV3X3VzZXJfZ3JhZGVfbGV2ZWxfdjFfNC5wbmd+dHBsdi1vYmouaW1hZ2UYECAgMAGiAV8KV2h0dHBzOi8vcDExLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9hd2VtZV9wYXlfZ3JhZGVfMnhfMV80LnBuZ350cGx2LW9iai5pbWFnZRgMIAwwAcIBCgoIIgYKBAgAEgCwAwPqA30KWWh0dHBzOi8vcDMtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L25ld191c2VyX2dyYWRlX2xldmVsX3YxXzQucG5nfnRwbHYtb2JqLmltYWdlGBAgIDABQhoYBCIW6I2j6KqJ562J57qnNOe6p+WLi+eroJAEAaIEBuS6kSoqKsoER01TNHdMalBib2trbEhNX3dKeVAwZ292by01NzFaS0RjMHBIR0lfakF6NFdEUzBrTWF0WGEwLUpKZ0pSQWpqYTNpV0dESkxN8gSLAgqFAggGEoACCllodHRwczovL3AzLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9uZXdfdXNlcl9ncmFkZV9sZXZlbF92MV80LnBuZ350cGx2LW9iai5pbWFnZQpaaHR0cHM6Ly9wMTEtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L25ld191c2VyX2dyYWRlX2xldmVsX3YxXzQucG5nfnRwbHYtb2JqLmltYWdlEiV3ZWJjYXN0L25ld191c2VyX2dyYWRlX2xldmVsX3YxXzQucG5nGBAgIDABQhoYBCIW6I2j6KqJ562J57qnNOe6p+WLi+eroBIBBqJABjExMTExMRjkAVABkgGHCQoXaW52YWxpZF9rZXlfcGxhY2Vob2xkZXISD3swOnVzZXJ9IOadpeS6hhoOCgkjYjhmZmZmZmYgkAMiyggICxIMCgcjOENFN0ZGIJADqgG2CAqzCAiH5AYaBuS6kSoqKjABSskBCk9odHRwczovL3AzLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9zbWFsbF9EZWZBdmF0YXIucG5nfnRwbHYtb2JqLmltYWdlClBodHRwczovL3AxMS13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3Qvc21hbGxfRGVmQXZhdGFyLnBuZ350cGx2LW9iai5pbWFnZRIbd2ViY2FzdC9zbWFsbF9EZWZBdmF0YXIucG5nKgcjRkZGRkZGqgF9CllodHRwczovL3AzLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9uZXdfdXNlcl9ncmFkZV9sZXZlbF92MV80LnBuZ350cGx2LW9iai5pbWFnZRgQICAwAUIaGAQiFuiNo+iqieetiee6pzTnuqfli4vnq6CyAQwINhA2MgI1NDoCNTS6AcgBMASaAWEKWWh0dHBzOi8vcDMtd2ViY2FzdC5kb3V5aW5waWMuY29tL2ltZy93ZWJjYXN0L25ld191c2VyX2dyYWRlX2xldmVsX3YxXzQucG5nfnRwbHYtb2JqLmltYWdlGBAgIDABogFfCldodHRwczovL3AxMS13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3QvYXdlbWVfcGF5X2dyYWRlXzJ4XzFfNC5wbmd+dHBsdi1vYmouaW1hZ2UYDCAMMAHCAQoKCCIGCgQIABIAsAMD6gN9CllodHRwczovL3AzLXdlYmNhc3Qu
ZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9uZXdfdXNlcl9ncmFkZV9sZXZlbF92MV80LnBuZ350cGx2LW9iai5pbWFnZRgQICAwAUIaGAQiFuiNo+iqieetiee6pzTnuqfli4vnq6CQBAGiBAbkupEqKirKBEdNUzR3TGpQYm9ra2xITV93SnlQMGdvdm8tNTcxWktEYzBwSEdJX2pBejRXRFMwa01hdFhhMC1KSmdKUkFqamEzaVdHREpMTfIEiwIKhQIIBhKAAgpZaHR0cHM6Ly9wMy13ZWJjYXN0LmRvdXlpbnBpYy5jb20vaW1nL3dlYmNhc3QvbmV3X3VzZXJfZ3JhZGVfbGV2ZWxfdjFfNC5wbmd+dHBsdi1vYmouaW1hZ2UKWmh0dHBzOi8vcDExLXdlYmNhc3QuZG91eWlucGljLmNvbS9pbWcvd2ViY2FzdC9uZXdfdXNlcl9ncmFkZV9sZXZlbF92MV80LnBuZ350cGx2LW9iai5pbWFnZRIld2ViY2FzdC9uZXdfdXNlcl9ncmFkZV9sZXZlbF92MV80LnBuZxgQICAwAUIaGAQiFuiNo+iqieetiee6pzTnuqfli4vnq6ASAQaiQAYxMTExMTGaAQQgMkAC

base64解码后的二进制数据

无Proto源文件?解锁ProtoBuf反序列化新姿势:Python3 & PHP双版本实现直接解码_第1张图片
通过上面的图片可以看到,解码后的二进制数据中包含部分明文数据。下面分别使用Python和PHP对其内容进行解码。

使用python实现


import myProtobuf as pbparser
from typing import Dict
import json

def pxprint(dict, indent=0):
    """Print a decoded message as an indented, JSON-like tree.

    Args:
        dict: Mapping of decoded fields (the name shadows the builtin; it is
            kept because it is part of the call signature).
        indent: Current nesting level; each level adds four spaces.
    """
    pad = "    " * (indent + 1)
    for key, value in dict.items():
        if not isinstance(value, Dict):
            # Leaf value: fall back to a placeholder if it cannot be encoded
            # for the output stream.
            try:
                print(f'{pad}"{key}":"{value}"')
            except UnicodeEncodeError:
                print(f'{pad}"{key}":"error-v"')
            continue

        print(f'{pad}"{key}": {{')
        pxprint(value, indent + 1)
        print(pad + '}')

def main():
    """Decode the Base64 protobuf dump in ``base64.txt`` and print it."""
    with open(r"base64.txt", "r", encoding="utf-8") as f:
        base64_content = f.read()
    # Named ``decoded`` so the builtin ``dict`` is not shadowed.
    decoded = pbparser.ParseProtoFromBase64(base64_content)
    pxprint(decoded)


# Run only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()


运行结果

无Proto源文件?解锁ProtoBuf反序列化新姿势:Python3 & PHP双版本实现直接解码_第2张图片

使用php实现

 $v) {
        if (is_array($v)) {
            echo str_repeat($spaces, $indent + 1) . "\"$k\": {\n";
            pxprint($v, $indent + 1);
            echo str_repeat($spaces, $indent + 1) . "}\n";
        } else {
            try {
                echo str_repeat($spaces, $indent + 1) . "\"$k\":\"$v\"\n";
            } catch (Exception $e) {
                echo str_repeat($spaces, $indent + 1) . "\"$k\":\"error-v\"\n";
            }
        }
    }
}

// Script entry point: reads the Base64 text from D:\base64.txt, decodes it
// and prints the decoded structure with pxprint().
// NOTE(review): parseProtoFromBase64() is called as a free function, but the
// only definition visible in this article is the
// ProtobufDecodeUtil::ParseProtoFromBase64() method. The opening lines of this
// snippet are missing, so confirm how the function is brought into scope.
function main(): void {
    // file_get_contents() returns false on failure; that case is not checked here.
    $base64Content = file_get_contents("D:\\base64.txt");
    $dict = parseProtoFromBase64($base64Content);
    pxprint($dict);
}

main();
?>

运行结果

无Proto源文件?解锁ProtoBuf反序列化新姿势:Python3 & PHP双版本实现直接解码_第3张图片

可以看到,解码结果已经以类似JSON的层级格式输出。这样就实现了在没有.proto源文件的情况下直接解码Protobuf数据。

你可能感兴趣的:(常用记录,php,python)