Telegram on Go, part 2: binary protocol

In the previous part, we described the approaches used when writing a parser for the MTProto schema. That article turned out to be a little more general than I expected; this time I will try to say more about the specifics of Telegram.







The Go client continues to evolve, and we will go back in time and recall how the protocol serializer and deserializer were written for it.







The basics



There are two ways to deserialize: streamed and buffered. In practice, in MTProto, a message larger than a megabyte cannot be transmitted, so I chose the option with a buffer: let's say that we can always keep a complete message in memory.







You get the following structure:







// Buffer implements low level binary (de-)serialization for TL.
type Buffer struct {
    // Buf is the raw message data: Put* methods append to it and
    // decoding methods consume bytes from its front.
    Buf []byte
}
      
      





Also, MTProto generally aligns values to 4 bytes (32 bits), so let's put this into a constant:







// Word is the length in bytes of a 4-byte sequence.
// Values in TL are generally aligned to Word.
const Word = 4
      
      





Serialization



Knowing that almost everything in MTProto is little-endian, we can start by serializing uint32:







// PutUint32 serializes unsigned 32-bit integer.
//
// The value is appended to b.Buf as Word (4) bytes in little-endian order.
func (b *Buffer) PutUint32(v uint32) {
    // Encode into a fixed-size scratch slice first, then append it to the buffer.
    t := make([]byte, Word)
    binary.LittleEndian.PutUint32(t, v)
    b.Buf = append(b.Buf, t...)
}
      
      





We will serialize all other values in the same way: first we allocate a slice (the Go compiler is smart enough not to put it on the heap in this case, since the slice size is small and constant), then we write the value into it, and finally we append the slice to the buffer.







, , . , grammers, Rust Telegram.









, , , gotd/td/bin .







Deserialization mirrors serialization; again, let's start with uint32:







// Uint32 reads one little-endian unsigned 32-bit integer from the front
// of the Buffer and advances the Buffer past it.
//
// If fewer than Word bytes remain, it returns io.ErrUnexpectedEOF and
// leaves the Buffer untouched.
func (b *Buffer) Uint32() (uint32, error) {
    raw := b.Buf
    if len(raw) < Word {
        return 0, io.ErrUnexpectedEOF
    }
    // Drop the consumed word before decoding it from the saved view.
    b.Buf = raw[Word:]
    return binary.LittleEndian.Uint32(raw[:Word]), nil
}
      
      





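If fewer than 4 bytes remain in the buffer, we return io.ErrUnexpectedEOF; otherwise we read the value and advance the buffer by one word.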









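Byte sequences ([]byte and string) are serialized with a length prefix and are padded so that the total size is a multiple of 4 bytes.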







If the length is at most 253 bytes, it is written as a single byte, followed by the data and the padding:







// Short form: a single length byte, then the data itself.
b = append(b, byte(l))
b = append(b, v...)
// Bytes written so far: 1 length byte + l data bytes.
currentLen := l + 1
// Padding: zero bytes up to nearestPaddedValueLength(currentLen).
b = append(b, make([]byte, nearestPaddedValueLength(currentLen)-currentLen)...)
return b
      
      





Otherwise, the first byte is 254, followed by the length as 3 little-endian bytes, then the data and the padding:







// Long form: marker byte 254, then the length as 3 bytes, least significant first.
b = append(b, 254, byte(l), byte(l>>8), byte(l>>16))
b = append(b, v...)
// Bytes written so far: 4 header bytes + l data bytes.
currentLen := l + 4
// Padding: zero bytes up to nearestPaddedValueLength(currentLen).
b = append(b, make([]byte, nearestPaddedValueLength(currentLen)-currentLen)...)
      
      





Wrapping this logic into a function encodeString(b []byte, v string) []byte that appends the encoded string to b and returns the result, we get:







// PutString serializes bare string.
//
// The result of encodeString for s is appended to b.Buf.
func (b *Buffer) PutString(s string) {
    b.Buf = encodeString(b.Buf, s)
}
      
      





, . , .









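Objects are serialized in a similar way: first comes the type ID (for example #5b38c6c1, a uint32), and then the fields, one after another.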







, ( ):







// Message is the Go representation of the following TL definition:
//
// msg#9bdd8f1a code:int32 message:string = Message;
type Message struct {
    Code    int32  // code:int32
    Message string // message:string
}
      
      





With Buffer, serialization looks like this:







// Encode implements bin.Encoder.
//
// It writes the type ID 0x9bdd8f1a, then Code and Message, in that order.
// The returned error is always nil.
func (m Message) Encode(b *Buffer) error {
    b.PutID(0x9bdd8f1a)
    b.PutInt32(m.Code)
    b.PutString(m.Message)
    return nil
}
      
      





Calling Encode, we get the following bytes:







m := Message{
    Code:    204,
    Message: "Wake up, Neo",
}
b := new(Buffer)
// Message.Encode always returns nil, so the error is deliberately ignored.
_ = m.Encode(b)

// raw spells out, byte by byte, the encoding this example is expected to produce.
raw := []byte{
    // Type ID (0x9bdd8f1a in little-endian byte order).
    0x1a, 0x8f, 0xdd, 0x9b,

    // Code as int32.
    204, 0x00, 0x00, 0x00,

    // String length.
    byte(len(m.Message)),

    // "Wake up, Neo" in hex, followed by three zero bytes that pad the
    // 13-byte string (1 length byte + 12 data bytes) to 16 bytes.
    0x57, 0x61, 0x6b,
    0x65, 0x20, 0x75, 0x70,
    0x2c, 0x20, 0x4e, 0x65,
    0x6f, 0x00, 0x00, 0x00,
}
      
      





When decoding, we first need to look at the type ID. To do that without advancing Buf, we add a helper:







// PeekID decodes the type id at the front of the Buffer as a little-endian
// uint32 without consuming it: b.Buf is left unchanged.
//
// If fewer than Word bytes remain, it returns io.ErrUnexpectedEOF.
func (b *Buffer) PeekID() (uint32, error) {
    head := b.Buf
    if len(head) < Word {
        return 0, io.ErrUnexpectedEOF
    }
    return binary.LittleEndian.Uint32(head), nil
}
      
      





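The ConsumeID(id uint32) method builds on it: it calls PeekID and, if the type ID matches the expected one, advances the buffer. Deserialization then looks like this: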







// Decode reads a Message from b.
//
// It consumes the type ID 0x9bdd8f1a, then reads Code and Message in that
// order. The first error is returned as is; fields decoded before the
// failure keep their new values.
func (m *Message) Decode(b *Buffer) error {
    if err := b.ConsumeID(0x9bdd8f1a); err != nil {
        return err
    }

    code, err := b.Int32()
    if err != nil {
        return err
    }
    m.Code = code

    text, err := b.String()
    if err != nil {
        return err
    }
    m.Message = text

    return nil
}
      
      





To unify (de-)serialization, we introduce two interfaces:







// Encoder can encode its binary form to Buffer.
type Encoder interface {
    Encode(b *Buffer) error
}

// Decoder can decode its binary form from Buffer.
type Decoder interface {
    Decode(b *Buffer) error
}
      
      







, .









. :







messageActionChatCreate#a6638b9a title:string users:Vector<int> = MessageAction;
      
      





We already know how to serialize title, but what about users?







Vector :







vector#0x1cb5c415 {t:Type} # [ t ] = Vector t
      
      





. , , .







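A vector is serialized as follows: the vector type ID (0x1cb5c415), then the number of elements, then the elements themselves: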







// PutVectorHeader serializes vector header with provided length.
//
// The header is the TypeVector ID followed by length written as int32;
// the elements themselves are not written here.
// NOTE(review): length is converted with int32(length), so a value outside
// the int32 range would be truncated — presumably callers never pass one.
func (b *Buffer) PutVectorHeader(length int) {
    b.PutID(TypeVector)
    b.PutInt32(int32(length))
}
      
      





So, to serialize a vector of 10 uint32 values, we call PutVectorHeader(10) and then write the 10 uint32 values.









, , :







boolTrue#997275b5 = Bool;
boolFalse#bc799737 = Bool;
      
      





That is, a Bool is serialized as one of two type IDs, 0x997275b5 or 0xbc799737:







// Type IDs of the two constructors of the TL Bool type.
const (
    TypeTrue  = 0x997275b5 // boolTrue#997275b5 = Bool
    TypeFalse = 0xbc799737 // boolFalse#bc799737 = Bool
)

// PutBool serializes bare boolean: it writes the TypeTrue ID for true
// and the TypeFalse ID for false.
func (b *Buffer) PutBool(v bool) {
    if v {
        b.PutID(TypeTrue)
        return
    }
    b.PutID(TypeFalse)
}
      
      





, , , .









, , . : , (-), , .







(flags.0?true



): , 0x997275b5



, .







! flags.0?Bool



, Bool, , . , legacy.







bitfield Go :







// Fields is a 32-bit bitfield that compactly records which conditional
// fields are present.
type Fields uint32

// Has reports whether the bit with index n is set.
func (f Fields) Has(n int) bool {
    bit := Fields(1) << n
    return f&bit != 0
}

// Set turns on the bit with index n.
func (f *Fields) Set(n int) {
    *f = *f | Fields(1)<<n
}
      
      





uint32.







:







// FieldsMessage is an example type with conditional fields, described by
// the following TL definition:
//
// msg flags:# escape:flags.0?true ttl_seconds:flags.1?int = Message;
type FieldsMessage struct {
    Flags      bin.Fields // bitfield recording which conditional fields are present
    Escape     bool       // escape:flags.0?true — carried by bit 0 of Flags only
    TTLSeconds int        // ttl_seconds:flags.1?int — written only when bit 1 of Flags is set
}

// Encode writes the FieldsMessageTypeID, the flags and the conditional
// fields to b.
//
// Encode mutates the receiver: bit 0 of f.Flags is set when Escape is true.
// TTLSeconds is written only if bit 1 is already set in f.Flags; Encode does
// not derive that bit from TTLSeconds, so the caller has to set it.
func (f *FieldsMessage) Encode(b *bin.Buffer) error {
    b.PutID(FieldsMessageTypeID)
    // Escape has no payload of its own: its value lives in the flag bit.
    if f.Escape {
        f.Flags.Set(0)
    }
    if err := f.Flags.Encode(b); err != nil {
        return err
    }
    if f.Flags.Has(1) {
        b.PutInt(f.TTLSeconds)
    }
    return nil
}
      
      





Note that TTLSeconds is serialized only if flag 1 is set, while the value of Escape is stored in Flags itself.









The schema also defines int128 and int256:







int128 4*[ int ] = Int128;
int256 8*[ int ] = Int256;
      
      





In Go, they can be represented as byte arrays:







// Int128 is a 128-bit value stored as 16 raw bytes.
type Int128 [16]byte
// Int256 is a 256-bit value stored as 32 raw bytes.
type Int256 [32]byte
      
      





, :







// PutInt128 appends the 16 bytes of v to b.Buf unchanged.
func (b *Buffer) PutInt128(v Int128) {
    b.Buf = append(b.Buf, v[:]...)
}
// PutInt256 appends the 32 bytes of v to b.Buf unchanged.
func (b *Buffer) PutInt256(v Int256) {
    b.Buf = append(b.Buf, v[:]...)
}
      
      





, big.Int.







MTProto big-endian OpenSSL. Go big.Int



.







So the conversion in both directions is done with SetBytes and FillBytes:







var v Int256

// SetBytes interprets the bytes as a big-endian unsigned integer.
i := new(big.Int).SetBytes(v[:]) // v -> i
// FillBytes writes the absolute value of i as big-endian, zero-extended
// to len(v); it panics if the value does not fit.
i.FillBytes(v[:]) // i -> v
      
      







bin



, . , , .







This problem is solved by generating (de-) serialization code from the schema (that's why we wrote a parser!). Perhaps I'll give the generator a separate part in a series of articles. This project module turned out to be complex, it was rewritten several times and I would like to make life a little easier for people who will write code generators in Go for other formats.







For reference, about 180K SLOC is currently generated from the Telegram schemas (api, mtproto, secret chats).







I would like to thank tdakkota and zweihander for their invaluable contribution to the development of the project! It would be very difficult without you.








All Articles