Protocol Buffers (protobuf)
Protocol Buffers is Google's language-neutral, platform-neutral mechanism for serializing structured data. This guide covers .proto file syntax and compilation with protoc.
Quick Start
Installation
# Install the protoc compiler from the official release archives:
# https://github.com/protocolbuffers/protobuf/releases
# The archive below is the Linux x86_64 build. On macOS, download the matching
# protoc-<version>-osx-*.zip asset from the same release page instead.
PROTOC_VERSION=27.0
PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip"
# -f makes curl fail on HTTP errors instead of saving an error page as the zip
curl -fLO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"
unzip "${PROTOC_ZIP}" -d protoc
sudo cp protoc/bin/protoc /usr/local/bin/
# The bundled google/protobuf/*.proto files are needed to import well-known types
sudo cp -r protoc/include/google /usr/local/include/
# Verify installation
protoc --version
Language Plugins
# Go: message code generator (provides --go_out)
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
# Go: gRPC stub generator (provides --go-grpc_out)
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
# Python: runtime library (the Python generator itself is built into protoc)
pip install protobuf
# Python: gRPC tooling (provides grpc_tools.protoc and --grpc_python_out)
pip install grpcio-tools
# Java/Kotlin (built into protoc)
# JavaScript/TypeScript
npm install -g @protobuf-ts/plugin
Core Concepts
.proto File Structure
// File header: the first statement is exactly one of
//   syntax = "proto3";   syntax = "proto2";   edition = "2023";
// (an `edition` line replaces the `syntax` line and needs a protoc release
// that supports Editions).
syntax = "proto3";

package com.example;

// Imports
import "google/protobuf/timestamp.proto";

// Options
option java_package = "com.example.proto";
// go_package should be the full Go import path of the generated package.
option go_package = "example.com/project/proto";

// Message definitions
message Person {
  string name = 1;
  int32 age = 2;
}
Message Definitions
Basic Message
syntax = "proto3";

// A minimal message built only from scalar fields.
message User {
  // Each field number may be used only once within a message.
  int32 id = 1;
  string username = 2;
  string email = 3;
  bool active = 4;
}
Nested Messages
// A message that declares a nested type and uses it for one of its fields.
message Person {
  // Address is declared inside Person and used by the `address` field below.
  message Address {
    string street = 1;
    string city = 2;
    string country = 3;
  }

  string name = 1;
  Address address = 2;
}
Field Rules (Proto2 vs Proto3)
// Proto2: every singular field carries an explicit label.
syntax = "proto2";
message User {
  // Must always be provided.
  required string name = 1;
  // May be omitted.
  optional int32 age = 2;
  // Holds zero or more values.
  repeated string tags = 3;
}

// Proto3 (more common): singular fields need no label.
syntax = "proto3";
message User {
  // Implicit presence: an empty string cannot be told apart from "unset".
  string name = 1;
  // Explicit presence via `optional` (available since protobuf release 3.15).
  optional int32 age = 2;
  // Holds zero or more values.
  repeated string tags = 3;
}
Scalar Types
Numeric Types
// One field for each numeric scalar type, plus bool.
message NumericTypes {
  // Floating point
  double price = 1;  // 64-bit
  float rating = 2;  // 32-bit

  // Variable-length (varint) integers
  int32 count = 3;
  int64 big_count = 4;
  uint32 positive = 5;
  uint64 big_positive = 6;

  // ZigZag-encoded varints: compact for values that are often negative
  sint32 signed_val = 7;
  sint64 big_signed = 8;

  // Fixed-width integers
  fixed32 fixed_val = 9;           // always 4 bytes
  fixed64 big_fixed = 10;          // always 8 bytes
  sfixed32 signed_fixed = 11;      // always 4 bytes, signed
  sfixed64 big_signed_fixed = 12;  // always 8 bytes, signed

  bool enabled = 13;
}
String and Bytes
// The two length-delimited scalar types.
message TextTypes {
  // Text; the contents must be valid UTF-8.
  string text = 1;
  // Arbitrary bytes with no encoding requirement.
  bytes data = 2;
}
Complex Types
Enumerations
// An enum; its zero value is the default for any field of this type.
enum Status {
  // proto3 requires the first declared value to be 0.
  UNKNOWN = 0;
  PENDING = 1;
  APPROVED = 2;
  REJECTED = 3;
}

// Uses the enum as a field type.
message Request {
  Status status = 1;
}
Repeated Fields
// Repeated fields hold a list of zero or more values.
message ShoppingCart {
  repeated string items = 1;      // list of strings
  repeated int32 quantities = 2;  // list of integers
  repeated Product products = 3;  // list of messages
}

// Packed encoding stores all values of a numeric repeated field in one
// length-delimited record. proto3 packs such fields by default, so the
// explicit option matters mainly in proto2 files.
message Measurements {
  repeated int32 values = 1 [packed = true];
}
Maps
// Map fields. Keys must be integral or string types; values may be any type
// except another map. Iteration order is undefined.
message UserSettings {
  map<string, string> preferences = 1;  // string -> string
  map<int32, User> users_by_id = 2;     // integer -> message
  map<string, bool> features = 3;       // string -> boolean
}
Oneof (Union Types)
// A query with at most one of several mutually exclusive filters.
message SearchRequest {
  string query = 1;

  // Only one member can be set at a time; setting one clears the others.
  oneof filter {
    string category = 2;
    int32 price_max = 3;
    bool on_sale = 4;
  }
}
Advanced Features
Any Type
import "google/protobuf/any.proto";

// An error whose detail payload is not fixed by the schema.
message ErrorInfo {
  string message = 1;
  // Any embeds an arbitrary message together with its type URL.
  google.protobuf.Any details = 2;
}
Well-Known Types
import "google/protobuf/timestamp.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/struct.proto";

// Shows three well-known types used as ordinary field types.
message Event {
  // An absolute point in time.
  google.protobuf.Timestamp created_at = 1;
  // A span of time.
  google.protobuf.Duration timeout = 2;
  // Untyped, JSON-like data.
  google.protobuf.Struct metadata = 3;
}
Field Options
// Provides the (validate.rules) option used below; it ships with
// protoc-gen-validate and must be reachable through an -I path.
import "validate/validate.proto";

// Shows a custom option and a built-in option attached to fields.
message Product {
  string name = 1;
  // Custom validation rule: the price must be greater than zero.
  double price = 2 [(validate.rules).double.gt = 0];
  // Built-in option: marks the field as deprecated.
  string description = 3 [deprecated = true];
}
Reserved Fields
// Retired field numbers and names are reserved so they cannot be reused.
message User {
  // Reserved field numbers: 2, 15, and the range 9 through 11.
  reserved 2, 15, 9 to 11;
  // Reserved field names.
  reserved "old_name", "legacy_id";

  string name = 1;
  // Number 2 is reserved above, so this field uses 3.
  string email = 3;
}
Services (gRPC)
Basic Service Definition
syntax = "proto3";

// One RPC for each of the four gRPC call shapes.
service UserService {
  // Unary: one request, one response.
  rpc GetUser(GetUserRequest) returns (User);

  // Server streaming: one request, a stream of responses.
  rpc ListUsers(ListUsersRequest) returns (stream User);

  // Client streaming: a stream of requests, one response.
  rpc CreateUsers(stream CreateUserRequest) returns (CreateUsersResponse);

  // Bidirectional streaming: both sides send a stream.
  rpc Chat(stream ChatMessage) returns (stream ChatMessage);
}

// Request for GetUser.
message GetUserRequest {
  int32 user_id = 1;
}

// Request for ListUsers.
message ListUsersRequest {
  int32 page_size = 1;
  string page_token = 2;
}

// A single element of the CreateUsers request stream.
message CreateUserRequest {
  User user = 1;
}

// Response for CreateUsers.
message CreateUsersResponse {
  repeated User users = 1;
}

// Element type of both Chat streams.
message ChatMessage {
  string message = 1;
  string user_id = 2;
}
Compilation with protoc
Basic Usage
# Check version
protoc --version
# protoc does not create output directories; they must exist beforehand
mkdir -p ./generated ./java ./python ./go
# Generate code for single language
protoc --python_out=./generated user.proto
# Generate for multiple languages
protoc --java_out=./java --python_out=./python --go_out=./go user.proto
# Specify import paths (each -I adds a root that imports are resolved against)
protoc -I./protos -I./vendor --python_out=./generated protos/user.proto
Language-Specific Generation
Go
# Basic Go generation (output is written next to the source .proto file)
protoc --go_out=. --go_opt=paths=source_relative user.proto
# With gRPC (requires protoc-gen-go-grpc on PATH)
protoc --go_out=. --go-grpc_out=. \
  --go_opt=paths=source_relative \
  --go-grpc_opt=paths=source_relative \
  user.proto
# Custom Go package: the M flag maps a .proto file to a full Go import path.
# module= strips that prefix from the output path, so the file is written to
# ./internal/proto/.
protoc --go_out=. --go_opt=module=example.com/project \
  --go_opt=Muser.proto=example.com/project/internal/proto user.proto
Python
# Basic Python generation
protoc --python_out=./generated user.proto
# With type stubs (.pyi files)
protoc --python_out=./generated --pyi_out=./generated user.proto
# With gRPC: the grpc_python generator is not built into protoc. It ships with
# the grpcio-tools package (pip install grpcio-tools), which wraps protoc.
python -m grpc_tools.protoc -I. --python_out=./generated --grpc_python_out=./generated user.proto
Java/Kotlin
# Java: generate Java sources under ./src/main/java
protoc --java_out=./src/main/java user.proto
# Java Lite (smaller runtime): the "lite:" prefix is a generator option
protoc --java_out=lite:./src/main/java user.proto
# Kotlin: --kotlin_out is combined with --java_out because the generated
# Kotlin code builds on the generated Java classes
protoc --java_out=./src/main/java --kotlin_out=./src/main/kotlin user.proto
JavaScript/TypeScript
# JavaScript
# NOTE: protoc releases from v21 on no longer bundle the JavaScript generator;
# --js_out needs the separate protoc-gen-js plugin (from the
# protocolbuffers/protobuf-javascript project) available on PATH.
protoc --js_out=import_style=commonjs,binary:./generated user.proto
# TypeScript with protobuf-ts (--ts_out is served by the protoc-gen-ts plugin
# that @protobuf-ts/plugin installs)
protoc --ts_out=./generated user.proto
C++
# C++
protoc --cpp_out=./generated user.proto
# With gRPC. A NAME=PATH value given to --plugin is used as an exact path and
# is not searched for on PATH, so resolve the executable explicitly.
protoc --cpp_out=./generated --grpc_out=./generated \
  --plugin=protoc-gen-grpc="$(which grpc_cpp_plugin)" user.proto
C#
# C#
protoc --csharp_out=./Generated user.proto
# With gRPC. A NAME=PATH value given to --plugin is used as an exact path and
# is not searched for on PATH, so resolve the executable explicitly.
protoc --csharp_out=./Generated --grpc_out=./Generated \
  --plugin=protoc-gen-grpc="$(which grpc_csharp_plugin)" user.proto
Advanced protoc Options
Multiple Files and Directories
# Compile all .proto files in directory (the shell expands the glob, so protoc
# receives every file in a single invocation)
protoc --python_out=./generated protos/*.proto
# Recursive compilation: find runs protoc once for each .proto file it finds
find ./protos -name "*.proto" -exec protoc --python_out=./generated {} \;
# With include paths: each -I adds a root directory that imports are resolved
# against
protoc -I./protos -I./vendor -I./third_party \
--python_out=./generated \
protos/user.proto protos/order.proto
Descriptor Sets
# Generate descriptor set (for reflection); --include_imports also embeds the
# descriptors of the files that user.proto imports
protoc --descriptor_set_out=user.desc --include_imports user.proto
# Generate descriptor set with source info (--include_source_info keeps
# comments and source locations in the output)
protoc --descriptor_set_out=user.desc \
--include_imports --include_source_info user.proto
Custom Plugins
# Use custom plugin: --plugin=protoc-gen-NAME=PATH makes --NAME_out available
protoc --plugin=protoc-gen-custom=./my-plugin \
  --custom_out=./generated user.proto
# Plugin with options: the text before the colon is passed to the plugin.
# The PATH part of --plugin is used as an exact path and is not searched for
# on PATH, so resolve the executable explicitly.
protoc --plugin=protoc-gen-validate="$(which protoc-gen-validate)" \
  --validate_out="lang=go:./generated" user.proto
Build Integration
Makefile
# Variables
PROTO_FILES = $(wildcard protos/*.proto)
GENERATED_GO = $(PROTO_FILES:protos/%.proto=generated/%.pb.go)
GENERATED_PY = $(PROTO_FILES:protos/%.proto=generated/%_pb2.py)

# Targets
.PHONY: all go python clean

# Declared first so that a bare `make` builds everything.
all: go python

go: $(GENERATED_GO)

python: $(GENERATED_PY)

# Go generation
# -Iprotos makes output paths relative to protos/, so protos/user.proto yields
# generated/user.pb.go, which is the file this rule promises. Without it the
# output lands in generated/protos/ and the target is never created.
generated/%.pb.go: protos/%.proto
	@mkdir -p generated
	protoc -Iprotos --go_out=generated --go_opt=paths=source_relative $<

# Python generation (same -Iprotos reasoning as above)
generated/%_pb2.py: protos/%.proto
	@mkdir -p generated
	protoc -Iprotos --python_out=generated $<

clean:
	rm -rf generated/*
CMake
# Find protobuf
find_package(protobuf REQUIRED)

# compile_proto_files(<file>...)
# Adds a protoc build step for each given .proto file and appends the
# generated .pb.h/.pb.cc paths to PROTO_GENERATED_FILES in the caller's scope.
function(compile_proto_files)
  foreach(proto_file ${ARGN})
    # Resolve against the current source directory. The custom command runs in
    # the build directory, where a relative path such as protos/user.proto
    # would not exist.
    get_filename_component(proto_abs ${proto_file} ABSOLUTE)
    get_filename_component(proto_name ${proto_abs} NAME_WE)
    get_filename_component(proto_dir ${proto_abs} DIRECTORY)
    set(generated_files
      ${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.pb.h
      ${CMAKE_CURRENT_BINARY_DIR}/${proto_name}.pb.cc
    )
    add_custom_command(
      OUTPUT ${generated_files}
      COMMAND protobuf::protoc
      ARGS --cpp_out=${CMAKE_CURRENT_BINARY_DIR}
           -I${proto_dir}
           ${proto_abs}
      DEPENDS ${proto_abs}
      COMMENT "Generating C++ sources from ${proto_file}"
      VERBATIM
    )
    list(APPEND PROTO_GENERATED_FILES ${generated_files})
  endforeach()
  # Export the accumulated list to the caller.
  set(PROTO_GENERATED_FILES ${PROTO_GENERATED_FILES} PARENT_SCOPE)
endfunction()

# Usage
compile_proto_files(protos/user.proto protos/order.proto)
add_executable(myapp main.cpp ${PROTO_GENERATED_FILES})
# The generated *.pb.h headers are written to the build directory.
target_include_directories(myapp PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(myapp protobuf::libprotobuf)
Bazel
# BUILD file
load("@rules_proto//proto:defs.bzl", "proto_library")
load("@io_grpc_grpc_java//:java_grpc_library.bzl", "java_grpc_library")
# Language-independent description of user.proto and its proto dependencies.
proto_library(
name = "user_proto",
srcs = ["user.proto"],
deps = [
"@com_google_protobuf//:timestamp_proto",
],
)
# Java message classes generated from :user_proto.
# NOTE(review): java_proto_library has no load() statement in this file, so it
# is presumably expected to be a built-in rule; confirm against the Bazel
# version in use.
java_proto_library(
name = "user_java_proto",
deps = [":user_proto"],
)
# Java gRPC stubs for the services in :user_proto; depends on the generated
# message classes.
java_grpc_library(
name = "user_java_grpc",
srcs = [":user_proto"],
deps = [":user_java_proto"],
)
Best Practices
Schema Design
import "google/protobuf/timestamp.proto";

// Good: Use clear, descriptive names
message UserProfile {
  string full_name = 1;      // Better than 'name'
  string email_address = 2;  // Better than 'email'
  // Better than 'created'. A Timestamp states its epoch and precision,
  // which a bare int64 leaves ambiguous.
  google.protobuf.Timestamp created_timestamp = 3;
}

// Good: Group related fields
message Address {
  string street_line_1 = 1;
  string street_line_2 = 2;
  string city = 3;
  string state = 4;
  string postal_code = 5;
  string country_code = 6;
}

// Good: Use enums for fixed sets of values
enum UserRole {
  // Always include a zero value; it is the default and should not carry
  // business meaning.
  ROLE_UNSPECIFIED = 0;
  ROLE_USER = 1;
  ROLE_ADMIN = 2;
  ROLE_MODERATOR = 3;
}
Field Numbering
// Illustrates how field numbers affect encoded size.
message Product {
  // Numbers 1-15 encode their tag in a single byte, so keep them for the
  // most frequently set fields.
  string name = 1;
  double price = 2;
  bool available = 3;

  // Numbers 16 and above need two or more bytes for the tag.
  string detailed_description = 16;
  repeated string tags = 17;

  // Blocked-out ranges: no field may use a reserved number until it is
  // removed from the reserved statement.
  reserved 4 to 10;
  reserved 100 to 200;
}
Versioning and Evolution
// Version 1 of the message.
message User {
  string name = 1;
  string email = 2;
}

// Version 2: backward compatible, because existing field numbers are
// untouched and every addition uses a fresh number.
message User {
  string name = 1;
  string email = 2;

  // Added fields are simply absent in data written by older code.
  optional int32 age = 3;
  repeated string interests = 4;

  // Message-typed fields can be added the same way.
  optional Address address = 5;
}
Performance Considerations
// Choose field types that match the data they carry.
message Metrics {
  // Packed encoding for primitive arrays (already the default for numeric
  // repeated fields in proto3).
  repeated int32 values = 1 [packed = true];

  // Fixed-width types suit values that are routinely very large, where a
  // varint would take more bytes than the fixed 8.
  fixed64 timestamp_nanos = 2;

  // Binary payloads belong in bytes, not string.
  bytes thumbnail = 3;

  // string for text known to be valid UTF-8 ...
  string utf8_text = 4;
  // ... bytes for text that may contain invalid UTF-8.
  bytes raw_text = 5;
}
Common Patterns
Request/Response Patterns
import "google/protobuf/field_mask.proto";

// Standard CRUD operations. Every RPC has its own request and response
// message so fields can be added later without changing signatures.
service UserService {
  rpc CreateUser(CreateUserRequest) returns (CreateUserResponse);
  rpc GetUser(GetUserRequest) returns (GetUserResponse);
  rpc UpdateUser(UpdateUserRequest) returns (UpdateUserResponse);
  rpc DeleteUser(DeleteUserRequest) returns (DeleteUserResponse);
  rpc ListUsers(ListUsersRequest) returns (ListUsersResponse);
}

message CreateUserRequest {
  User user = 1;
}

message CreateUserResponse {
  User user = 1;
  string message = 2;
}

message GetUserRequest {
  string user_id = 1;
}

message GetUserResponse {
  User user = 1;
}

message UpdateUserRequest {
  // The user to update, carrying the new field values.
  User user = 1;
  // Which fields of `user` to apply (partial update).
  google.protobuf.FieldMask update_mask = 2;
}

message UpdateUserResponse {
  User user = 1;
}

message DeleteUserRequest {
  string user_id = 1;
}

// Intentionally empty; fields can be added later without breaking callers.
message DeleteUserResponse {}

// Pagination pattern
message ListUsersRequest {
  int32 page_size = 1;
  string page_token = 2;
  string filter = 3;
}

message ListUsersResponse {
  repeated User users = 1;
  string next_page_token = 2;
  int32 total_count = 3;
}
Error Handling
import "google/rpc/status.proto";
import "google/protobuf/any.proto";

// Envelope returned when a call fails.
message ErrorResponse {
  // Standard error status.
  google.rpc.Status status = 1;
  // Human-readable message.
  string message = 2;
  // Additional error details; each entry can hold any message type.
  repeated google.protobuf.Any details = 3;
}

// Custom error detail: a list of per-field failures.
message ValidationError {
  repeated FieldError field_errors = 1;
}

// One failure attached to a single field.
message FieldError {
  string field = 1;
  string message = 2;
  string code = 3;
}
Gotchas and Common Mistakes
Field Number Management
// DON'T: give a new field the number of a removed one. Data written with
// the old field would be read as the new field.
message User {
  string name = 1;
  // string old_email = 2;  // removed field
  string email = 2;  // DON'T reuse number 2
}

// DO: reserve the removed number and give the new field a fresh one.
message User {
  reserved 2;  // or: reserved "old_email";

  string name = 1;
  string email = 3;  // new number
}
Default Values and Presence
// Proto3 problem: a plain scalar has implicit presence, so its default value
// cannot be told apart from "not set".
message User {
  // age == 0 may mean "not set" or an actual value of 0.
  int32 age = 1;
}

// Solution: use `optional` or a wrapper type.
import "google/protobuf/wrappers.proto";

message User {
  // `optional` gives the field explicit presence.
  optional int32 age = 1;
  // Alternative: a wrapper message, which is absent when unset.
  google.protobuf.Int32Value age_wrapper = 2;
}
Package and Import Issues
// File: protos/user.proto
syntax = "proto3";
package myapp.user; // Use consistent package naming
import "protos/common.proto"; // Resolved against the -I/--proto_path roots, not relative to this file
// File: protos/common.proto
syntax = "proto3";
package myapp.common; // The package name is independent of the file path; protoc does not require them to match
Compilation Ordering
# Wrong: May fail if dependencies aren't found (with no -I, imports are
# resolved against the current directory only)
protoc --python_out=. user.proto
# Right: Include all necessary import paths
protoc -I. -I./vendor -I./third_party --python_out=. user.proto
Quick Reference
Essential protoc Flags
| Flag | Purpose | Example |
|---|---|---|
| --version | Show protoc version | protoc --version |
| -I, --proto_path | Add import directory | protoc -I./protos |
| --python_out | Generate Python code | --python_out=./gen |
| --go_out | Generate Go code | --go_out=. |
| --java_out | Generate Java code | --java_out=./src |
| --cpp_out | Generate C++ code | --cpp_out=./gen |
| --descriptor_set_out | Generate descriptor | --descriptor_set_out=desc.pb |
| --include_imports | Include dependencies in descriptor | Use with --descriptor_set_out |
Scalar Type Mapping
| Proto Type | Go | Python | Java | C++ | JavaScript |
|---|---|---|---|---|---|
| double | float64 | float | double | double | number |
| float | float32 | float | float | float | number |
| int32 | int32 | int | int | int32 | number |
| int64 | int64 | int | long | int64 | string |
| string | string | str | String | string | string |
| bool | bool | bool | boolean | bool | boolean |
| bytes | []byte | bytes | ByteString | string | Uint8Array |
Protocol Buffers provide efficient, language-agnostic data serialization with strong schema evolution capabilities. Focus on clear field naming, proper type selection, and maintaining backward compatibility for production systems.