GraphQL Federation: Building Distributed Graph APIs at Scale

GraphQL Federation allows multiple teams to build parts of a unified GraphQL API independently. After implementing federated graphs serving millions of requests daily, I’ll share patterns that work at scale and pitfalls to avoid.

The Problem with Monolithic GraphQL

Traditional GraphQL APIs become bottlenecks as organizations scale:

# Monolithic schema: a single team owns every type and every root field
type Query {
  user(id: ID!): User
  product(id: ID!): Product
  order(id: ID!): Order
  review(id: ID!): Review
  # 100+ more types and fields...
}

# Consequences of keeping everything in one schema:
# - All resolvers in one codebase
# - One deployment for every change
# - One team to review all changes
# - Scaling nightmare

GraphQL Federation Solution

Federation splits the graph across services:

Client → Apollo Gateway → Users Service (owns User type)
                        → Products Service (owns Product type)
                        → Orders Service (owns Order type)
                        → Reviews Service (owns Review type)

Each service owns part of the schema and can deploy independently.

Implementing Federation

Service 1: Users Service

// users-service/schema.ts
import { buildSubgraphSchema } from '@apollo/subgraph';
import { gql } from 'apollo-server';

/**
 * SDL for the Users subgraph.
 *
 * Adds the `user` and `me` root fields and defines the `User` entity, keyed
 * by `id` so other subgraphs can reference and extend it.
 */
const typeDefs = gql`
  # DateTime is not a built-in GraphQL scalar; it must be declared before use,
  # otherwise schema construction fails with an unknown-type error.
  # NOTE(review): pair this with a scalar resolver if values need validation.
  scalar DateTime

  # Extend Query type
  extend type Query {
    user(id: ID!): User
    me: User
  }

  # Define User entity with @key directive
  type User @key(fields: "id") {
    id: ID!
    email: String!
    name: String!
    createdAt: DateTime!
  }
`;

/**
 * Resolvers for the Users subgraph.
 *
 * - `Query.user` looks a user up by id.
 * - `Query.me` returns the user identified by `context.userId`.
 * - `User.__resolveReference` turns a `{ id }` entity representation sent by
 *   the gateway into a full user record.
 */
const resolvers = {
  Query: {
    user: async (_: any, { id }: { id: string }) => {
      return userRepository.findById(id);
    },
    me: async (_: any, __: any, context: any) => {
      // `me` is nullable in the schema: an unauthenticated request should get
      // null back instead of triggering a repository lookup with no id.
      if (!context?.userId) {
        return null;
      }
      return userRepository.findById(context.userId);
    },
  },

  User: {
    // Reference resolver - allows other services to extend User
    __resolveReference: async (user: { id: string }) => {
      return userRepository.findById(user.id);
    },
  },
};

export const schema = buildSubgraphSchema({ typeDefs, resolvers });

Service 2: Products Service

// products-service/schema.ts
import { buildSubgraphSchema } from '@apollo/subgraph';
import { gql } from 'apollo-server';

/**
 * SDL for the Products subgraph.
 *
 * - Adds the `product` and `products` root fields.
 * - Extends the `User` entity (keyed by `id`) with `purchaseHistory`.
 * - Defines the `Product` entity, whose `seller` field points at a `User`.
 */
const typeDefs = gql`
  extend type Query {
    product(id: ID!): Product
    products(category: String): [Product!]!
  }

  # Extend User type from Users service
  extend type User @key(fields: "id") {
    id: ID! @external
    # Add product-specific fields to User
    purchaseHistory: [Product!]!
  }

  type Product @key(fields: "id") {
    id: ID!
    name: String!
    price: Float!
    category: String!
    # Reference to User (owned by Users service)
    seller: User!
  }
`;

/**
 * Resolvers for the Products subgraph: root product queries, the Product
 * entity, and the `purchaseHistory` field contributed to User.
 */
const resolvers = {
  Query: {
    // Single product by id.
    async product(_parent: any, args: { id: string }) {
      const found = await productRepository.findById(args.id);
      return found;
    },
    // Products for a category; `category` is optional and is passed through
    // to the repository unchanged.
    async products(_parent: any, args: { category?: string }) {
      const matches = await productRepository.findByCategory(args.category);
      return matches;
    },
  },

  Product: {
    // Hand back only an entity reference; the gateway asks the Users
    // service for the remaining User fields.
    seller(product: any) {
      const sellerRef = { __typename: 'User', id: product.sellerId };
      return sellerRef;
    },
    // Rehydrate a Product from the `{ id }` representation.
    async __resolveReference(product: { id: string }) {
      const found = await productRepository.findById(product.id);
      return found;
    },
  },

  User: {
    // Products bought by this user: resolve the user's orders first, then
    // the products attached to those orders.
    async purchaseHistory(user: { id: string }) {
      const orderIds = await orderRepository.findByUserId(user.id);
      const purchased = await productRepository.findByOrderIds(orderIds);
      return purchased;
    },
  },
};

export const schema = buildSubgraphSchema({ typeDefs, resolvers });

Service 3: Reviews Service

// reviews-service/schema.ts
import { buildSubgraphSchema } from '@apollo/subgraph';
import { gql } from 'apollo-server';

/**
 * SDL for the Reviews subgraph.
 *
 * Adds the `review` root field, defines the `Review` entity, and contributes
 * review-related fields to the `Product` and `User` entities.
 */
const typeDefs = gql`
  # DateTime is not a built-in GraphQL scalar; it must be declared before use,
  # otherwise schema construction fails with an unknown-type error.
  scalar DateTime

  extend type Query {
    review(id: ID!): Review
  }

  # Extend Product with reviews
  extend type Product @key(fields: "id") {
    id: ID! @external
    reviews: [Review!]!
    averageRating: Float!
  }

  # Extend User with reviews
  extend type User @key(fields: "id") {
    id: ID! @external
    reviews: [Review!]!
  }

  type Review @key(fields: "id") {
    id: ID!
    rating: Int!
    comment: String!
    product: Product!
    author: User!
    createdAt: DateTime!
  }
`;

/**
 * Resolvers for the Reviews subgraph.
 *
 * Covers the `review` root field, the fields this service adds to Product
 * and User, and the Review entity itself.
 */
const resolvers = {
  Query: {
    // The schema declares `review(id: ID!)`; without this resolver the field
    // would always resolve to null.
    review: async (_: any, { id }: { id: string }) => {
      return reviewRepository.findById(id);
    },
  },

  Product: {
    reviews: async (product: { id: string }) => {
      return reviewRepository.findByProductId(product.id);
    },
    averageRating: async (product: { id: string }) => {
      return reviewRepository.calculateAverageRating(product.id);
    },
  },

  User: {
    reviews: async (user: { id: string }) => {
      return reviewRepository.findByAuthorId(user.id);
    },
  },

  Review: {
    // Return entity references only; the gateway fetches the remaining
    // fields from the services that own Product and User.
    product: (review: any) => {
      return { __typename: 'Product', id: review.productId };
    },
    author: (review: any) => {
      return { __typename: 'User', id: review.authorId };
    },
    // Rehydrate a Review from the `{ id }` representation.
    __resolveReference: async (review: { id: string }) => {
      return reviewRepository.findById(review.id);
    },
  },
};

export const schema = buildSubgraphSchema({ typeDefs, resolvers });

Gateway Configuration

// gateway/index.ts
import { ApolloGateway, IntrospectAndCompose } from '@apollo/gateway';
import { ApolloServer } from 'apollo-server';

// Subgraphs the gateway introspects and composes into one supergraph.
const subgraphEndpoints = [
  { name: 'users', url: 'http://users-service:4001/graphql' },
  { name: 'products', url: 'http://products-service:4002/graphql' },
  { name: 'reviews', url: 'http://reviews-service:4003/graphql' },
];

// Gateway that composes the supergraph at runtime by introspecting each
// subgraph, re-polling every 10 seconds to pick up schema changes.
// NOTE(review): runtime introspection is convenient for development; confirm
// whether production should load a pre-composed supergraph instead.
const gateway = new ApolloGateway({
  supergraphSdl: new IntrospectAndCompose({
    subgraphs: subgraphEndpoints,
    pollIntervalInMs: 10000,
  }),
});

/**
 * Gateway HTTP server.
 *
 * Builds the per-request context from the incoming `authorization` header:
 * - `userId`: result of `verifyToken`, or null when no token was sent.
 * - `authToken`: the raw header value, kept so it can be forwarded to
 *   subgraphs by the gateway's data sources.
 */
const server = new ApolloServer({
  gateway,
  // Only allow schema introspection outside production
  introspection: process.env.NODE_ENV !== 'production',
  context: ({ req }) => {
    const token = req.headers.authorization || '';
    // Skip verification for anonymous requests instead of handing an empty
    // string to verifyToken.
    const userId = token ? verifyToken(token) : null;
    return { userId, authToken: token };
  },
});

server
  .listen({ port: 4000 })
  .then(({ url }) => {
    console.log(`🚀 Gateway ready at ${url}`);
  })
  .catch((error) => {
    // Surface startup failures (e.g. composition errors, port in use)
    // instead of leaving an unhandled promise rejection.
    console.error('Gateway failed to start', error);
    process.exitCode = 1;
  });

Advanced Patterns

DataLoader for N+1 Prevention

import DataLoader from 'dataloader';

/**
 * Batches user lookups through a DataLoader.
 *
 * A DataLoader memoizes every key it has loaded for the lifetime of the
 * instance, so create one UserService per request (e.g. in the context
 * factory) rather than sharing a single instance across requests.
 */
class UserService {
  // Missing users resolve to null, so null is part of the loader's value type.
  private readonly userLoader: DataLoader<string, User | null>;

  constructor() {
    this.userLoader = new DataLoader<string, User | null>(
      async (userIds: readonly string[]) => {
        // Batch load users
        const users = await userRepository.findByIds([...userIds]);

        // DataLoader requires results in the same order and length as the
        // requested keys; ids with no matching user map to null.
        const userMap = new Map<string, User>(
          users.map((u: User): [string, User] => [u.id, u]),
        );
        return userIds.map((id) => userMap.get(id) ?? null);
      },
    );
  }

  /**
   * Load one user by id; calls made in the same tick are batched together.
   * @returns the user, or null when no user with that id exists.
   */
  async getUser(id: string): Promise<User | null> {
    return this.userLoader.load(id);
  }
}

// Resolver wiring: Product.seller delegates to the user service found on the
// request context, so seller lookups issued while resolving a list of
// products go through the same loader.
const resolvers = {
  Product: {
    async seller(product: any, _args: any, context: any) {
      const seller = await context.userService.getUser(product.sellerId);
      return seller;
    },
  },
};

Computed Fields with @requires

// Extend Product with a computed field that needs data owned elsewhere.
//
// Every field named in @requires must be declared @external in this
// subgraph. The required selection belongs only on the computed field
// (`displayName`); an @external field cannot also carry @requires.
// The unused `price` @external declaration was dropped because nothing here
// depends on it.
// NOTE(review): requiring `seller { name }` reaches into another entity;
// confirm your federation version composes this selection.
const typeDefs = gql`
  extend type Product @key(fields: "id") {
    id: ID! @external
    name: String! @external
    seller: User! @external
    displayName: String! @requires(fields: "name seller { name }")
  }

  extend type User @key(fields: "id") {
    id: ID! @external
    name: String! @external
  }
`;

// Resolver for the computed Product.displayName field.
const resolvers = {
  Product: {
    // The incoming product representation carries `name` and `seller.name`;
    // join them as "<product name> by <seller name>".
    displayName(product: any) {
      const { name, seller } = product;
      return `${name} by ${seller.name}`;
    },
  },
};

Authorization Across Services

// Centralized authorization directives.
//
// Declares an `@auth(requires: Role!)` directive usable on field definitions
// and a `Role` enum (USER, ADMIN, SELLER), then applies it to
// `Query.adminUsers` (ADMIN) and `Product.salesData` (SELLER).
// NOTE(review): `User` and `SalesData` are not declared in this snippet —
// presumably they are defined elsewhere in the subgraph; confirm.
const typeDefs = gql`
  directive @auth(requires: Role!) on FIELD_DEFINITION

  enum Role {
    USER
    ADMIN
    SELLER
  }

  extend type Query {
    adminUsers: [User!]! @auth(requires: ADMIN)
  }

  extend type Product @key(fields: "id") {
    id: ID! @external
    # Only seller can see sales data
    salesData: SalesData! @auth(requires: SELLER)
  }
`;

// Field-level enforcement of the @auth directive.
//
// Wraps the resolver of every field annotated with @auth so the original
// resolver only runs when `context.user` exists and holds the required role.
// NOTE(review): the wrapper runs wherever the field's resolver executes, so
// it presumably has to be registered in the subgraph that owns the field
// rather than in the gateway — confirm. SchemaDirectiveVisitor is also a
// legacy API; confirm it exists in the installed tooling version.
class AuthDirective extends SchemaDirectiveVisitor {
  visitFieldDefinition(field: GraphQLField<any, any>) {
    // Role named in the directive, e.g. @auth(requires: ADMIN).
    const { requires } = this.args;
    // Fall back to the default resolver for fields without an explicit one.
    const { resolve = defaultFieldResolver } = field;

    field.resolve = async function (source, args, context, info) {
      const user = context?.user;
      if (!user) {
        throw new AuthenticationError('Not authenticated');
      }

      // A user without a roles array has no roles: reject with
      // ForbiddenError instead of crashing on `undefined.includes`.
      const roles = Array.isArray(user.roles) ? user.roles : [];
      if (!roles.includes(requires)) {
        throw new ForbiddenError('Insufficient permissions');
      }

      return resolve.call(this, source, args, context, info);
    };
  }
}

Caching Strategies

import { InMemoryLRUCache } from '@apollo/utils.keyvaluecache';
import responseCachePlugin from 'apollo-server-plugin-response-cache';

/**
 * Gateway server with a bounded in-memory cache and full-response caching.
 */
const server = new ApolloServer({
  gateway,
  cache: new InMemoryLRUCache({
    maxSize: 100 * 1024 * 1024, // 100 MB
    // Constructor options go straight to the underlying LRU cache, whose
    // `ttl` is expressed in milliseconds: 5 minutes = 300 000 ms.
    ttl: 5 * 60 * 1000,
  }),
  plugins: [
    responseCachePlugin({
      // The hook receives the request context, not the user context; the
      // value built by the `context` function lives under `.context`.
      // Returning null marks the request as having no session.
      sessionId: (requestContext) => requestContext.context.userId || null,
    }),
  ],
});

// In subgraph schemas: per-field cache hints.
//
// `name` may be cached for 3600 seconds; `inventory` uses maxAge 0, which
// disables caching for that field.
// NOTE(review): the subgraph presumably also has to declare the
// @cacheControl directive itself for this SDL to validate — confirm.
const typeDefs = gql`
  type Product @key(fields: "id") {
    id: ID!
    # Cache for 1 hour
    name: String! @cacheControl(maxAge: 3600)
    # Don't cache (frequently changing)
    inventory: Int! @cacheControl(maxAge: 0)
  }
`;

Error Handling

// Custom error handling in gateway.
//
// Each subgraph gets a RemoteGraphQLDataSource that forwards the caller's
// auth token, logs errors returned by the subgraph, and logs transport
// failures. `serviceHealthCheck: true` asks the gateway to health-check the
// subgraphs.
const gateway = new ApolloGateway({
  serviceHealthCheck: true,
  buildService: ({ url }) => {
    return new RemoteGraphQLDataSource({
      url,
      willSendRequest: ({ request, context }) => {
        // Forward auth headers. The context can be empty for requests the
        // gateway issues on its own behalf, and `request.http` is optional,
        // so only set the header when there is a token to forward.
        const authToken = context?.authToken;
        if (authToken) {
          request.http?.headers.set('authorization', authToken);
        }
      },
      didReceiveResponse: async ({ response, request, context }) => {
        // Log errors from subgraphs
        if (response.errors) {
          logger.error('Subgraph error', {
            url,
            query: request.query,
            errors: response.errors,
          });
        }
        return response;
      },
      didEncounterError: (error) => {
        // The request to the subgraph itself failed
        logger.error('Subgraph unavailable', { url, error });
      },
    });
  },
});

// Graceful degradation: when the user lookup fails, log the failure and
// answer with a placeholder record instead of failing the whole field.
const resolvers = {
  Query: {
    async user(_: any, args: { id: string }) {
      const { id } = args;
      try {
        const found = await userRepository.findById(id);
        return found;
      } catch (error) {
        logger.error('User lookup failed', { id, error });
        // Placeholder carrying only id, email and name
        const placeholder = {
          id,
          email: 'unavailable@example.com',
          name: 'User data temporarily unavailable',
        };
        return placeholder;
      }
    },
  },
};

Performance Monitoring

import { ApolloServerPluginUsageReporting } from 'apollo-server-core';

/**
 * Record one `graphql.subgraph.calls` count per fetch node in a query plan.
 *
 * NOTE(review): assumes the query-plan trace is a tree whose nodes carry one
 * of `fetch`, `sequence`, `parallel` or `flatten` — confirm against the
 * installed reporting protobuf version.
 */
const recordSubgraphCalls = (node: any): void => {
  if (!node) {
    return;
  }
  if (node.fetch?.serviceName) {
    metricsCollector.counter('graphql.subgraph.calls', {
      service: node.fetch.serviceName,
    });
  }
  for (const child of node.sequence?.nodes ?? []) {
    recordSubgraphCalls(child);
  }
  for (const child of node.parallel?.nodes ?? []) {
    recordSubgraphCalls(child);
  }
  recordSubgraphCalls(node.flatten?.node);
};

/**
 * Gateway server with usage reporting and custom request metrics.
 */
const server = new ApolloServer({
  gateway,
  plugins: [
    // Apollo Studio integration
    // NOTE(review): `all: true` reports every variable value and header;
    // confirm this is acceptable for any personal data clients may send.
    ApolloServerPluginUsageReporting({
      sendVariableValues: { all: true },
      sendHeaders: { all: true },
    }),

    // Custom metrics
    {
      async requestDidStart() {
        // The request metrics object exposes no duration, so measure it here
        // with a monotonic clock.
        const startedAt = process.hrtime.bigint();

        return {
          async willSendResponse({ metrics, operationName }) {
            // Record query duration (milliseconds) by operation
            const duration = Number(process.hrtime.bigint() - startedAt) / 1e6;

            metricsCollector.histogram('graphql.query.duration', duration, {
              // The operation name lives on the request context, not on the
              // user-defined context object.
              operation: operationName ?? 'anonymous',
            });

            // Track subgraph calls
            recordSubgraphCalls(metrics?.queryPlanTrace);
          },

          async didEncounterErrors({ errors }) {
            errors.forEach((error) => {
              metricsCollector.counter('graphql.errors', {
                type: error.extensions?.code || 'UNKNOWN',
              });
            });
          },
        };
      },
    },
  ],
});

Schema Management

# Using Rover CLI for schema management

# Check the local products schema against the my-graph@production variant
# before publishing it
rover subgraph check my-graph@production \
  --name products \
  --schema ./products-service/schema.graphql

# Publish the products schema to Apollo Studio, recording the URL the
# gateway should use to reach the subgraph
rover subgraph publish my-graph@production \
  --name products \
  --schema ./products-service/schema.graphql \
  --routing-url http://products-service:4002/graphql

# Download the composed supergraph schema into a local file
rover supergraph fetch my-graph@production > supergraph.graphql

Testing Federated Services

import { ApolloServer } from 'apollo-server';
import { buildSubgraphSchema } from '@apollo/subgraph';

// Subgraph-level tests: run operations directly against the Products schema,
// without a gateway in front of it.
// NOTE(review): the expectations below assume the repositories are seeded or
// mocked with product '123' and user 'user-1' — confirm the fixture setup.
describe('Products Service', () => {
  let server: ApolloServer;

  beforeAll(() => {
    server = new ApolloServer({
      schema: buildSubgraphSchema({ typeDefs, resolvers }),
    });
  });

  afterAll(async () => {
    // Release server resources so the test run can exit cleanly.
    await server.stop();
  });

  it('resolves product by ID', async () => {
    const result = await server.executeOperation({
      query: `
        query GetProduct($id: ID!) {
          product(id: $id) {
            id
            name
            price
          }
        }
      `,
      variables: { id: '123' },
    });

    expect(result.errors).toBeUndefined();
    expect(result.data?.product).toMatchObject({
      id: '123',
      name: 'Test Product',
      price: 29.99,
    });
  });

  it('extends User type correctly', async () => {
    // `_entities` is the entry point the gateway uses to resolve fields this
    // subgraph contributes to entities owned elsewhere.
    const result = await server.executeOperation({
      query: `
        query {
          _entities(representations: [{__typename: "User", id: "user-1"}]) {
            ... on User {
              purchaseHistory {
                id
                name
              }
            }
          }
        }
      `,
    });

    expect(result.errors).toBeUndefined();
    // Exactly one representation was sent, so exactly one entity must come
    // back, carrying the (non-null) purchaseHistory list.
    expect(result.data?._entities).toHaveLength(1);
    expect(Array.isArray(result.data?._entities[0]?.purchaseHistory)).toBe(true);
  });
});

Deployment Strategy

# Kubernetes deployment for federated services

# Gateway
apiVersion: apps/v1
kind: Deployment
metadata:
  name: graphql-gateway
  labels:
    app: graphql-gateway
spec:
  replicas: 3
  # apps/v1 Deployments require a selector that matches the pod template labels
  selector:
    matchLabels:
      app: graphql-gateway
  template:
    metadata:
      labels:
        app: graphql-gateway
    spec:
      containers:
      - name: gateway
        # NOTE(review): pin a versioned tag instead of :latest for reproducible rollouts
        image: my-org/graphql-gateway:latest
        ports:
        - containerPort: 4000
        env:
        - name: APOLLO_GRAPH_REF
          value: "my-graph@production"
        - name: APOLLO_KEY
          valueFrom:
            secretKeyRef:
              name: apollo-secrets
              key: api-key
        # Same health endpoint the subgraph manifest probes, on the gateway port
        livenessProbe:
          httpGet:
            path: /.well-known/apollo/server-health
            port: 4000
        readinessProbe:
          httpGet:
            path: /.well-known/apollo/server-health
            port: 4000
        resources:
          requests:
            memory: "256Mi"
            cpu: "500m"
          limits:
            memory: "512Mi"
            cpu: "1000m"

---
# Subgraph service
apiVersion: apps/v1
kind: Deployment
metadata:
  name: products-service
  labels:
    app: products-service
spec:
  replicas: 2
  # apps/v1 Deployments require a selector that matches the pod template labels
  selector:
    matchLabels:
      app: products-service
  template:
    metadata:
      labels:
        app: products-service
    spec:
      containers:
      - name: products
        # NOTE(review): pin a versioned tag instead of :latest for reproducible rollouts
        image: my-org/products-service:latest
        ports:
        - containerPort: 4002
        livenessProbe:
          httpGet:
            path: /.well-known/apollo/server-health
            port: 4002
        readinessProbe:
          httpGet:
            path: /.well-known/apollo/server-health
            port: 4002

Key Takeaways

Use @key directive: Defines entities that can be referenced across services
Prevent N+1 queries: Batch lookups with DataLoader, creating a fresh loader for each request so cached results are never shared between users
Plan schema ownership: Clear boundaries prevent conflicts
Monitor query performance: Track which fields are slow
Version schemas carefully: Breaking changes impact multiple teams
Cache strategically: Use @cacheControl directive
Test reference resolvers: Ensure cross-service references work

GraphQL Federation enables true microservices independence while maintaining a unified API. Start with 2-3 services, establish patterns, then scale to dozens of services as teams grow.