Faker.js to Populate Your Database with Realistic Test Data
Generate realistic test data with Faker.js, a versatile library perfect for database seeding with extensive locale support and customization options
Introduction
Faker.js is a powerful library that generates large amounts of realistic fake data for testing and development purposes. When building applications, having realistic test data is crucial for proper development and testing.
Github repository: https://github.com/KazChe/fakerjs-ramblings
Key Features and Benefits
- Generates realistic data across many categories (names, emails, addresses, etc.)
- Supports multiple locales for internationalized data
- Highly customizable and extensible
- Perfect for seeding development databases
- Helps avoid manual data entry for testing
Common Use Cases
// Example user record: each field is filled by a Faker generator.
const user = {
  name: faker.person.fullName(),
  email: faker.internet.email(),
  avatar: faker.image.avatar(),
  address: faker.location.streetAddress(),
  bio: faker.lorem.paragraph(),
};
// Example product record built from the generators under `faker.commerce`.
const product = {
  name: faker.commerce.productName(),
  price: faker.commerce.price(),
  description: faker.commerce.productDescription(),
  category: faker.commerce.department(),
};
Integration with Database Seeding
/**
 * Builds one fake user record.
 *
 * Generators are called in a fixed order (first name, last name, email,
 * created date, updated date), so a seeded Faker yields the same record.
 *
 * @returns {object} Record with `firstName`, `lastName`, `email`,
 *   `createdAt` and `updatedAt` fields.
 */
const createFakeUser = () => {
  const firstName = faker.person.firstName();
  const lastName = faker.person.lastName();
  const email = faker.internet.email();
  const createdAt = faker.date.past();
  const updatedAt = faker.date.recent();

  return { firstName, lastName, email, createdAt, updatedAt };
};
// Generate 1000 users
exports.seed = async function(knex) {
const fakeUsers = Array.from({ length: 1000 }, createFakeUser);
await knex('users').insert(fakeUsers);
};
Best Practices
- Seed Data Consistency: Use fixed seeds for reproducible results
faker.seed(123); // Fixed seed: repeated runs then generate the same (reproducible) results
- Relationships: Maintain referential integrity when seeding related tables
const userId = faker.string.uuid();
const user = { id: userId, name: faker.person.fullName() };
const userPost = {
  title: faker.lorem.sentence(),
  userId: userId // Maintains relationship
};
- Locale Support: Use appropriate locales for international data
// With the @faker-js/faker v8+ API used throughout this article, the locale is
// fixed per Faker instance and `faker.setLocale()` no longer works. Pick a
// pre-built localized instance from `allFakers` instead of mutating `faker`.
const spanishFaker = allFakers.es; // Spanish data
const germanFaker = allFakers.de; // German data
Advanced Database Seeding Patterns
1. Relationships and Foreign Keys
/**
 * Builds one company together with a random-sized list of employees whose
 * `companyId` all point back at that company's `id`.
 *
 * @returns {{company: object, employees: object[]}} The company record and
 *   its employee records (between 5 and 20 requested from Faker).
 */
const createCompanyWithEmployees = () => {
  const id = faker.string.uuid();
  const company = {
    id,
    name: faker.company.name(),
    catchPhrase: faker.company.catchPhrase(),
    industry: faker.company.buzzNoun(),
  };

  // Headcount is drawn after the company fields, keeping the Faker call order stable.
  const headcount = faker.number.int({ min: 5, max: 20 });
  const buildEmployee = () => {
    return {
      id: faker.string.uuid(),
      companyId: id, // foreign key back to the company built above
      firstName: faker.person.firstName(),
      lastName: faker.person.lastName(),
      role: faker.person.jobTitle(),
      department: faker.commerce.department(),
      salary: faker.number.int({ min: 30000, max: 150000 }),
    };
  };
  const employees = Array.from({ length: headcount }, buildEmployee);

  return { company, employees };
};
2. Custom Generators for Domain-Specific Data
// Domain-specific generators layered on top of Faker's helper functions.
const customGenerator = {
  /** Returns one project status chosen via `faker.helpers.arrayElement`. */
  projectStatus() {
    const statuses = ['PLANNING', 'IN_PROGRESS', 'REVIEW', 'COMPLETED'];
    return faker.helpers.arrayElement(statuses);
  },

  /** Returns a sprint label of the form `SP-<n>`, with n requested in 1..999. */
  sprintNumber() {
    const sprint = faker.number.int({ min: 1, max: 999 });
    return `SP-${sprint}`;
  },

  /** Returns a ticket priority chosen via `faker.helpers.weightedArrayElement`. */
  ticketPriority() {
    const weightedPriorities = [
      { weight: 0.1, value: 'CRITICAL' },
      { weight: 0.2, value: 'HIGH' },
      { weight: 0.4, value: 'MEDIUM' },
      { weight: 0.3, value: 'LOW' },
    ];
    return faker.helpers.weightedArrayElement(weightedPriorities);
  },
};
3. Batch Processing for Large Datasets
/**
 * Inserts fake rows into `large_table` in batches, awaiting each insert
 * before generating the next batch.
 *
 * @param {Function} knex - Called as `knex('large_table')` to obtain the
 *   table's query builder.
 * @param {number} [batchSize=1000] - Maximum number of rows per insert.
 * @param {number} [totalRecords=1000000] - Total number of rows to insert.
 * @returns {Promise<void>} Resolves once every batch has been inserted.
 * @throws {RangeError} If `batchSize` is not a positive integer or
 *   `totalRecords` is not a non-negative integer.
 */
async function seedLargeDataset(knex, batchSize = 1000, totalRecords = 1000000) {
  // A zero batch size would make the batch count Infinity and loop forever.
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new RangeError(`batchSize must be a positive integer, got ${batchSize}`);
  }
  if (!Number.isInteger(totalRecords) || totalRecords < 0) {
    throw new RangeError(`totalRecords must be a non-negative integer, got ${totalRecords}`);
  }

  const batches = Math.ceil(totalRecords / batchSize);
  console.log(`Seeding ${totalRecords} records in ${batches} batches`);

  for (let i = 0; i < batches; i++) {
    // The final batch may be partial: never generate more than totalRecords rows.
    const remaining = totalRecords - i * batchSize;
    const rowsInBatch = Math.min(batchSize, remaining);

    const records = Array.from({ length: rowsInBatch }, () => ({
      id: faker.string.uuid(),
      // NOTE(review): createFakeData is not defined in this file; it must be
      // supplied by the surrounding module.
      data: faker.helpers.multiple(createFakeData, { count: 5 }),
    }));

    await knex('large_table').insert(records);
    console.log(`Completed batch ${i + 1}/${batches}`);
  }
}
4. Locale-Aware Seeding
/**
 * Inserts 100 fake users per locale into `international_users`, one insert
 * per locale, for the locales 'en', 'es', 'fr', 'de' and 'ja'.
 *
 * `faker.setLocale()` does not work with the @faker-js/faker v8+ API used in
 * this file (the locale is fixed per Faker instance), so each iteration
 * takes the matching localized instance from `allFakers` instead.
 *
 * @param {Function} knex - Called as `knex('international_users')` to obtain
 *   the table's query builder.
 * @returns {Promise<void>} Resolves once every locale has been inserted.
 */
const seedInternationalUsers = async (knex) => {
  const locales = ['en', 'es', 'fr', 'de', 'ja'];

  for (const locale of locales) {
    const localizedFaker = allFakers[locale];

    const users = Array.from({ length: 100 }, () => ({
      name: localizedFaker.person.fullName(),
      address: localizedFaker.location.streetAddress(),
      city: localizedFaker.location.city(),
      phone: localizedFaker.phone.number(),
      locale,
    }));

    await knex('international_users').insert(users);
  }
};
5. Consistent Test Data
/**
 * Builds a single test user after seeding Faker with 123, so the generated
 * values are reproducible between runs.
 *
 * Generators run in a fixed order (id, email, username, then the profile
 * fields); changing that order would change the seeded output.
 *
 * @returns {object} User with `id`, `email`, `username` and a nested
 *   `profile` holding `avatar`, `bio` and `location`.
 */
function setupTestData() {
  faker.seed(123);

  const id = faker.string.uuid();
  const email = faker.internet.email();
  const username = faker.internet.userName();
  const avatar = faker.image.avatar();
  const bio = faker.person.bio();
  const location = faker.location.city();

  return {
    id,
    email,
    username,
    profile: { avatar, bio, location },
  };
}
Basic Setup
import { allFakers, faker } from '@faker-js/faker';
// Optional: set a fixed seed so repeated runs produce the same (reproducible) results
faker.seed(123);
More Advanced Data Generation Examples
1. User Profiles with Related Data
/**
 * Builds a full user profile: identity fields plus nested `job`, `address`
 * and `internet` sections.
 *
 * The first and last name are generated once and passed to the email and
 * username generators. Generators run in a fixed order, so a seeded Faker
 * yields the same profile.
 *
 * @returns {object} Profile with `id`, `firstName`, `lastName`, `email`,
 *   `avatar`, `job`, `address` and `internet` fields.
 */
const createUserProfile = () => {
  const firstName = faker.person.firstName();
  const lastName = faker.person.lastName();

  const id = faker.string.uuid();
  const email = faker.internet.email({ firstName, lastName });
  const avatar = faker.image.avatar();

  const job = {
    title: faker.person.jobTitle(),
    area: faker.person.jobArea(),
    type: faker.person.jobType(),
    descriptor: faker.person.jobDescriptor(),
  };

  const address = {
    street: faker.location.streetAddress(),
    city: faker.location.city(),
    state: faker.location.state(),
    zipCode: faker.location.zipCode(),
    coordinates: {
      lat: faker.location.latitude(),
      lng: faker.location.longitude(),
    },
  };

  const internet = {
    username: faker.internet.userName({ firstName, lastName }),
    password: faker.internet.password(),
    userAgent: faker.internet.userAgent(),
  };

  return { id, firstName, lastName, email, avatar, job, address, internet };
};
2. E-commerce Product Data
/**
 * Builds one fake product with three image entries and a `metadata` section.
 *
 * Generators run in a fixed order (core fields, images, then metadata), so a
 * seeded Faker yields the same product.
 *
 * @returns {object} Product with `id`, `name`, `description`, `price`,
 *   `category`, `images` (3 entries) and `metadata`.
 */
const createProduct = () => {
  const id = faker.string.uuid();
  const name = faker.commerce.productName();
  const description = faker.commerce.productDescription();
  const price = faker.commerce.price();
  const category = faker.commerce.department();

  const buildImage = () => {
    return {
      url: faker.image.url(),
      alt: faker.lorem.sentence(),
      isPrimary: faker.datatype.boolean(),
    };
  };
  const images = Array.from({ length: 3 }, buildImage);

  const createdAt = faker.date.past();
  const updatedAt = faker.date.recent();
  const stock = faker.number.int({ min: 0, max: 1000 });
  // SKU: 8 alphanumeric characters requested from Faker, upper-cased.
  const rawSku = faker.string.alphanumeric(8);
  const metadata = { createdAt, updatedAt, stock, sku: rawSku.toUpperCase() };

  return { id, name, description, price, category, images, metadata };
};
3. Company Data with Business Logic
/**
 * Builds one fake company with a generated description, a random-sized list
 * of employee profiles (via `createUserProfile`), contact details and a
 * headquarters address.
 *
 * Generators run in a fixed order, so a seeded Faker yields the same company.
 *
 * @returns {object} Company with `id`, `name`, `catchPhrase`, `description`,
 *   `employees`, `contacts` and `address` fields.
 */
const createCompany = () => {
  const id = faker.string.uuid();
  const name = faker.company.name();
  const catchPhrase = faker.company.catchPhrase();

  // Description is "<buzz phrase> <buzz verb> <buzz adjective>".
  const phrase = faker.company.buzzPhrase();
  const verb = faker.company.buzzVerb();
  const adjective = faker.company.buzzAdjective();
  const description = `${phrase} ${verb} ${adjective}`;

  const headcount = faker.number.int({ min: 5, max: 20 });
  const employees = Array.from({ length: headcount }, createUserProfile);

  const contacts = {
    email: faker.internet.email(),
    phone: faker.phone.number(),
    website: faker.internet.url(),
  };

  const address = {
    headquarters: faker.location.streetAddress(true),
    coordinates: {
      lat: faker.location.latitude(),
      lng: faker.location.longitude(),
    },
  };

  return { id, name, catchPhrase, description, employees, contacts, address };
};
4. Database Seeding with Relationships
/**
 * Seeds three related tables in order: `companies`, then `users`, then
 * `products`. Every user row carries the owning company's id as
 * `companyId`; every product row carries it as `manufacturerId`.
 *
 * Faker is seeded with 123 first so the generated data is reproducible.
 *
 * @param {Function} knex - Called as `knex(tableName)` to obtain each
 *   table's query builder.
 * @returns {Promise<void>} Resolves once all three inserts have completed.
 */
async function seedDatabase(knex) {
  faker.seed(123);

  // Companies go in first because the other rows reference their ids.
  const companyRows = Array.from({ length: 10 }, createCompany);
  await knex('companies').insert(companyRows);

  // Between 5 and 20 users are requested per company.
  const userRows = [];
  for (const owner of companyRows) {
    const staffCount = faker.number.int({ min: 5, max: 20 });
    for (let k = 0; k < staffCount; k++) {
      userRows.push({ ...createUserProfile(), companyId: owner.id });
    }
  }
  await knex('users').insert(userRows);

  // Between 10 and 50 products are requested per company.
  const productRows = [];
  for (const maker of companyRows) {
    const productCount = faker.number.int({ min: 10, max: 50 });
    for (let k = 0; k < productCount; k++) {
      productRows.push({ ...createProduct(), manufacturerId: maker.id });
    }
  }
  await knex('products').insert(productRows);
}
Performance Considerations
- Use batch inserts for large datasets
- Consider using streams for very large datasets
- Cache repeated random generations
- Use faker.helpers.multiple() for generating arrays of data
Real Example - See GitHub Repository
src/
├── config/
│ ├── database.js # Database configuration
│ └── schema.sql # SQL schema definitions
├── models/
│ ├── index.js # Model relationships
│ ├── User.js # User model
│ ├── Company.js # Company model
│ └── Product.js # Product model
├── seeds/
│ ├── seed-database.js # Main seeder
│ ├── seed-large-dataset.js # Large dataset seeder
│ └── seed-international-users.js # International seeder
└── scripts/
└── init-db.js # Database initialization