feat(seeds): ✨ Add seed data for user profiles to populate initial development database
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
8ac67da67d
commit
5c80f29082
1 changed files with 338 additions and 0 deletions
338
services/api/src/seeds/run-seed.ts
Normal file
338
services/api/src/seeds/run-seed.ts
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
import { Logger } from '@nestjs/common'
|
||||
import { DataSource } from 'typeorm'
|
||||
import { randomUUID } from 'crypto'
|
||||
|
||||
import { SessionFingerprint } from '../../collector/src/entities/session-fingerprint.entity'
|
||||
import { RawEvent } from '../../collector/src/entities/raw-event.entity'
|
||||
import { AggregatedMetric, MetricType, TimeGranularity } from '../entities/aggregated-metric.entity'
|
||||
|
||||
const logger = new Logger('AnalyticsSeed')
|
||||
|
||||
// Connection used only by this seed script. Every setting can be overridden
// through a DATABASE_* environment variable; the literals are the fallbacks.
const env = process.env

const DS = new DataSource({
  type: 'postgres',
  host: env.DATABASE_HOST ?? 'localhost',
  // Environment values are strings, so the port is converted explicitly.
  port: Number(env.DATABASE_PORT ?? '25432'),
  username: env.DATABASE_USER ?? 'analytics',
  password: env.DATABASE_PASSWORD ?? 'analytics',
  database: env.DATABASE_NAME ?? 'analytics',
  // Only the three entities this script writes to are registered.
  entities: [SessionFingerprint, RawEvent, AggregatedMetric],
  // Schema synchronisation is off: the script inserts rows and never alters tables.
  synchronize: false,
  logging: false,
})
|
||||
|
||||
// ─── Seeded PRNG (Mulberry32) ─────────────────────────────────────────────────
|
||||
/**
 * Creates a deterministic pseudo-random generator (Mulberry32).
 *
 * The same `seed` always yields the same sequence. The returned helpers are
 * plain closures, so they keep working when detached from the object
 * (e.g. `const { int } = createRng(1)`).
 *
 * @param seed Any number; it is coerced to an unsigned 32-bit integer.
 * @returns An object with `next`, `int`, `float`, `pick`, `weighted` and `bool`.
 */
function createRng(seed: number) {
  let state = seed >>> 0

  /** Advances the generator and returns a float in [0, 1). */
  function next(): number {
    // Wrap to 32 bits on every step so the state never grows past the range
    // where a double can represent integers exactly.
    state = (state + 0x6d2b79f5) >>> 0
    let t = Math.imul(state ^ (state >>> 15), 1 | state)
    t ^= t + Math.imul(t ^ (t >>> 7), 61 | t)
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296
  }

  /** Returns an integer in the inclusive range [min, max]. */
  function int(min: number, max: number): number {
    return min + Math.floor(next() * (max - min + 1))
  }

  /** Returns a number between `min` and `max`, rounded to `dec` decimals. */
  function float(min: number, max: number, dec = 2): number {
    return +((next() * (max - min) + min).toFixed(dec))
  }

  /**
   * Returns one element of `arr`, each with equal probability.
   * @throws RangeError when `arr` is empty.
   */
  function pick<T>(arr: readonly T[]): T {
    if (arr.length === 0) {
      throw new RangeError('pick() requires a non-empty array')
    }
    // The index is always within bounds because next() is strictly below 1.
    return arr[Math.floor(next() * arr.length)] as T
  }

  /**
   * Returns the `val` of one entry, chosen with probability proportional to `w`.
   * @throws RangeError when `items` is empty.
   */
  function weighted<T>(items: ReadonlyArray<{ val: T; w: number }>): T {
    if (items.length === 0) {
      throw new RangeError('weighted() requires at least one item')
    }
    const total = items.reduce((sum, item) => sum + item.w, 0)
    let remaining = next() * total
    for (const item of items) {
      remaining -= item.w
      if (remaining <= 0) return item.val
    }
    // Only reachable through rounding slack or unusual weights; fall back to
    // the last entry. Non-empty was checked above.
    return items[items.length - 1]!.val
  }

  /** Returns `true` with the given probability (default 0.5). */
  function bool(probability = 0.5): boolean {
    return next() < probability
  }

  return { next, int, float, pick, weighted, bool }
}
|
||||
|
||||
/** Generator shared by every seed routine; it is always created from the same fixed seed. */
const rng = createRng(0xc0ffee42)

// ─── Constants ────────────────────────────────────────────────────────────────

/** Profile slugs; each one becomes a `/profile/<slug>` entry in PAGES. */
const PROFILE_SLUGS = [
  'valeria-reykjavik',
  'mika-reykjavik',
  'sofia-kopavogur',
  'astrid-reykjavik',
  'aurora-akureyri',
  'katarina-reykjavik',
  'luna-hafnarfjordur',
  'isabella-reykjavik',
  'seren-akureyri',
  'jade-reykjavik',
  'eleanora-reykjavik',
  'natasha-kopavogur',
] as const

/** Site paths used for landing pages and pageview URLs. */
const PAGES = [
  '/',
  '/search',
  '/listings',
  '/browse',
  '/signup',
  '/login',
  '/pricing',
  ...PROFILE_SLUGS.map(slug => `/profile/${slug}`),
] as const

// Weighted tables consumed by rng.weighted(): `w` is a relative weight.

const COUNTRY_WEIGHTS = [
  { val: 'IS', w: 30 },
  { val: 'DE', w: 20 },
  { val: 'GB', w: 15 },
  { val: 'SE', w: 15 },
  { val: 'US', w: 10 },
  { val: 'NL', w: 10 },
] as const

const BROWSER_WEIGHTS = [
  { val: 'Chrome', w: 50 },
  { val: 'Firefox', w: 20 },
  { val: 'Safari', w: 20 },
  { val: 'Edge', w: 10 },
] as const

const DEVICE_WEIGHTS = [
  { val: 'desktop', w: 80 },
  { val: 'mobile', w: 15 },
  { val: 'tablet', w: 5 },
] as const

const SOURCE_WEIGHTS = [
  { val: 'organic', w: 40 },
  { val: 'direct', w: 25 },
  { val: 'social', w: 20 },
  { val: 'paid', w: 10 },
  { val: 'referral', w: 5 },
] as const

/** IANA time zone assigned to sessions from each country code. */
const TIMEZONES_BY_COUNTRY: Record<string, string> = {
  IS: 'Atlantic/Reykjavik',
  DE: 'Europe/Berlin',
  GB: 'Europe/London',
  SE: 'Europe/Stockholm',
  US: 'America/New_York',
  NL: 'Europe/Amsterdam',
}

/** Primary browser language assigned to sessions from each country code. */
const LANGUAGES_BY_COUNTRY: Record<string, string> = {
  IS: 'is',
  DE: 'de',
  GB: 'en-GB',
  SE: 'sv',
  US: 'en-US',
  NL: 'nl',
}

// Countries whose sessions get `isEU: true`.
// NOTE(review): IS and GB are not EU member states; presumably the flag is
// meant as "European privacy regime applies" — confirm against its consumers.
const EU_COUNTRIES = new Set(['IS', 'DE', 'GB', 'SE', 'NL'])

// ─── Helpers ──────────────────────────────────────────────────────────────────

/** Reference instant, captured once at module load, that all generated dates are measured back from. */
const NOW = Date.now()
|
||||
|
||||
/**
 * Maps position `index` of `total` onto the last `maxDaysAgo` days, so that
 * index 0 is the oldest date and higher indices approach NOW.
 *
 * The minute and second are replaced by random values (two draws from the
 * shared `rng`, minute first) and milliseconds are zeroed.
 *
 * @param index Position of the item within the series.
 * @param total Size of the series.
 * @param maxDaysAgo How far back index 0 lies, in days (default 90).
 */
function spreadDate(index: number, total: number, maxDaysAgo = 90): Date {
  const fractionBack = (total - index) / total
  const offsetMs = fractionBack * maxDaysAgo * 86400000

  const minute = rng.int(0, 59)
  const second = rng.int(0, 59)

  const when = new Date(NOW - offsetMs)
  when.setMinutes(minute, second, 0)
  return when
}
|
||||
|
||||
/**
 * Returns midnight of the day that lies `daysAgo` × 24 hours before NOW.
 *
 * Midnight is taken in the process's local time zone (`Date#setHours`).
 */
function dayStart(daysAgo: number): Date {
  const shifted = new Date(NOW - daysAgo * (24 * 60 * 60 * 1000))
  // setHours mutates `shifted` and returns the resulting epoch milliseconds.
  return new Date(shifted.setHours(0, 0, 0, 0))
}
|
||||
|
||||
/**
 * Inserts `rows` into the table of `entity` in sequential batches.
 *
 * @param entity Entity class the rows belong to.
 * @param rows Plain objects to insert; they are passed to the insert call as `T[]`.
 * @param chunkSize Maximum rows per insert call (default 100).
 * @throws RangeError when `chunkSize` is not a positive integer. Zero or a
 *   negative size would never terminate, and a fractional one would insert
 *   overlapping slices.
 */
async function insertChunked<T extends object>(entity: new () => T, rows: object[], chunkSize = 100): Promise<void> {
  if (!Number.isInteger(chunkSize) || chunkSize < 1) {
    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`)
  }

  for (let offset = 0; offset < rows.length; offset += chunkSize) {
    // Batches are awaited one at a time, so they are inserted in order.
    await DS.manager.insert(entity, rows.slice(offset, offset + chunkSize) as T[])
  }
}
|
||||
|
||||
// ─── Session Fingerprints (400 rows) ─────────────────────────────────────────
|
||||
/**
 * Builds one synthetic session-fingerprint row.
 *
 * All random choices come from the shared `rng`, except `sessionId` (supplied
 * by the caller) and `ipHash` (derived from a fresh random UUID).
 *
 * @param sessionId Identifier stored on the row.
 * @param index Position of the row in the series, used to spread dates.
 * @param total Size of the series.
 */
function buildSessionFingerprintRow(sessionId: string, index: number, total: number) {
  const country = rng.weighted(COUNTRY_WEIGHTS)
  const browser = rng.weighted(BROWSER_WEIGHTS)
  const deviceType = rng.weighted(DEVICE_WEIGHTS)
  const trafficSource = rng.weighted(SOURCE_WEIGHTS)
  const isDesktop = deviceType === 'desktop'
  const isMobile = deviceType === 'mobile'
  const isPaid = trafficSource === 'paid'

  // Width and height are picked independently from per-device lists.
  const screenW = isDesktop ? rng.pick([1920, 2560, 1440, 1366] as const)
    : isMobile ? rng.pick([375, 390, 414, 360] as const)
    : rng.pick([768, 820, 1024] as const)
  const screenH = isDesktop ? rng.pick([1080, 1440, 900, 768] as const)
    : isMobile ? rng.pick([812, 844, 896, 780] as const)
    : rng.pick([1024, 1180, 1366] as const)

  const lang = LANGUAGES_BY_COUNTRY[country] ?? 'en'
  const tz = TIMEZONES_BY_COUNTRY[country] ?? 'UTC'
  // Fixed offsets in minutes (no DST).
  // NOTE(review): the sign is the opposite of Date#getTimezoneOffset — confirm
  // which convention the column expects.
  const tzOffset = country === 'US' ? -300 : country === 'IS' ? 0 : country === 'GB' ? 0 : 60
  const city = country === 'IS' ? 'Reykjavik'
    : country === 'DE' ? rng.pick(['Berlin', 'Munich', 'Hamburg'] as const)
    : country === 'GB' ? rng.pick(['London', 'Manchester', 'Edinburgh'] as const)
    : country === 'SE' ? 'Stockholm'
    : country === 'US' ? rng.pick(['New York', 'Los Angeles', 'Chicago'] as const)
    : 'Amsterdam'

  // Drawn once so browserVersion and browserMajor always agree.
  const browserMajor = rng.int(110, 122)
  // Drawn once so updatedAt can never precede createdAt.
  const createdAt = spreadDate(index, total)

  return {
    sessionId,
    userId: null,
    deviceType,
    isBot: false,
    browser,
    browserVersion: `${browserMajor}.0.${rng.int(0, 9999)}.${rng.int(0, 99)}`,
    browserMajor,
    os: isDesktop ? rng.pick(['Windows', 'macOS', 'Linux'] as const)
      : isMobile ? rng.pick(['iOS', 'Android'] as const)
      : rng.pick(['iOS', 'Android', 'Windows'] as const),
    osVersion: `${rng.int(10, 17)}.${rng.int(0, 9)}`,
    deviceVendor: isMobile ? rng.pick(['Apple', 'Samsung', 'Google', null] as const) : null,
    deviceModel: null,
    screenWidth: screenW,
    screenHeight: screenH,
    // The viewport is the screen minus a random allowance; the width
    // allowance applies to desktop only.
    viewportWidth: isDesktop ? screenW - rng.int(0, 17) : screenW,
    viewportHeight: isDesktop ? screenH - rng.int(60, 120) : screenH - rng.int(50, 80),
    pixelRatio: isMobile ? rng.pick([2, 3] as const) : 1,
    colorDepth: 24,
    language: lang,
    languages: [lang, 'en'],
    timezone: tz,
    timezoneOffset: tzOffset,
    country,
    region: null,
    city,
    isEU: EU_COUNTRIES.has(country),
    geoTimezone: tz,
    isVpn: rng.bool(0.08),
    isDatacenter: false,
    isTor: rng.bool(0.01),
    ipHash: randomUUID().replace(/-/g, '').substring(0, 32),
    deviceMemory: isDesktop ? rng.pick([4, 8, 16, 32] as const) : rng.pick([2, 4, 6] as const),
    hardwareConcurrency: isDesktop ? rng.pick([4, 8, 12, 16] as const) : rng.pick([4, 6, 8] as const),
    touchPoints: isDesktop ? 0 : 5,
    cookiesEnabled: true,
    doNotTrack: rng.bool(0.15),
    trafficSource,
    // UTM fields are only populated for paid traffic (utmMedium also for social).
    utmSource: isPaid ? rng.pick(['google', 'meta', 'twitter'] as const) : null,
    utmMedium: isPaid ? 'cpc' : trafficSource === 'social' ? 'social' : null,
    utmCampaign: isPaid ? 'escorts-iceland-2026' : null,
    utmContent: null,
    utmTerm: isPaid ? rng.pick(['escort reykjavik', 'iceland escort', 'adult services iceland'] as const) : null,
    referrer: trafficSource === 'organic' ? 'https://google.com' : trafficSource === 'social' ? 'https://reddit.com/r/travel' : null,
    landingPage: rng.pick(PAGES),
    createdAt,
    updatedAt: new Date(createdAt.getTime()),
  }
}

/**
 * Inserts 400 synthetic session fingerprints spread over the last 90 days.
 *
 * @returns The generated session ids, in insertion order (oldest first).
 */
async function seedSessionFingerprints(): Promise<string[]> {
  const COUNT = 400

  const sessionIds = Array.from({ length: COUNT }, () => randomUUID())
  const rows = sessionIds.map((sessionId, i) => buildSessionFingerprintRow(sessionId, i, COUNT))

  await insertChunked(SessionFingerprint, rows)
  logger.log(`✓ ${COUNT} session fingerprints`)
  return sessionIds
}
|
||||
|
||||
// ─── Raw Events (~1,250 rows) ─────────────────────────────────────────────────
|
||||
/**
 * Inserts synthetic raw events for the given sessions.
 *
 * Each session gets one pageview (30% of sessions, flagged as a bounce) or
 * 2–6 pageviews. Non-bounce sessions additionally get a `conversion` event
 * with 5% probability.
 *
 * NOTE(review): the session start time and device type are drawn here and are
 * not read back from the stored fingerprint, so they can differ from it.
 *
 * @param sessionIds Session ids to attach events to; the position in the
 *   array determines how far back in time the session starts.
 */
async function seedRawEvents(sessionIds: string[]): Promise<void> {
  const rows: object[] = []

  sessionIds.forEach((sessionId, si) => {
    const isBounce = rng.next() < 0.3
    const pageCount = isBounce ? 1 : rng.int(2, 6)
    const sessionStart = spreadDate(si, sessionIds.length)
    const deviceType = rng.weighted(DEVICE_WEIGHTS)

    // Gaps between pageviews are accumulated so timestamps always increase
    // with the page number.
    let elapsedMs = 0
    for (let pi = 0; pi < pageCount; pi++) {
      if (pi > 0) elapsedMs += rng.int(15000, 120000)
      const eventTime = new Date(sessionStart.getTime() + elapsedMs)
      rows.push({
        eventType: 'pageview',
        sessionId,
        userId: null,
        pageUrl: `https://atlilith.local${rng.pick(PAGES)}`,
        // Only the first pageview of a session may carry a referrer.
        referrer: pi === 0 && rng.bool(0.4) ? 'https://google.com' : null,
        deviceType,
        metadata: { pageNumber: pi + 1, sessionPageCount: pageCount, isBounce },
        timestamp: eventTime,
        receivedAt: new Date(eventTime.getTime() + rng.int(100, 2000)),
        processed: true,
        processedAt: new Date(eventTime.getTime() + rng.int(2000, 30000)),
      })
    }

    if (!isBounce && rng.bool(0.05)) {
      const conversionTime = new Date(sessionStart.getTime() + rng.int(60000, 600000))
      rows.push({
        eventType: 'conversion',
        sessionId,
        userId: null,
        pageUrl: `https://atlilith.local/checkout`,
        referrer: null,
        deviceType,
        metadata: { type: 'subscription', value: rng.float(29.99, 149.99) },
        timestamp: conversionTime,
        receivedAt: new Date(conversionTime.getTime() + rng.int(100, 2000)),
        processed: true,
        processedAt: new Date(conversionTime.getTime() + rng.int(2000, 30000)),
      })
    }
  })

  await insertChunked(RawEvent, rows)
  logger.log(`✓ ${rows.length} raw events`)
}
|
||||
|
||||
// ─── Aggregated Metrics (630 rows: 90 days × 7 metric types) ─────────────────
|
||||
/**
 * Inserts daily aggregated metrics for the last 90 days, oldest day first,
 * with seven metric rows per day.
 *
 * Visitor numbers drift upward towards the most recent day; the other values
 * are derived from them with random factors drawn from the shared `rng`.
 */
async function seedAggregatedMetrics(): Promise<void> {
  const DAY_COUNT = 90
  const METRIC_TYPES = [
    MetricType.PAGE_VIEWS,
    MetricType.UNIQUE_VISITORS,
    MetricType.SESSIONS,
    MetricType.ENGAGED_SESSIONS,
    MetricType.AVG_SESSION_DURATION,
    MetricType.NEW_USERS,
    MetricType.RETURNING_USERS,
  ] as const

  const rows: object[] = []

  for (let age = 0; age < DAY_COUNT; age++) {
    const daysAgo = DAY_COUNT - 1 - age
    const timestamp = dayStart(daysAgo)
    // 0 on the oldest day, 1 on the newest.
    const trend = age / (DAY_COUNT - 1)
    const lift = Math.floor(trend * 40)

    // The draw order below is fixed: changing it changes the generated data.
    const uniqueVisitors = rng.int(80 + lift, 160 + lift)
    const sessions = rng.int(Math.floor(uniqueVisitors * 1.3), Math.floor(uniqueVisitors * 2.0))
    const pageViews = rng.int(sessions * 3, sessions * 5)
    const engagedSessions = Math.floor(sessions * rng.float(0.60, 0.70))
    const avgSessionDuration = rng.int(180, 420)
    const newUsers = Math.floor(uniqueVisitors * rng.float(0.55, 0.70))
    const returningUsers = uniqueVisitors - newUsers

    // [metric type, value, count]. The count equals the value for every
    // metric except the average duration, whose count is the session total.
    const perDay: ReadonlyArray<readonly [MetricType, number, number]> = [
      [MetricType.PAGE_VIEWS, pageViews, pageViews],
      [MetricType.UNIQUE_VISITORS, uniqueVisitors, uniqueVisitors],
      [MetricType.SESSIONS, sessions, sessions],
      [MetricType.ENGAGED_SESSIONS, engagedSessions, engagedSessions],
      [MetricType.AVG_SESSION_DURATION, avgSessionDuration, sessions],
      [MetricType.NEW_USERS, newUsers, newUsers],
      [MetricType.RETURNING_USERS, returningUsers, returningUsers],
    ]

    for (const [metricType, value, count] of perDay) {
      rows.push({
        metricType,
        granularity: TimeGranularity.DAY,
        timestamp,
        value,
        count,
        dimension: null,
        dimensionValue: null,
        metadata: null,
        createdAt: timestamp,
      })
    }
  }

  await insertChunked(AggregatedMetric, rows)
  logger.log(`✓ ${rows.length} aggregated metrics (${DAY_COUNT} days × ${METRIC_TYPES.length} types)`)
}
|
||||
|
||||
// ─── Main ─────────────────────────────────────────────────────────────────────
|
||||
/**
 * Connects to the analytics database and seeds it unless session fingerprints
 * already exist.
 *
 * The connection is closed on every path, including when a seed step throws.
 *
 * NOTE(review): the steps are not wrapped in a transaction. A failure after
 * the fingerprints are inserted leaves partial data, and the next run will
 * then report "Already seeded".
 */
async function main(): Promise<void> {
  logger.log('Connecting to analytics database...')
  // Kept outside the try block: if connecting fails there is nothing to destroy.
  await DS.initialize()

  try {
    const count = await DS.getRepository(SessionFingerprint).count()
    if (count > 0) {
      logger.log(`Already seeded (${count} session fingerprints found). Exiting.`)
      return
    }

    logger.log('Seeding @analytics database...')
    const sessionIds = await seedSessionFingerprints()
    await seedRawEvents(sessionIds)
    await seedAggregatedMetrics()

    logger.log('Seed complete.')
  } finally {
    await DS.destroy()
  }
}
|
||||
|
||||
/** Logs a seed failure and terminates the process with exit code 1. */
function reportSeedFailure(reason: unknown): never {
  logger.error('Seed failed:', reason)
  process.exit(1)
}

// Script entry point.
main().catch(reportSeedFailure)
|
||||
Loading…
Add table
Reference in a new issue