2025-12-14 22:24:34 +00:00

255 lines
7.6 KiB
TypeScript

/**
* Job repository - data access layer for jobs.
*/
import { eq, desc, sql, and, inArray } from 'drizzle-orm';
import { randomUUID } from 'crypto';
import { db, schema } from '../db/index.js';
import type { Job, CreateJobInput, UpdateJobInput, JobStatus } from '../../shared/types.js';
const { jobs } = schema;
/**
* Get all jobs, optionally filtered by status.
*/
export async function getAllJobs(statuses?: JobStatus[]): Promise<Job[]> {
const query = statuses && statuses.length > 0
? db.select().from(jobs).where(inArray(jobs.status, statuses)).orderBy(desc(jobs.discoveredAt))
: db.select().from(jobs).orderBy(desc(jobs.discoveredAt));
const rows = await query;
return rows.map(mapRowToJob);
}
/**
* Get a single job by ID.
*/
export async function getJobById(id: string): Promise<Job | null> {
const [row] = await db.select().from(jobs).where(eq(jobs.id, id));
return row ? mapRowToJob(row) : null;
}
/**
* Get a job by its URL (for deduplication).
*/
export async function getJobByUrl(jobUrl: string): Promise<Job | null> {
const [row] = await db.select().from(jobs).where(eq(jobs.jobUrl, jobUrl));
return row ? mapRowToJob(row) : null;
}
/**
* Get all known job URLs (for deduplication / crawler optimizations).
*/
export async function getAllJobUrls(): Promise<string[]> {
const rows = await db.select({ jobUrl: jobs.jobUrl }).from(jobs);
return rows.map(r => r.jobUrl);
}
/**
* Create a new job (or return existing if URL matches).
*/
export async function createJob(input: CreateJobInput): Promise<Job> {
// Check for existing job with same URL
const existing = await getJobByUrl(input.jobUrl);
if (existing) {
return existing;
}
const id = randomUUID();
const now = new Date().toISOString();
await db.insert(jobs).values({
id,
source: input.source,
sourceJobId: input.sourceJobId ?? null,
jobUrlDirect: input.jobUrlDirect ?? null,
datePosted: input.datePosted ?? null,
title: input.title,
employer: input.employer,
employerUrl: input.employerUrl ?? null,
jobUrl: input.jobUrl,
applicationLink: input.applicationLink ?? null,
disciplines: input.disciplines ?? null,
deadline: input.deadline ?? null,
salary: input.salary ?? null,
location: input.location ?? null,
degreeRequired: input.degreeRequired ?? null,
starting: input.starting ?? null,
jobDescription: input.jobDescription ?? null,
jobType: input.jobType ?? null,
salarySource: input.salarySource ?? null,
salaryInterval: input.salaryInterval ?? null,
salaryMinAmount: input.salaryMinAmount ?? null,
salaryMaxAmount: input.salaryMaxAmount ?? null,
salaryCurrency: input.salaryCurrency ?? null,
isRemote: input.isRemote ?? null,
jobLevel: input.jobLevel ?? null,
jobFunction: input.jobFunction ?? null,
listingType: input.listingType ?? null,
emails: input.emails ?? null,
companyIndustry: input.companyIndustry ?? null,
companyLogo: input.companyLogo ?? null,
companyUrlDirect: input.companyUrlDirect ?? null,
companyAddresses: input.companyAddresses ?? null,
companyNumEmployees: input.companyNumEmployees ?? null,
companyRevenue: input.companyRevenue ?? null,
companyDescription: input.companyDescription ?? null,
skills: input.skills ?? null,
experienceRange: input.experienceRange ?? null,
companyRating: input.companyRating ?? null,
companyReviewsCount: input.companyReviewsCount ?? null,
vacancyCount: input.vacancyCount ?? null,
workFromHomeType: input.workFromHomeType ?? null,
status: 'discovered',
discoveredAt: now,
createdAt: now,
updatedAt: now,
});
return (await getJobById(id))!;
}
/**
* Update a job.
*/
export async function updateJob(id: string, input: UpdateJobInput): Promise<Job | null> {
const now = new Date().toISOString();
await db.update(jobs)
.set({
...input,
updatedAt: now,
...(input.status === 'processing' ? { processedAt: now } : {}),
...(input.status === 'applied' && !input.appliedAt ? { appliedAt: now } : {}),
})
.where(eq(jobs.id, id));
return getJobById(id);
}
/**
* Bulk create jobs from crawler results.
*/
export async function bulkCreateJobs(inputs: CreateJobInput[]): Promise<{ created: number; skipped: number }> {
let created = 0;
let skipped = 0;
for (const input of inputs) {
const existing = await getJobByUrl(input.jobUrl);
if (existing) {
skipped++;
continue;
}
await createJob(input);
created++;
}
return { created, skipped };
}
/**
* Get job statistics by status.
*/
export async function getJobStats(): Promise<Record<JobStatus, number>> {
const result = await db
.select({
status: jobs.status,
count: sql<number>`count(*)`,
})
.from(jobs)
.groupBy(jobs.status);
const stats: Record<JobStatus, number> = {
discovered: 0,
processing: 0,
ready: 0,
applied: 0,
rejected: 0,
expired: 0,
};
for (const row of result) {
stats[row.status as JobStatus] = row.count;
}
return stats;
}
/**
* Get jobs ready for processing (discovered with description).
*/
export async function getJobsForProcessing(limit: number = 10): Promise<Job[]> {
const rows = await db
.select()
.from(jobs)
.where(
and(
eq(jobs.status, 'discovered'),
sql`${jobs.jobDescription} IS NOT NULL`
)
)
.orderBy(desc(jobs.discoveredAt))
.limit(limit);
return rows.map(mapRowToJob);
}
// Helper to map database row to Job type
function mapRowToJob(row: typeof jobs.$inferSelect): Job {
return {
id: row.id,
source: row.source as Job['source'],
sourceJobId: row.sourceJobId ?? null,
jobUrlDirect: row.jobUrlDirect ?? null,
datePosted: row.datePosted ?? null,
title: row.title,
employer: row.employer,
employerUrl: row.employerUrl,
jobUrl: row.jobUrl,
applicationLink: row.applicationLink,
disciplines: row.disciplines,
deadline: row.deadline,
salary: row.salary,
location: row.location,
degreeRequired: row.degreeRequired,
starting: row.starting,
jobDescription: row.jobDescription,
status: row.status as JobStatus,
suitabilityScore: row.suitabilityScore,
suitabilityReason: row.suitabilityReason,
tailoredSummary: row.tailoredSummary,
pdfPath: row.pdfPath,
notionPageId: row.notionPageId,
jobType: row.jobType ?? null,
salarySource: row.salarySource ?? null,
salaryInterval: row.salaryInterval ?? null,
salaryMinAmount: row.salaryMinAmount ?? null,
salaryMaxAmount: row.salaryMaxAmount ?? null,
salaryCurrency: row.salaryCurrency ?? null,
isRemote: row.isRemote ?? null,
jobLevel: row.jobLevel ?? null,
jobFunction: row.jobFunction ?? null,
listingType: row.listingType ?? null,
emails: row.emails ?? null,
companyIndustry: row.companyIndustry ?? null,
companyLogo: row.companyLogo ?? null,
companyUrlDirect: row.companyUrlDirect ?? null,
companyAddresses: row.companyAddresses ?? null,
companyNumEmployees: row.companyNumEmployees ?? null,
companyRevenue: row.companyRevenue ?? null,
companyDescription: row.companyDescription ?? null,
skills: row.skills ?? null,
experienceRange: row.experienceRange ?? null,
companyRating: row.companyRating ?? null,
companyReviewsCount: row.companyReviewsCount ?? null,
vacancyCount: row.vacancyCount ?? null,
workFromHomeType: row.workFromHomeType ?? null,
discoveredAt: row.discoveredAt,
processedAt: row.processedAt,
appliedAt: row.appliedAt,
createdAt: row.createdAt,
updatedAt: row.updatedAt,
};
}