fix(jobs): treat isRemote as 100% remote only; tighten cron for Canada QA

Reject hybrid or partial-office postings at ingest so the Remote badge and filters match fully remote roles. Cron can PATCH search geography, remote-only workplace types, and QA search terms before each scheduled pipeline run.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
f5179304c1
commit
2e44a131e1
@ -6,6 +6,7 @@ import { createInterface } from "node:readline";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { resolveSearchCities } from "@shared/search-cities.js";
|
||||
import type { CreateJobInput, JobSource } from "@shared/types/jobs";
|
||||
import { normalizeIsRemote } from "@shared/work-arrangement.js";
|
||||
import {
|
||||
toNumberOrNull,
|
||||
toStringOrNull,
|
||||
@ -374,27 +375,41 @@ function mapJobSpyRows(
|
||||
const salary = formatSalary({ minAmount, maxAmount, currency, interval });
|
||||
|
||||
const jobUrlDirect = toStringOrNull(row.job_url_direct);
|
||||
const title = toStringOrNull(row.title) ?? "Unknown Title";
|
||||
const jobDescription = toStringOrNull(row.description) ?? undefined;
|
||||
const location = toStringOrNull(row.location) ?? undefined;
|
||||
const jobType = toStringOrNull(row.job_type) ?? undefined;
|
||||
const workFromHomeType =
|
||||
toStringOrNull(row.work_from_home_type) ?? undefined;
|
||||
|
||||
jobs.push({
|
||||
source,
|
||||
sourceJobId: toStringOrNull(row.id) ?? undefined,
|
||||
jobUrlDirect: jobUrlDirect ?? undefined,
|
||||
datePosted: toStringOrNull(row.date_posted) ?? undefined,
|
||||
title: toStringOrNull(row.title) ?? "Unknown Title",
|
||||
title,
|
||||
employer: toStringOrNull(row.company) ?? "Unknown Employer",
|
||||
employerUrl: toStringOrNull(row.company_url) ?? undefined,
|
||||
jobUrl,
|
||||
applicationLink: jobUrlDirect ?? jobUrl,
|
||||
location: toStringOrNull(row.location) ?? undefined,
|
||||
jobDescription: toStringOrNull(row.description) ?? undefined,
|
||||
location,
|
||||
jobDescription,
|
||||
salary: salary ?? undefined,
|
||||
jobType: toStringOrNull(row.job_type) ?? undefined,
|
||||
jobType,
|
||||
salarySource: toStringOrNull(row.salary_source) ?? undefined,
|
||||
salaryInterval: interval ?? undefined,
|
||||
salaryMinAmount: minAmount ?? undefined,
|
||||
salaryMaxAmount: maxAmount ?? undefined,
|
||||
salaryCurrency: currency ?? undefined,
|
||||
isRemote: toBooleanOrNull(row.is_remote) ?? undefined,
|
||||
isRemote:
|
||||
normalizeIsRemote({
|
||||
title,
|
||||
jobDescription,
|
||||
location,
|
||||
jobType,
|
||||
workFromHomeType,
|
||||
isRemote: toBooleanOrNull(row.is_remote) ?? undefined,
|
||||
}) ?? undefined,
|
||||
jobLevel: toStringOrNull(row.job_level) ?? undefined,
|
||||
jobFunction: toStringOrNull(row.job_function) ?? undefined,
|
||||
listingType: toStringOrNull(row.listing_type) ?? undefined,
|
||||
@ -413,7 +428,7 @@ function mapJobSpyRows(
|
||||
companyReviewsCount:
|
||||
toNumberOrNull(row.company_reviews_count) ?? undefined,
|
||||
vacancyCount: toNumberOrNull(row.vacancy_count) ?? undefined,
|
||||
workFromHomeType: toStringOrNull(row.work_from_home_type) ?? undefined,
|
||||
workFromHomeType,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { normalizeIsRemote } from "@shared/work-arrangement.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { deriveIsRemoteFlag, parseJobSpyProgressLine } from "../src/run";
|
||||
|
||||
@ -49,3 +50,15 @@ describe("parseJobSpyProgressLine", () => {
|
||||
expect(deriveIsRemoteFlag(["remote", "hybrid", "onsite"])).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeIsRemote (JobSpy ingest)", () => {
|
||||
it("rejects hybrid postings that JobSpy marks remote", () => {
|
||||
expect(
|
||||
normalizeIsRemote({
|
||||
title: "Automation Test Engineer (SDET)",
|
||||
jobDescription: "Job Type: Hybrid (3 days remote)",
|
||||
isRemote: true,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@ -7,6 +7,7 @@ import { getJobOwnerProfileId } from "@infra/request-context";
|
||||
import { DEFAULT_JOB_OWNER_PROFILE_ID } from "@server/infra/job-owner-context";
|
||||
import { buildJobContentFingerprint } from "@shared/job-fingerprint";
|
||||
import { canonicalizeJobUrl } from "@shared/job-url-canonical";
|
||||
import { normalizeIsRemote } from "@shared/work-arrangement";
|
||||
import type {
|
||||
CreateJobInput,
|
||||
Job,
|
||||
@ -400,7 +401,15 @@ async function insertJob(input: CreateJobInput): Promise<Job> {
|
||||
salaryMinAmount: input.salaryMinAmount ?? null,
|
||||
salaryMaxAmount: input.salaryMaxAmount ?? null,
|
||||
salaryCurrency: input.salaryCurrency ?? null,
|
||||
isRemote: input.isRemote ?? null,
|
||||
isRemote:
|
||||
normalizeIsRemote({
|
||||
title: input.title,
|
||||
jobDescription: input.jobDescription,
|
||||
location: input.location,
|
||||
workFromHomeType: input.workFromHomeType,
|
||||
jobType: input.jobType,
|
||||
isRemote: input.isRemote,
|
||||
}) ?? null,
|
||||
jobLevel: input.jobLevel ?? null,
|
||||
jobFunction: input.jobFunction ?? null,
|
||||
listingType: input.listingType ?? null,
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
|
||||
import { logger } from "@infra/logger";
|
||||
import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types";
|
||||
import { jobLikelyRequiresOfficePresence } from "@shared/work-arrangement";
|
||||
import { LlmService } from "./llm/service";
|
||||
import type { JsonSchemaDefinition } from "./llm/types";
|
||||
import { stripMarkdownCodeFences } from "./llm/utils/json";
|
||||
@ -326,61 +327,6 @@ function candidateWantsRemoteOnly(p: JobSearchProfile): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Job text / metadata suggests hybrid or mandatory office presence (not remote-only).
|
||||
*/
|
||||
function jobSignalsHybridOrOnsite(job: Job): boolean {
|
||||
const blob = [
|
||||
job.title,
|
||||
job.jobDescription ?? "",
|
||||
job.location ?? "",
|
||||
job.workFromHomeType ?? "",
|
||||
job.jobType ?? "",
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("\n")
|
||||
.toLowerCase();
|
||||
|
||||
const strongRemoteOnly =
|
||||
/\b100%\s*remote\b|\bfully\s+remote\b|\bremote[\s-]only\b|\bcompletely\s+remote\b|\bwork\s+from\s+anywhere\b|\banywhere\s+in\s+the\s+(us|usa|uk|world)\b/.test(
|
||||
blob,
|
||||
);
|
||||
|
||||
const hybridOrOffice =
|
||||
/\bhybrid\b/.test(blob) ||
|
||||
/\bremote[\s-]?hybrid\b/.test(blob) ||
|
||||
/\bhybrid[\s-]?remote\b/.test(blob) ||
|
||||
/\b\d[\d]?\s+days?\s+(a|per)\s+week\b.*\b(in[\s-]?office|on[\s-]?site|onsite|at\s+the\s+office)\b/.test(
|
||||
blob,
|
||||
) ||
|
||||
/\b(in[\s-]?office|on[\s-]?site|onsite|at\s+the\s+office)\b.*\b\d[\d]?\s+days?\b/.test(
|
||||
blob,
|
||||
) ||
|
||||
/\b(one|two|three|four|five|six|seven|eight|nine|ten)\s+days?\b.*\b(in[\s-]?office|on[\s-]?site|onsite)\b/.test(
|
||||
blob,
|
||||
) ||
|
||||
/\b(in[\s-]?office|on[\s-]?site|onsite)\b.*\b(one|two|three|four|five|six|seven|eight|nine|ten)\s+days?\b/.test(
|
||||
blob,
|
||||
) ||
|
||||
/\boffice[\s-]based\b/.test(blob) ||
|
||||
/\bon[\s-]?site\s+(role|position|required|mandatory)\b/.test(blob) ||
|
||||
/\b(required|must)\b.*\b(in[\s-]?office|on[\s-]?site|onsite|in[\s-]?person)\b/.test(
|
||||
blob,
|
||||
);
|
||||
|
||||
const wfh = (job.workFromHomeType ?? "").toLowerCase();
|
||||
if (wfh.includes("hybrid")) return true;
|
||||
|
||||
if (job.isRemote === false) {
|
||||
if (strongRemoteOnly && !hybridOrOffice) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (hybridOrOffice) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cap score when candidate wants remote-only but the job is hybrid / on-site, or
|
||||
* when the model admits a poor work-arrangement fit but still scores high.
|
||||
@ -396,7 +342,7 @@ function applyRemoteOfficeMismatchCap(
|
||||
return { score, reason };
|
||||
}
|
||||
|
||||
const officeLikely = jobSignalsHybridOrOnsite(job);
|
||||
const officeLikely = jobLikelyRequiresOfficePresence(job);
|
||||
const wam =
|
||||
typeof data.workArrangementMatch === "number"
|
||||
? data.workArrangementMatch
|
||||
|
||||
@ -10,8 +10,14 @@ JOBOPS_URL="http://127.0.0.1:3005"
|
||||
# JOB_TELEGRAM_MAX_JOBS=25
|
||||
|
||||
# Optional: override POST /api/pipeline/run sources (comma-separated). If unset, the server default applies.
|
||||
# Example (matches typical JobSpy bundle + UK sources):
|
||||
# JOBBER_PIPELINE_SOURCES=gradcracker,indeed,linkedin,glassdoor,ukvisajobs
|
||||
# Canada + QA automation + fully remote (see JOBBER_CRON_* below):
|
||||
# JOBBER_PIPELINE_SOURCES=linkedin,indeed,glassdoor,qajobsboard,arcdev,eluta,bctenet
|
||||
|
||||
# Optional: applied via PATCH /api/settings immediately before each scheduled run (ilia profile when BASIC_AUTH_USER=ilia).
|
||||
# JOBBER_CRON_SEARCH_CITIES=Canada
|
||||
# JOBBER_CRON_JOBSPY_COUNTRY=Canada
|
||||
# JOBBER_CRON_WORKPLACE_TYPES=remote
|
||||
# JOBBER_CRON_SEARCH_TERMS=QA Automation Engineer|SDET|Software Development Engineer in Test|Automation Test Engineer
|
||||
|
||||
# Optional — only if BASIC_AUTH_USER / BASIC_AUTH_PASSWORD are set in Jobber .env (use one pair; cron runs as a single identity)
|
||||
# BASIC_AUTH_USER=""
|
||||
|
||||
@ -44,6 +44,35 @@ fetch_status() {
|
||||
"${BASE}/api/pipeline/status"
|
||||
}
|
||||
|
||||
# Apply optional JOBBER_CRON_* overrides with PATCH /api/settings before a run.
#
# Reads (all optional):
#   JOBBER_CRON_SEARCH_CITIES    -> searchCities (string, sent as-is)
#   JOBBER_CRON_JOBSPY_COUNTRY   -> jobspyCountryIndeed (string, sent as-is)
#   JOBBER_CRON_WORKPLACE_TYPES  -> workplaceTypes (comma-separated list)
#   JOBBER_CRON_SEARCH_TERMS     -> searchTerms (pipe-separated list)
#
# Returns 0 without any request when no override produces a value.
# On a failed or non-ok response it reports through send_tg_html and exits
# the whole script with status 1.
apply_cron_settings() {
  # jq program shared by both list settings: split $v on $sep, trim each item,
  # drop blanks, and add {($key): [...]} only when at least one item remains.
  # A value such as " , " therefore leaves the setting untouched instead of
  # PATCHing an empty list.
  local list_filter='($v | split($sep) | map(gsub("^\\s+|\\s+$";"")) | map(select(. != ""))) as $items | if ($items | length) > 0 then . + {($key): $items} else . end'
  local patch='{}'

  if [[ -n "${JOBBER_CRON_SEARCH_CITIES:-}" ]]; then
    patch="$(jq --arg v "$JOBBER_CRON_SEARCH_CITIES" '. + {searchCities: $v}' <<<"$patch")"
  fi
  if [[ -n "${JOBBER_CRON_JOBSPY_COUNTRY:-}" ]]; then
    patch="$(jq --arg v "$JOBBER_CRON_JOBSPY_COUNTRY" '. + {jobspyCountryIndeed: $v}' <<<"$patch")"
  fi
  if [[ -n "${JOBBER_CRON_WORKPLACE_TYPES:-}" ]]; then
    patch="$(jq --arg v "$JOBBER_CRON_WORKPLACE_TYPES" --arg sep "," --arg key "workplaceTypes" \
      "$list_filter" <<<"$patch")"
  fi
  if [[ -n "${JOBBER_CRON_SEARCH_TERMS:-}" ]]; then
    patch="$(jq --arg v "$JOBBER_CRON_SEARCH_TERMS" --arg sep "|" --arg key "searchTerms" \
      "$list_filter" <<<"$patch")"
  fi

  # Nothing to override: skip the request entirely.
  if [[ "$patch" == "{}" ]]; then
    return 0
  fi

  local resp
  # `|| true` keeps a curl transport error from aborting the script (e.g. under
  # errexit) before the failure is reported below; an empty/invalid body fails
  # the `.ok == true` check.
  resp="$(curl -sS --compressed "${AUTH[@]}" -X PATCH "${BASE}/api/settings" \
    -H "Accept: application/json" -H "Content-Type: application/json" \
    -d "$patch")" || true

  if ! jq -e '.ok == true' <<<"$resp" >/dev/null 2>&1; then
    # Prefer compact JSON in the report; fall back to the raw body when it is not JSON.
    send_tg_html "Jobber: PATCH /api/settings failed before cron run: $(tg_html_escape "$(jq -c . <<<"$resp" 2>/dev/null || printf '%s\n' "$resp")")"
    exit 1
  fi
}
|
||||
|
||||
fetch_jobs_list() {
|
||||
curl -sS --compressed "${AUTH[@]}" -H "Accept: application/json" \
|
||||
"${BASE}/api/jobs?view=list"
|
||||
@ -164,6 +193,8 @@ if echo "$body" | jq -e '.data.isRunning == true' >/dev/null 2>&1; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
apply_cron_settings
|
||||
|
||||
# Optional: comma-separated sources (see JOBBER_PIPELINE_SOURCES in jobber-cron.env.example).
|
||||
# If unset, POST body is {} and the server uses its default source list.
|
||||
run_body='{}'
|
||||
|
||||
@ -2,5 +2,6 @@ export * from "./extractors";
|
||||
export * from "./job-fingerprint";
|
||||
export * from "./job-url-canonical";
|
||||
export * from "./location-support";
|
||||
export * from "./work-arrangement";
|
||||
export * from "./types";
|
||||
export * from "./utils/type-conversion";
|
||||
|
||||
75
shared/src/work-arrangement.test.ts
Normal file
75
shared/src/work-arrangement.test.ts
Normal file
@ -0,0 +1,75 @@
|
||||
import { describe, expect, it } from "vitest";
import {
  jobLikelyRequiresOfficePresence,
  jobSignalsHybridOrOnsite,
  normalizeIsRemote,
} from "./work-arrangement.js";

// Hybrid / on-site detection over the posting's free-text fields.
describe("jobSignalsHybridOrOnsite", () => {
  it("detects hybrid in description", () => {
    expect(
      jobSignalsHybridOrOnsite({
        title: "Automation Test Engineer (SDET)",
        jobDescription:
          "Job Type: Hybrid (3 days remote)\nJob Location: Vancouver, BC",
        isRemote: true,
      }),
    ).toBe(true);
  });

  it("detects N days in office", () => {
    expect(
      jobSignalsHybridOrOnsite({
        jobDescription: "3 days per week in the office, 2 days remote",
      }),
    ).toBe(true);
  });

  it("detects a hybrid workFromHomeType regardless of case", () => {
    expect(
      jobSignalsHybridOrOnsite({
        title: "QA Engineer",
        workFromHomeType: "Hybrid",
      }),
    ).toBe(true);
  });

  it("does not flag fully remote postings", () => {
    expect(
      jobSignalsHybridOrOnsite({
        jobDescription: "100% remote. Work from anywhere in Canada.",
        isRemote: true,
      }),
    ).toBe(false);
  });
});

// Upstream flag reconciled with the posting text; hybrid language always wins.
describe("normalizeIsRemote", () => {
  it("downgrades JobSpy false positive when hybrid is mentioned", () => {
    expect(
      normalizeIsRemote({
        title: "Automation Test Engineer (SDET)",
        jobDescription: "Job Type: Hybrid (3 days remote)",
        isRemote: true,
      }),
    ).toBe(false);
  });

  it("downgrades when workFromHomeType is hybrid", () => {
    expect(
      normalizeIsRemote({
        title: "QA Engineer",
        workFromHomeType: "Hybrid",
        isRemote: true,
      }),
    ).toBe(false);
  });

  it("keeps true when upstream says remote and text is fully remote", () => {
    expect(
      normalizeIsRemote({
        jobDescription: "Fully remote role. No office visits required.",
        isRemote: true,
      }),
    ).toBe(true);
  });

  it("promotes unknown upstream when text is strongly remote-only", () => {
    expect(
      normalizeIsRemote({
        jobDescription: "100% remote — work from anywhere.",
        isRemote: undefined,
      }),
    ).toBe(true);
  });

  it("returns null when remote status is unclear", () => {
    expect(
      normalizeIsRemote({
        title: "Software Engineer",
        location: "Toronto, ON",
        isRemote: undefined,
      }),
    ).toBeNull();
  });
});

// Scoring-side predicate: was previously untested.
describe("jobLikelyRequiresOfficePresence", () => {
  it("is true for hybrid postings even when upstream says remote", () => {
    expect(
      jobLikelyRequiresOfficePresence({
        jobDescription: "Hybrid role",
        isRemote: true,
      }),
    ).toBe(true);
  });

  it("is true when upstream says not remote and the text is silent", () => {
    expect(
      jobLikelyRequiresOfficePresence({
        title: "QA Engineer",
        isRemote: false,
      }),
    ).toBe(true);
  });

  it("is false when upstream says not remote but the text is fully remote", () => {
    expect(
      jobLikelyRequiresOfficePresence({
        jobDescription: "Fully remote team",
        isRemote: false,
      }),
    ).toBe(false);
  });

  it("is false when nothing indicates office presence", () => {
    expect(jobLikelyRequiresOfficePresence({ title: "QA Engineer" })).toBe(
      false,
    );
  });
});
|
||||
125
shared/src/work-arrangement.ts
Normal file
125
shared/src/work-arrangement.ts
Normal file
@ -0,0 +1,125 @@
|
||||
/**
 * Work-arrangement detection for ingest and scoring.
 * `isRemote` means 100% remote — hybrid or regular office presence disqualifies.
 *
 * Detection is heuristic: the text fields of a posting are joined (one field
 * per line), lower-cased, and tested against fixed phrase patterns. The
 * patterns that use `.*` only span a single line because `.` does not match
 * a newline.
 */

/** Posting fields consulted when classifying the work arrangement. */
export interface WorkArrangementSignals {
  title?: string | null;
  jobDescription?: string | null;
  location?: string | null;
  workFromHomeType?: string | null;
  jobType?: string | null;
  /** Upstream remote flag; `null`/`undefined` means the source did not say. */
  isRemote?: boolean | null;
}

/**
 * Ways a posting refers to being physically at the workplace. Matches
 * "in office", "in-office", "in the office", "at the office", "on site",
 * "on-site" and "onsite".
 */
const OFFICE_PRESENCE = String.raw`(?:in[\s-]?(?:the\s+)?office|at\s+the\s+office|on[\s-]?site)`;

/** Spelled-out day counts recognised next to an office-presence phrase. */
const COUNT_WORD = "(?:one|two|three|four|five|six|seven|eight|nine|ten)";

/** Phrases that state the role has no office days at all. */
const STRONG_REMOTE_ONLY_PATTERNS: readonly RegExp[] = [
  /\b100%\s*remote\b/,
  /\bfully\s+remote\b/,
  /\bremote[\s-]only\b/,
  /\bcompletely\s+remote\b/,
  /\bwork\s+from\s+anywhere\b/,
  /\banywhere\s+in\s+(?:the\s+(?:us|usa|uk|world)|canada)\b/,
];

/** Phrases that indicate hybrid, partial-remote, or required office presence. */
const HYBRID_OR_ONSITE_PATTERNS: readonly RegExp[] = [
  /\bhybrid\b/,
  /\bremote[\s-]?hybrid\b/,
  /\bhybrid[\s-]?remote\b/,
  /\bpartial(?:ly)?\s+remote\b/,
  /\b\d{1,2}\s+days?\s+remote\b/,
  /\bremote\s+\d{1,2}\s+days?\b/,
  // "3 days per week ... in the office"
  new RegExp(
    String.raw`\b\d{1,2}\s+days?\s+(?:a|per)\s+week\b.*\b${OFFICE_PRESENCE}\b`,
  ),
  // "in office ... 3 days"
  new RegExp(String.raw`\b${OFFICE_PRESENCE}\b.*\b\d{1,2}\s+days?\b`),
  // "three days ... on-site"
  new RegExp(String.raw`\b${COUNT_WORD}\s+days?\b.*\b${OFFICE_PRESENCE}\b`),
  // "on-site ... three days"
  new RegExp(String.raw`\b${OFFICE_PRESENCE}\b.*\b${COUNT_WORD}\s+days?\b`),
  /\boffice[\s-]based\b/,
  /\bon[\s-]?site\s+(?:role|position|required|mandatory)\b/,
  // "must be in the office", "required ... in person"
  new RegExp(
    String.raw`\b(?:required|must)\b.*\b(?:${OFFICE_PRESENCE}|in[\s-]?person)\b`,
  ),
];

/** `workFromHomeType` values that rule out fully remote on their own. */
const WFH_OFFICE_PATTERN = /hybrid|on[\s_-]?site/;

/**
 * Joins the non-empty text fields with newlines and lower-cases the result so
 * the patterns above can be written in lower case only.
 */
function buildBlob(signals: WorkArrangementSignals): string {
  return [
    signals.title,
    signals.jobDescription,
    signals.location,
    signals.workFromHomeType,
    signals.jobType,
  ]
    .filter(Boolean)
    .join("\n")
    .toLowerCase();
}

/** Posting text strongly indicates fully remote (no office days). */
export function jobSignalsStrongRemoteOnly(
  signals: WorkArrangementSignals,
): boolean {
  const blob = buildBlob(signals);
  return STRONG_REMOTE_ONLY_PATTERNS.some((pattern) => pattern.test(blob));
}

/**
 * Hybrid, partial-remote, or on-site/office requirements — not 100% remote.
 *
 * The upstream `isRemote` flag is ignored here; only text fields are read.
 */
export function jobSignalsHybridOrOnsite(
  signals: WorkArrangementSignals,
): boolean {
  // The structured field is checked with a looser (substring) match than the
  // word-boundary patterns, so values like "hybrid_remote" still count.
  const wfh = (signals.workFromHomeType ?? "").toLowerCase();
  if (WFH_OFFICE_PATTERN.test(wfh)) {
    return true;
  }

  const blob = buildBlob(signals);
  return HYBRID_OR_ONSITE_PATTERNS.some((pattern) => pattern.test(blob));
}

/**
 * Normalize upstream `isRemote` to 100% remote only.
 * Hybrid / office-day language forces false; strong remote-only text can promote to true.
 *
 * @returns `false` when hybrid/on-site language is present; otherwise the
 *   upstream flag when it is a boolean; otherwise `true` if the text is
 *   strongly remote-only; otherwise `null` (unknown).
 */
export function normalizeIsRemote(
  signals: WorkArrangementSignals,
): boolean | null {
  if (jobSignalsHybridOrOnsite(signals)) {
    return false;
  }
  if (typeof signals.isRemote === "boolean") {
    return signals.isRemote;
  }
  // Upstream did not say: only promote on an explicit remote-only statement.
  return jobSignalsStrongRemoteOnly(signals) ? true : null;
}

/**
 * Job likely requires office presence (for scoring caps when candidate is remote-only).
 *
 * @returns `true` when hybrid/on-site language is present, or when upstream
 *   says `isRemote === false` and the text does not strongly say remote-only;
 *   `false` otherwise (including when the remote status is unknown).
 */
export function jobLikelyRequiresOfficePresence(
  signals: WorkArrangementSignals,
): boolean {
  if (jobSignalsHybridOrOnsite(signals)) {
    return true;
  }
  if (signals.isRemote === false) {
    // An explicit remote-only statement outweighs a negative upstream flag.
    return !jobSignalsStrongRemoteOnly(signals);
  }
  return false;
}
|
||||
Loading…
x
Reference in New Issue
Block a user