fix(jobs): treat isRemote as 100% remote only; tighten cron for Canada QA

Reject hybrid or partial-office postings at ingest so the Remote badge and
filters match fully remote roles. Cron can PATCH search geography, remote-only
workplace types, and QA search terms before each scheduled pipeline run.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ilia 2026-05-16 15:53:55 -04:00
parent f5179304c1
commit 2e44a131e1
9 changed files with 286 additions and 65 deletions

View File

@ -6,6 +6,7 @@ import { createInterface } from "node:readline";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import { resolveSearchCities } from "@shared/search-cities.js"; import { resolveSearchCities } from "@shared/search-cities.js";
import type { CreateJobInput, JobSource } from "@shared/types/jobs"; import type { CreateJobInput, JobSource } from "@shared/types/jobs";
import { normalizeIsRemote } from "@shared/work-arrangement.js";
import { import {
toNumberOrNull, toNumberOrNull,
toStringOrNull, toStringOrNull,
@ -374,27 +375,41 @@ function mapJobSpyRows(
const salary = formatSalary({ minAmount, maxAmount, currency, interval }); const salary = formatSalary({ minAmount, maxAmount, currency, interval });
const jobUrlDirect = toStringOrNull(row.job_url_direct); const jobUrlDirect = toStringOrNull(row.job_url_direct);
const title = toStringOrNull(row.title) ?? "Unknown Title";
const jobDescription = toStringOrNull(row.description) ?? undefined;
const location = toStringOrNull(row.location) ?? undefined;
const jobType = toStringOrNull(row.job_type) ?? undefined;
const workFromHomeType =
toStringOrNull(row.work_from_home_type) ?? undefined;
jobs.push({ jobs.push({
source, source,
sourceJobId: toStringOrNull(row.id) ?? undefined, sourceJobId: toStringOrNull(row.id) ?? undefined,
jobUrlDirect: jobUrlDirect ?? undefined, jobUrlDirect: jobUrlDirect ?? undefined,
datePosted: toStringOrNull(row.date_posted) ?? undefined, datePosted: toStringOrNull(row.date_posted) ?? undefined,
title: toStringOrNull(row.title) ?? "Unknown Title", title,
employer: toStringOrNull(row.company) ?? "Unknown Employer", employer: toStringOrNull(row.company) ?? "Unknown Employer",
employerUrl: toStringOrNull(row.company_url) ?? undefined, employerUrl: toStringOrNull(row.company_url) ?? undefined,
jobUrl, jobUrl,
applicationLink: jobUrlDirect ?? jobUrl, applicationLink: jobUrlDirect ?? jobUrl,
location: toStringOrNull(row.location) ?? undefined, location,
jobDescription: toStringOrNull(row.description) ?? undefined, jobDescription,
salary: salary ?? undefined, salary: salary ?? undefined,
jobType: toStringOrNull(row.job_type) ?? undefined, jobType,
salarySource: toStringOrNull(row.salary_source) ?? undefined, salarySource: toStringOrNull(row.salary_source) ?? undefined,
salaryInterval: interval ?? undefined, salaryInterval: interval ?? undefined,
salaryMinAmount: minAmount ?? undefined, salaryMinAmount: minAmount ?? undefined,
salaryMaxAmount: maxAmount ?? undefined, salaryMaxAmount: maxAmount ?? undefined,
salaryCurrency: currency ?? undefined, salaryCurrency: currency ?? undefined,
isRemote: toBooleanOrNull(row.is_remote) ?? undefined, isRemote:
normalizeIsRemote({
title,
jobDescription,
location,
jobType,
workFromHomeType,
isRemote: toBooleanOrNull(row.is_remote) ?? undefined,
}) ?? undefined,
jobLevel: toStringOrNull(row.job_level) ?? undefined, jobLevel: toStringOrNull(row.job_level) ?? undefined,
jobFunction: toStringOrNull(row.job_function) ?? undefined, jobFunction: toStringOrNull(row.job_function) ?? undefined,
listingType: toStringOrNull(row.listing_type) ?? undefined, listingType: toStringOrNull(row.listing_type) ?? undefined,
@ -413,7 +428,7 @@ function mapJobSpyRows(
companyReviewsCount: companyReviewsCount:
toNumberOrNull(row.company_reviews_count) ?? undefined, toNumberOrNull(row.company_reviews_count) ?? undefined,
vacancyCount: toNumberOrNull(row.vacancy_count) ?? undefined, vacancyCount: toNumberOrNull(row.vacancy_count) ?? undefined,
workFromHomeType: toStringOrNull(row.work_from_home_type) ?? undefined, workFromHomeType,
}); });
} }

View File

@ -1,3 +1,4 @@
import { normalizeIsRemote } from "@shared/work-arrangement.js";
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { deriveIsRemoteFlag, parseJobSpyProgressLine } from "../src/run"; import { deriveIsRemoteFlag, parseJobSpyProgressLine } from "../src/run";
@ -49,3 +50,15 @@ describe("parseJobSpyProgressLine", () => {
expect(deriveIsRemoteFlag(["remote", "hybrid", "onsite"])).toBeUndefined(); expect(deriveIsRemoteFlag(["remote", "hybrid", "onsite"])).toBeUndefined();
}); });
}); });
describe("normalizeIsRemote (JobSpy ingest)", () => {
  it("rejects hybrid postings that JobSpy marks remote", () => {
    // Upstream flag says remote, but the description itself says hybrid.
    const hybridRow = {
      title: "Automation Test Engineer (SDET)",
      jobDescription: "Job Type: Hybrid (3 days remote)",
      isRemote: true,
    };

    const normalized = normalizeIsRemote(hybridRow);

    expect(normalized).toBe(false);
  });
});

View File

@ -7,6 +7,7 @@ import { getJobOwnerProfileId } from "@infra/request-context";
import { DEFAULT_JOB_OWNER_PROFILE_ID } from "@server/infra/job-owner-context"; import { DEFAULT_JOB_OWNER_PROFILE_ID } from "@server/infra/job-owner-context";
import { buildJobContentFingerprint } from "@shared/job-fingerprint"; import { buildJobContentFingerprint } from "@shared/job-fingerprint";
import { canonicalizeJobUrl } from "@shared/job-url-canonical"; import { canonicalizeJobUrl } from "@shared/job-url-canonical";
import { normalizeIsRemote } from "@shared/work-arrangement";
import type { import type {
CreateJobInput, CreateJobInput,
Job, Job,
@ -400,7 +401,15 @@ async function insertJob(input: CreateJobInput): Promise<Job> {
salaryMinAmount: input.salaryMinAmount ?? null, salaryMinAmount: input.salaryMinAmount ?? null,
salaryMaxAmount: input.salaryMaxAmount ?? null, salaryMaxAmount: input.salaryMaxAmount ?? null,
salaryCurrency: input.salaryCurrency ?? null, salaryCurrency: input.salaryCurrency ?? null,
isRemote: input.isRemote ?? null, isRemote:
normalizeIsRemote({
title: input.title,
jobDescription: input.jobDescription,
location: input.location,
workFromHomeType: input.workFromHomeType,
jobType: input.jobType,
isRemote: input.isRemote,
}) ?? null,
jobLevel: input.jobLevel ?? null, jobLevel: input.jobLevel ?? null,
jobFunction: input.jobFunction ?? null, jobFunction: input.jobFunction ?? null,
listingType: input.listingType ?? null, listingType: input.listingType ?? null,

View File

@ -4,6 +4,7 @@
import { logger } from "@infra/logger"; import { logger } from "@infra/logger";
import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types"; import type { Job, JobSearchProfile, SuitabilityAnalysis } from "@shared/types";
import { jobLikelyRequiresOfficePresence } from "@shared/work-arrangement";
import { LlmService } from "./llm/service"; import { LlmService } from "./llm/service";
import type { JsonSchemaDefinition } from "./llm/types"; import type { JsonSchemaDefinition } from "./llm/types";
import { stripMarkdownCodeFences } from "./llm/utils/json"; import { stripMarkdownCodeFences } from "./llm/utils/json";
@ -326,61 +327,6 @@ function candidateWantsRemoteOnly(p: JobSearchProfile): boolean {
return true; return true;
} }
/**
 * Reports whether a job reads as hybrid or office-bound rather than remote-only.
 *
 * A `workFromHomeType` containing "hybrid" settles the question immediately.
 * Otherwise the title, description, location, work-from-home type and job type
 * are joined (one per line, lowercased) and scanned for hybrid or
 * office-attendance wording. A job explicitly flagged `isRemote === false` is
 * treated as office-bound unless its text carries a strong remote-only phrase
 * and no office wording.
 */
function jobSignalsHybridOrOnsite(job: Job): boolean {
  const declaredArrangement = (job.workFromHomeType ?? "").toLowerCase();
  if (declaredArrangement.includes("hybrid")) {
    return true;
  }

  const text = [
    job.title,
    job.jobDescription ?? "",
    job.location ?? "",
    job.workFromHomeType ?? "",
    job.jobType ?? "",
  ]
    .filter((part) => Boolean(part))
    .join("\n")
    .toLowerCase();

  // Any single match means the posting mentions hybrid work or office attendance.
  const officeWording: RegExp[] = [
    /\bhybrid\b/,
    /\bremote[\s-]?hybrid\b/,
    /\bhybrid[\s-]?remote\b/,
    /\b\d[\d]?\s+days?\s+(a|per)\s+week\b.*\b(in[\s-]?office|on[\s-]?site|onsite|at\s+the\s+office)\b/,
    /\b(in[\s-]?office|on[\s-]?site|onsite|at\s+the\s+office)\b.*\b\d[\d]?\s+days?\b/,
    /\b(one|two|three|four|five|six|seven|eight|nine|ten)\s+days?\b.*\b(in[\s-]?office|on[\s-]?site|onsite)\b/,
    /\b(in[\s-]?office|on[\s-]?site|onsite)\b.*\b(one|two|three|four|five|six|seven|eight|nine|ten)\s+days?\b/,
    /\boffice[\s-]based\b/,
    /\bon[\s-]?site\s+(role|position|required|mandatory)\b/,
    /\b(required|must)\b.*\b(in[\s-]?office|on[\s-]?site|onsite|in[\s-]?person)\b/,
  ];
  if (officeWording.some((pattern) => pattern.test(text))) {
    return true;
  }

  // No office wording found: only an explicit "not remote" flag can still count.
  if (job.isRemote !== false) {
    return false;
  }

  const remoteOnlyWording =
    /\b100%\s*remote\b|\bfully\s+remote\b|\bremote[\s-]only\b|\bcompletely\s+remote\b|\bwork\s+from\s+anywhere\b|\banywhere\s+in\s+the\s+(us|usa|uk|world)\b/;
  return !remoteOnlyWording.test(text);
}
/** /**
* Cap score when candidate wants remote-only but the job is hybrid / on-site, or * Cap score when candidate wants remote-only but the job is hybrid / on-site, or
* when the model admits a poor work-arrangement fit but still scores high. * when the model admits a poor work-arrangement fit but still scores high.
@ -396,7 +342,7 @@ function applyRemoteOfficeMismatchCap(
return { score, reason }; return { score, reason };
} }
const officeLikely = jobSignalsHybridOrOnsite(job); const officeLikely = jobLikelyRequiresOfficePresence(job);
const wam = const wam =
typeof data.workArrangementMatch === "number" typeof data.workArrangementMatch === "number"
? data.workArrangementMatch ? data.workArrangementMatch

View File

@ -10,8 +10,14 @@ JOBOPS_URL="http://127.0.0.1:3005"
# JOB_TELEGRAM_MAX_JOBS=25 # JOB_TELEGRAM_MAX_JOBS=25
# Optional: override POST /api/pipeline/run sources (comma-separated). If unset, the server default applies. # Optional: override POST /api/pipeline/run sources (comma-separated). If unset, the server default applies.
# Example (matches typical JobSpy bundle + UK sources): # Canada + QA automation + fully remote (see JOBBER_CRON_* below):
# JOBBER_PIPELINE_SOURCES=gradcracker,indeed,linkedin,glassdoor,ukvisajobs # JOBBER_PIPELINE_SOURCES=linkedin,indeed,glassdoor,qajobsboard,arcdev,eluta,bctenet
# Optional: applied via PATCH /api/settings immediately before each scheduled run (ilia profile when BASIC_AUTH_USER=ilia).
# JOBBER_CRON_SEARCH_CITIES=Canada
# JOBBER_CRON_JOBSPY_COUNTRY=Canada
# JOBBER_CRON_WORKPLACE_TYPES=remote
# JOBBER_CRON_SEARCH_TERMS=QA Automation Engineer|SDET|Software Development Engineer in Test|Automation Test Engineer
# Optional — only if BASIC_AUTH_USER / BASIC_AUTH_PASSWORD are set in Jobber .env (use one pair; cron runs as a single identity) # Optional — only if BASIC_AUTH_USER / BASIC_AUTH_PASSWORD are set in Jobber .env (use one pair; cron runs as a single identity)
# BASIC_AUTH_USER="" # BASIC_AUTH_USER=""

View File

@ -44,6 +44,35 @@ fetch_status() {
"${BASE}/api/pipeline/status" "${BASE}/api/pipeline/status"
} }
# Push the optional JOBBER_CRON_* overrides to PATCH /api/settings.
# Builds a JSON object from whichever of the four variables are non-empty and
# sends nothing when none are set. If the response does not report ok == true,
# a Telegram message is sent and the script exits with status 1.
apply_cron_settings() {
  local payload='{}'

  if [[ -n "${JOBBER_CRON_SEARCH_CITIES:-}" ]]; then
    payload="$(jq --arg v "$JOBBER_CRON_SEARCH_CITIES" '. + {searchCities: $v}' <<<"$payload")"
  fi

  if [[ -n "${JOBBER_CRON_JOBSPY_COUNTRY:-}" ]]; then
    payload="$(jq --arg v "$JOBBER_CRON_JOBSPY_COUNTRY" '. + {jobspyCountryIndeed: $v}' <<<"$payload")"
  fi

  # Comma-separated list; entries are trimmed and empty ones dropped.
  if [[ -n "${JOBBER_CRON_WORKPLACE_TYPES:-}" ]]; then
    payload="$(jq --arg v "$JOBBER_CRON_WORKPLACE_TYPES" \
      '. + {workplaceTypes: ($v | split(",") | map(gsub("^\\s+|\\s+$";"")) | map(select(. != "")))}' \
      <<<"$payload")"
  fi

  # Pipe-separated list; entries are trimmed and empty ones dropped.
  if [[ -n "${JOBBER_CRON_SEARCH_TERMS:-}" ]]; then
    payload="$(jq --arg v "$JOBBER_CRON_SEARCH_TERMS" \
      '. + {searchTerms: ($v | split("|") | map(gsub("^\\s+|\\s+$";"")) | map(select(. != "")))}' \
      <<<"$payload")"
  fi

  # Still the untouched literal: no override was configured, so skip the request.
  if [[ "$payload" == "{}" ]]; then
    return 0
  fi

  local reply
  reply="$(curl -sS --compressed "${AUTH[@]}" -X PATCH "${BASE}/api/settings" \
    -H "Accept: application/json" -H "Content-Type: application/json" \
    -d "$payload")"

  if jq -e '.ok == true' <<<"$reply" >/dev/null 2>&1; then
    return 0
  fi

  # Report the compacted JSON when the reply parses, otherwise the raw text.
  send_tg_html "Jobber: PATCH /api/settings failed before cron run: $(tg_html_escape "$(jq -c . 2>/dev/null <<<"$reply" || echo "$reply")")"
  exit 1
}
fetch_jobs_list() { fetch_jobs_list() {
curl -sS --compressed "${AUTH[@]}" -H "Accept: application/json" \ curl -sS --compressed "${AUTH[@]}" -H "Accept: application/json" \
"${BASE}/api/jobs?view=list" "${BASE}/api/jobs?view=list"
@ -164,6 +193,8 @@ if echo "$body" | jq -e '.data.isRunning == true' >/dev/null 2>&1; then
exit 0 exit 0
fi fi
apply_cron_settings
# Optional: comma-separated sources (see JOBBER_PIPELINE_SOURCES in jobber-cron.env.example). # Optional: comma-separated sources (see JOBBER_PIPELINE_SOURCES in jobber-cron.env.example).
# If unset, POST body is {} and the server uses its default source list. # If unset, POST body is {} and the server uses its default source list.
run_body='{}' run_body='{}'

View File

@ -2,5 +2,6 @@ export * from "./extractors";
export * from "./job-fingerprint"; export * from "./job-fingerprint";
export * from "./job-url-canonical"; export * from "./job-url-canonical";
export * from "./location-support"; export * from "./location-support";
export * from "./work-arrangement";
export * from "./types"; export * from "./types";
export * from "./utils/type-conversion"; export * from "./utils/type-conversion";

View File

@ -0,0 +1,75 @@
import { describe, expect, it } from "vitest";
import {
jobSignalsHybridOrOnsite,
normalizeIsRemote,
} from "./work-arrangement.js";
describe("jobSignalsHybridOrOnsite", () => {
  it("detects hybrid in description", () => {
    expect(
      jobSignalsHybridOrOnsite({
        title: "Automation Test Engineer (SDET)",
        jobDescription:
          "Job Type: Hybrid (3 days remote)\nJob Location: Vancouver, BC",
        isRemote: true,
      }),
    ).toBe(true);
  });

  // The input deliberately has no "N days remote" wording, so the
  // day-count + office-phrase rule is the one that has to match here.
  it("detects N days in office", () => {
    expect(
      jobSignalsHybridOrOnsite({
        jobDescription: "3 days per week in office",
      }),
    ).toBe(true);
  });

  // Partial-remote schedules are covered by their own rule; keep them in a
  // separate case so a regression in either rule is visible on its own.
  it("detects N days remote", () => {
    expect(
      jobSignalsHybridOrOnsite({
        jobDescription: "2 days remote, rest of the week with the team",
      }),
    ).toBe(true);
  });

  it("does not flag fully remote postings", () => {
    expect(
      jobSignalsHybridOrOnsite({
        jobDescription: "100% remote. Work from anywhere in Canada.",
        isRemote: true,
      }),
    ).toBe(false);
  });
});
describe("normalizeIsRemote", () => {
  // One row per scenario: the signals handed in and the normalized flag expected back.
  const scenarios = [
    {
      name: "downgrades JobSpy false positive when hybrid is mentioned",
      signals: {
        title: "Automation Test Engineer (SDET)",
        jobDescription: "Job Type: Hybrid (3 days remote)",
        isRemote: true,
      },
      expected: false,
    },
    {
      name: "keeps true when upstream says remote and text is fully remote",
      signals: {
        jobDescription: "Fully remote role. No office visits required.",
        isRemote: true,
      },
      expected: true,
    },
    {
      name: "promotes unknown upstream when text is strongly remote-only",
      signals: {
        jobDescription: "100% remote — work from anywhere.",
        isRemote: undefined,
      },
      expected: true,
    },
    {
      name: "returns null when remote status is unclear",
      signals: {
        title: "Software Engineer",
        location: "Toronto, ON",
        isRemote: undefined,
      },
      expected: null,
    },
  ];

  for (const { name, signals, expected } of scenarios) {
    it(name, () => {
      expect(normalizeIsRemote(signals)).toBe(expected);
    });
  }
});
});

View File

@ -0,0 +1,125 @@
/**
* Work-arrangement detection for ingest and scoring.
 * `isRemote` means 100% remote — hybrid or regular office presence disqualifies.
*/
/**
 * Fields of a job posting that the work-arrangement checks in this module read.
 * Every field is optional and may be `null`; missing text fields are simply
 * left out of the scanned text.
 */
export interface WorkArrangementSignals {
  /** Posting title; scanned as free text. */
  title?: string | null;
  /** Posting body; scanned as free text. */
  jobDescription?: string | null;
  /** Location string; scanned as free text. */
  location?: string | null;
  /** Work-from-home label; scanned as free text and also checked on its own for hybrid / on-site values. */
  workFromHomeType?: string | null;
  /** Job type label; scanned as free text. */
  jobType?: string | null;
  /** Remote flag as reported upstream; `null` or `undefined` when it is not known. */
  isRemote?: boolean | null;
}
/**
 * Collapse the text fields of a posting into a single lowercase string, one
 * field per line, so the pattern checks can scan them together. Fields that
 * are null, undefined, or empty are left out.
 */
function buildBlob(signals: WorkArrangementSignals): string {
  const { title, jobDescription, location, workFromHomeType, jobType } =
    signals;

  const segments: string[] = [];
  for (const field of [
    title,
    jobDescription,
    location,
    workFromHomeType,
    jobType,
  ]) {
    if (field) {
      segments.push(field);
    }
  }

  return segments.join("\n").toLowerCase();
}
/**
 * True when the posting text contains an explicit fully-remote phrase such as
 * "100% remote", "fully remote", "remote-only" or "work from anywhere".
 */
export function jobSignalsStrongRemoteOnly(
  signals: WorkArrangementSignals,
): boolean {
  const text = buildBlob(signals);

  const remoteOnlyPhrases: RegExp[] = [
    /\b100%\s*remote\b/,
    /\bfully\s+remote\b/,
    /\bremote[\s-]only\b/,
    /\bcompletely\s+remote\b/,
    /\bwork\s+from\s+anywhere\b/,
    /\banywhere\s+in\s+the\s+(us|usa|uk|world)\b/,
  ];

  return remoteOnlyPhrases.some((phrase) => phrase.test(text));
}
/** Office-location phrases that may accompany a day count ("3 days a week in the office"). */
const OFFICE_PLACE = String.raw`(?:in[\s-]?office|in(?:to)?\s+the\s+office|on[\s-]?site|at\s+the\s+office)`;

/** Day count written with digits ("3 days"). */
const DIGIT_DAYS = String.raw`\d\d?\s+days?`;

/** Day count written as a word ("three days"). */
const WORD_DAYS = String.raw`(?:one|two|three|four|five|six|seven|eight|nine|ten)\s+days?`;

/**
 * Patterns tested against the lowercased posting text; any match marks the
 * posting as not fully remote. `.` does not cross line breaks, so the
 * two-part rules only fire when both parts appear on the same line.
 */
const HYBRID_OR_ONSITE_PATTERNS: readonly RegExp[] = [
  /\bhybrid\b/,
  /\bremote[\s-]?hybrid\b/,
  /\bhybrid[\s-]?remote\b/,
  /\bpartial(?:ly)?\s+remote\b/,
  /\b\d\d?\s+days?\s+remote\b/,
  /\bremote\s+\d\d?\s+days?\b/,
  // Day count paired with an office phrase, in either order. All four rules
  // share OFFICE_PLACE so "in the office" / "into the office" / "at the office"
  // are recognized no matter how the count is written.
  new RegExp(
    String.raw`\b${DIGIT_DAYS}\s+(?:a|per)\s+week\b.*\b${OFFICE_PLACE}\b`,
  ),
  new RegExp(String.raw`\b${OFFICE_PLACE}\b.*\b${DIGIT_DAYS}\b`),
  new RegExp(String.raw`\b${WORD_DAYS}\b.*\b${OFFICE_PLACE}\b`),
  new RegExp(String.raw`\b${OFFICE_PLACE}\b.*\b${WORD_DAYS}\b`),
  /\boffice[\s-]based\b/,
  /\bon[\s-]?site\s+(?:role|position|required|mandatory)\b/,
  // Deliberately narrower than OFFICE_PLACE: "required ... in the office" is
  // too easy to hit in negated sentences of otherwise remote postings.
  /\b(?:required|must)\b.*\b(?:in[\s-]?office|on[\s-]?site|in[\s-]?person)\b/,
];

/**
 * Hybrid, partial-remote, or on-site/office requirements — not 100% remote.
 *
 * @param signals Posting fields to inspect.
 * @returns `true` when `workFromHomeType` names a hybrid or on-site
 *   arrangement ("on-site", "on site" or "onsite"), or when the combined
 *   posting text matches any hybrid / office-attendance pattern; `false`
 *   otherwise.
 */
export function jobSignalsHybridOrOnsite(
  signals: WorkArrangementSignals,
): boolean {
  const wfh = (signals.workFromHomeType ?? "").toLowerCase();
  if (/hybrid|on[\s-]?site/.test(wfh)) {
    return true;
  }

  const blob = buildBlob(signals);
  return HYBRID_OR_ONSITE_PATTERNS.some((pattern) => pattern.test(blob));
}
/**
 * Reduce the upstream `isRemote` flag to "100% remote only".
 *
 * Hybrid or office-attendance wording always yields `false`. Otherwise an
 * explicit upstream `true` / `false` is kept as is. When upstream gives no
 * answer, strong remote-only wording yields `true`; with nothing to go on the
 * result is `null`.
 */
export function normalizeIsRemote(
  signals: WorkArrangementSignals,
): boolean | null {
  if (jobSignalsHybridOrOnsite(signals)) {
    return false;
  }

  const upstream = signals.isRemote;
  if (typeof upstream === "boolean") {
    return upstream;
  }

  // Upstream did not say either way: only explicit remote-only text can decide.
  return jobSignalsStrongRemoteOnly(signals) ? true : null;
}
/**
 * Whether a job probably expects time in an office; used to cap scores for
 * candidates who only want remote work.
 *
 * Hybrid or office-attendance wording is enough on its own. A job flagged
 * `isRemote === false` also counts, unless its text is strongly remote-only.
 * Everything else is treated as not requiring office presence.
 */
export function jobLikelyRequiresOfficePresence(
  signals: WorkArrangementSignals,
): boolean {
  if (jobSignalsHybridOrOnsite(signals)) {
    return true;
  }

  const flaggedNotRemote = signals.isRemote === false;
  return flaggedNotRemote && !jobSignalsStrongRemoteOnly(signals);
}