diff --git a/Dockerfile b/Dockerfile index e7addd9..32677c6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,6 +54,7 @@ WORKDIR /app COPY shared ./shared COPY docs-site ./docs-site COPY orchestrator ./orchestrator +COPY visa-sponsor-providers ./visa-sponsor-providers COPY extractors/adzuna ./extractors/adzuna COPY extractors/hiringcafe ./extractors/hiringcafe COPY extractors/gradcracker ./extractors/gradcracker @@ -116,6 +117,7 @@ COPY --from=builder /app/orchestrator/dist ./orchestrator/dist COPY --from=builder /app/docs-site/build ./orchestrator/dist/docs COPY shared ./shared COPY orchestrator ./orchestrator +COPY visa-sponsor-providers ./visa-sponsor-providers COPY extractors/adzuna ./extractors/adzuna COPY extractors/hiringcafe ./extractors/hiringcafe COPY extractors/gradcracker ./extractors/gradcracker diff --git a/docker-compose.yml b/docker-compose.yml index fddd092..2dac750 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,6 +43,9 @@ services: - path: ./orchestrator/src target: /app/orchestrator/src action: sync+restart + - path: ./visa-sponsor-providers + target: /app/visa-sponsor-providers + action: sync+restart # Sync extractor changes - path: ./extractors/gradcracker/src target: /app/extractors/gradcracker/src diff --git a/docs-site/docs/features/visa-sponsors.md b/docs-site/docs/features/visa-sponsors.md index e97d749..33319b8 100644 --- a/docs-site/docs/features/visa-sponsors.md +++ b/docs-site/docs/features/visa-sponsors.md @@ -1,56 +1,75 @@ --- id: visa-sponsors title: Visa Sponsors -description: Search the UK licensed sponsor register and use sponsor matches in your job workflow. +description: Search licensed sponsor registers across multiple countries and use sponsor matches in your job workflow. sidebar_position: 4 --- ## What it is -The Visa Sponsors page lets you search the UK Home Office licensed sponsor register from inside JobOps. +The Visa Sponsors page lets you search official licensed sponsor registers from inside JobOps. 
+ +Each provider corresponds to a country's official register and is auto-discovered at startup from the `visa-sponsor-providers/` directory. For each company, it shows: - Match score against your query - Company location (when available) - Licensed routes and type/rating details -- Last data refresh time and sponsor count +- Per-provider last refresh time and sponsor count ## Why it exists -Many roles require sponsorship-ready employers. This page helps you quickly validate whether a target company appears on the official sponsor list, so you can prioritize applications and sourcing terms. +Many roles require sponsorship-ready employers. This page helps you quickly validate whether a target company appears on an official sponsor list, so you can prioritize applications and sourcing terms. ## How to use it 1. Open **Visa Sponsors** in the app. 2. Enter a company name in the search box. -3. Select a result to view sponsor details. -4. Use the score and route details to decide whether to prioritize that employer. +3. Optionally filter by country using the country field. +4. Select a result to view sponsor details. +5. Use the score and route details to decide whether to prioritize that employer. ### Refresh schedule -- Automatic update runs daily at about **02:00** (server local time). -- Use the download/update button in the page header to fetch the latest register immediately. +Each provider refreshes independently on its own daily schedule (default: **02:00 UTC**). Use the download/update button in the page header to fetch the latest register immediately for all providers. 
### API examples ```bash -# Search sponsors +# Search sponsors across all providers curl -X POST http://localhost:3001/api/visa-sponsors/search \ -H "content-type: application/json" \ -d '{"query":"Monzo","limit":100,"minScore":20}' ``` +```bash +# Search sponsors restricted to a specific country +curl -X POST http://localhost:3001/api/visa-sponsors/search \ + -H "content-type: application/json" \ + -d '{"query":"Monzo","country":"united kingdom","limit":100}' +``` + ```bash # Get one organization's entries (all licensed routes) curl "http://localhost:3001/api/visa-sponsors/organization/Monzo%20Bank%20Ltd" ``` ```bash -# Trigger manual refresh +# Get status of all registered providers +curl "http://localhost:3001/api/visa-sponsors/status" +``` + +```bash +# Trigger manual refresh for all providers curl -X POST http://localhost:3001/api/visa-sponsors/update ``` +```bash +# Trigger manual refresh for a specific provider +curl -X POST http://localhost:3001/api/visa-sponsors/update/uk +``` + ## Common problems ### No results found @@ -61,14 +80,23 @@ curl -X POST http://localhost:3001/api/visa-sponsors/update ### Sponsor data is empty - Run a manual refresh with the header update button (or `POST /api/visa-sponsors/update`). -- Check that the server can reach `gov.uk` and `assets.publishing.service.gov.uk`. +- Check `GET /api/visa-sponsors/status` to see per-provider error details. +- Verify the server can reach the upstream source for that provider (e.g. `gov.uk` for the UK provider). ### Company appears once but has multiple routes - Open the detail panel for that company; route/type entries are shown there. +### A country's provider is missing + +- Check startup logs for registry warnings about that provider id, including skipped invalid manifests. +- Ensure the provider id is registered in `shared/src/visa-sponsor-providers/index.ts`. +- Ensure the manifest exists at `visa-sponsor-providers/<id>/manifest.ts` or `visa-sponsor-providers/<id>/src/manifest.ts`. 
+- See [Add a Visa Sponsor Provider](/docs/next/workflows/add-a-visa-sponsor-provider) for the full workflow. + ## Related pages +- [Add a Visa Sponsor Provider](/docs/next/workflows/add-a-visa-sponsor-provider) - [Orchestrator](/docs/next/features/orchestrator) - [Post-Application Tracking](/docs/next/features/post-application-tracking) - [Self-Hosting](/docs/next/getting-started/self-hosting) diff --git a/docs-site/docs/workflows/add-a-visa-sponsor-provider.md b/docs-site/docs/workflows/add-a-visa-sponsor-provider.md new file mode 100644 index 0000000..d09e957 --- /dev/null +++ b/docs-site/docs/workflows/add-a-visa-sponsor-provider.md @@ -0,0 +1,107 @@ +--- +id: add-a-visa-sponsor-provider +title: Add a Visa Sponsor Provider +description: How to add a new country's visa sponsor register using the provider manifest contract. +sidebar_position: 3 +--- + +## What it is + +This guide explains how to add a new country's visa sponsor register that is auto-discovered by the orchestrator at startup. + +Each provider is a directory under `visa-sponsor-providers/` containing a `manifest.ts` file. The manifest owns only what is country-specific: fetching and parsing the upstream register. Storage, scheduling, caching, and search are handled by the shared service layer. + +Provider ids must be registered in `shared/src/visa-sponsor-providers/index.ts` to be accepted at runtime. + +## Why it exists + +Without a manifest contract, adding a new country's register required touching multiple orchestrator files. + +With the provider system, contributors only need to: + +1. Add a manifest in `visa-sponsor-providers/<id>/`. +2. Register the new id in the shared catalog. + +The service layer handles everything else. + +## How to use it + +1. Create a directory under `visa-sponsor-providers/<id>/` where `<id>` is a short lowercase slug (e.g. `au`, `ca`). +2. Add a `manifest.ts` in that directory (or `src/manifest.ts`). +3. 
Export a manifest that satisfies `VisaSponsorProviderManifest`: + - `id` — matches the directory name and the catalog entry + - `displayName` — human-readable country name + - `countryKey` — lowercase country string compatible with `normalizeCountryKey()` (e.g. `"australia"`) + - `scheduledUpdateHour` (optional) — UTC hour for the daily refresh; defaults to `2` + - `fetchSponsors()` — fetches the upstream source and returns `VisaSponsor[]`; throws on failure +4. Add the new id to `shared/src/visa-sponsor-providers/index.ts`: + - append to `VISA_SPONSOR_PROVIDER_IDS` + - add an entry in `VISA_SPONSOR_PROVIDER_METADATA` +5. Start the server and confirm the startup log reports the provider in the registry. +6. Run the full CI checks. + +Example manifest: + +```ts +import type { + VisaSponsor, + VisaSponsorProviderManifest, +} from "../../shared/src/types/visa-sponsors"; + +export const manifest: VisaSponsorProviderManifest = { + id: "au", + displayName: "Australia", + countryKey: "australia", + scheduledUpdateHour: 3, + + async fetchSponsors(): Promise<VisaSponsor[]> { + // Fetch and parse the upstream register here. + // Return an array of VisaSponsor objects. + // Throw on failure — the service layer handles error state. + return []; + }, +}; + +export default manifest; +``` + +Example catalog update in `shared/src/visa-sponsor-providers/index.ts`: + +```ts +export const VISA_SPONSOR_PROVIDER_IDS = ["uk", "au"] as const; + +export const VISA_SPONSOR_PROVIDER_METADATA = { + uk: { label: "United Kingdom", countryKey: "united kingdom" }, + au: { label: "Australia", countryKey: "australia" }, +}; +``` + +## Common problems + +### Provider not registered at startup + +- Check the file path: valid locations are `visa-sponsor-providers/<id>/manifest.ts` or `visa-sponsor-providers/<id>/src/manifest.ts`. +- Ensure the file exports `default` or a named `manifest`. 
+- Check startup logs for registry warnings such as skipped invalid manifests, duplicate ids, duplicate country keys, or ids missing from the shared catalog. + +### Provider id rejected at runtime + +- The id must be in `VISA_SPONSOR_PROVIDER_IDS` in `shared/src/visa-sponsor-providers/index.ts`. +- Duplicate ids or duplicate `countryKey` values are skipped with a warning. + +### Provider loads but returns no sponsors + +- Verify `fetchSponsors()` returns a non-empty array and does not silently swallow errors. +- Check `GET /api/visa-sponsors/status` for the provider's error field. +- Trigger a manual refresh with `POST /api/visa-sponsors/update/<id>` and watch server logs. + +### countryKey does not match job locations + +- The `countryKey` must produce the same output as `normalizeCountryKey()` when called on job location strings. +- Use lowercase, no diacritics, matching the canonical country name used in job data. + +## Related pages + +- [Visa Sponsors Feature](/docs/next/features/visa-sponsors) +- [Add an Extractor Workflow](/docs/next/workflows/add-an-extractor) +- [Extractors Overview](/docs/next/extractors/overview) diff --git a/docs-site/sidebars.ts b/docs-site/sidebars.ts index cefa234..3d3f089 100644 --- a/docs-site/sidebars.ts +++ b/docs-site/sidebars.ts @@ -18,6 +18,7 @@ const sidebars: SidebarsConfig = { "workflows/find-jobs-and-apply-workflow", "workflows/post-application-workflow", "workflows/add-an-extractor", + "workflows/add-a-visa-sponsor-provider", ], }, { diff --git a/orchestrator/src/client/api/client.ts b/orchestrator/src/client/api/client.ts index b10c3be..21c5220 100644 --- a/orchestrator/src/client/api/client.ts +++ b/orchestrator/src/client/api/client.ts @@ -1391,12 +1391,14 @@ export async function searchVisaSponsors(input: { query: string; limit?: number; minScore?: number; + country?: string; }): Promise { if (input.query?.trim()) { trackProductEvent("visa_sponsor_search", { query_length_bucket: bucketQueryLength(input.query.trim()), 
limit: input.limit, min_score: input.minScore, + country: input.country ?? "all", }); } return fetchApi("/visa-sponsors/search", { @@ -1407,9 +1409,12 @@ export async function searchVisaSponsors(input: { export async function getVisaSponsorOrganization( name: string, + providerId?: string, ): Promise { + const params = new URLSearchParams(); + if (providerId) params.set("providerId", providerId); return fetchApi( - `/visa-sponsors/organization/${encodeURIComponent(name)}`, + `/visa-sponsors/organization/${encodeURIComponent(name)}${params.size ? `?${params.toString()}` : ""}`, ); } diff --git a/orchestrator/src/client/lib/queryKeys.ts b/orchestrator/src/client/lib/queryKeys.ts index 6a1ced8..2d3f678 100644 --- a/orchestrator/src/client/lib/queryKeys.ts +++ b/orchestrator/src/client/lib/queryKeys.ts @@ -48,14 +48,23 @@ export const queryKeys = { visaSponsors: { all: ["visa-sponsors"] as const, status: () => [...queryKeys.visaSponsors.all, "status"] as const, - search: (query: string, limit: number, minScore: number) => + search: ( + query: string, + limit: number, + minScore: number, + country?: string, + ) => [ ...queryKeys.visaSponsors.all, "search", - { query, limit, minScore }, + { query, limit, minScore, country: country ?? null }, + ] as const, + organization: (name: string, providerId?: string) => + [ + ...queryKeys.visaSponsors.all, + "organization", + { name, providerId: providerId ?? null }, ] as const, - organization: (name: string) => - [...queryKeys.visaSponsors.all, "organization", name] as const, }, postApplication: { all: ["post-application"] as const, diff --git a/orchestrator/src/client/pages/VisaSponsorsPage.tsx b/orchestrator/src/client/pages/VisaSponsorsPage.tsx index a97af51..f511f75 100644 --- a/orchestrator/src/client/pages/VisaSponsorsPage.tsx +++ b/orchestrator/src/client/pages/VisaSponsorsPage.tsx @@ -1,8 +1,4 @@ -/** - * UK Visa Sponsors search page. - * Allows searching the government's list of licensed visa sponsors. 
- */ - +import { formatCountryLabel } from "@shared/location-support.js"; import type { VisaSponsor, VisaSponsorSearchResult, @@ -32,6 +28,13 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Drawer, DrawerClose, DrawerContent } from "@/components/ui/drawer"; import { Input } from "@/components/ui/input"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; import { cn, formatDateTime } from "@/lib/utils"; import * as api from "../api"; import { @@ -58,12 +61,24 @@ const getScoreTokens = (score: number) => { return { badge: "border-rose-500/30 bg-rose-500/10 text-rose-200" }; }; +const ALL_SOURCES_VALUE = "__all_sources__"; + +const getSearchScopeLabel = (countryLabel: string) => + countryLabel === "All sources" ? "all sources" : `the ${countryLabel} source`; + +const getResultKey = ( + result: Pick, +) => `${result.providerId}::${result.sponsor.organisationName}`; + export const VisaSponsorsPage: React.FC = () => { const queryClient = useQueryClient(); // State const [searchQuery, setSearchQuery] = useState(""); const [debouncedSearchQuery, setDebouncedSearchQuery] = useState(""); - const [selectedOrg, setSelectedOrg] = useState(null); + const [selectedResultKey, setSelectedResultKey] = useState( + null, + ); + const [selectedCountry, setSelectedCountry] = useState(null); // Loading states const [isDetailDrawerOpen, setIsDetailDrawerOpen] = useState(false); @@ -79,6 +94,35 @@ export const VisaSponsorsPage: React.FC = () => { }); const status = statusQuery.data ?? null; useQueryErrorToast(statusQuery.error, "Failed to fetch status"); + const statusProviders = status?.providers ?? 
[]; + const providerOptions = statusProviders.map((provider) => ({ + value: provider.countryKey, + label: formatCountryLabel(provider.countryKey), + providerId: provider.providerId, + })); + const selectedCountryLabel = + providerOptions.find((option) => option.value === selectedCountry)?.label ?? + "All sources"; + const searchScopeLabel = getSearchScopeLabel(selectedCountryLabel); + const activeProviders = selectedCountry + ? statusProviders.filter( + (provider) => provider.countryKey === selectedCountry, + ) + : statusProviders; + const totalSponsors = activeProviders.reduce( + (sum, provider) => sum + provider.totalSponsors, + 0, + ); + const latestUpdatedAt = activeProviders.reduce( + (latest, provider) => { + if (!provider.lastUpdated) return latest; + if (!latest) return provider.lastUpdated; + return new Date(provider.lastUpdated) > new Date(latest) + ? provider.lastUpdated + : latest; + }, + null, + ); useEffect(() => { const timer = setTimeout(() => { @@ -92,53 +136,66 @@ export const VisaSponsorsPage: React.FC = () => { debouncedSearchQuery.trim(), 100, 20, + selectedCountry ?? undefined, ), queryFn: () => api.searchVisaSponsors({ query: debouncedSearchQuery.trim(), limit: 100, minScore: 20, + country: selectedCountry ?? undefined, }), enabled: Boolean(debouncedSearchQuery.trim()), }); useQueryErrorToast(searchQueryResult.error, "Search failed"); - const orgDetailsQuery = useQuery({ - queryKey: queryKeys.visaSponsors.organization(selectedOrg ?? ""), - queryFn: () => - selectedOrg - ? api.getVisaSponsorOrganization(selectedOrg) - : Promise.resolve([]), - enabled: Boolean(selectedOrg), - }); - const orgDetails = orgDetailsQuery.data ?? []; - useQueryErrorToast(orgDetailsQuery.error, "Failed to fetch details"); - const results = useMemo(() => { if (!debouncedSearchQuery.trim()) return []; return searchQueryResult.data?.results ?? 
[]; }, [debouncedSearchQuery, searchQueryResult.data]); + const selectedResult = useMemo( + () => results.find((r) => getResultKey(r) === selectedResultKey) ?? null, + [results, selectedResultKey], + ); + const selectedOrg = selectedResult?.sponsor.organisationName ?? null; + + const orgDetailsQuery = useQuery({ + queryKey: queryKeys.visaSponsors.organization( + selectedOrg ?? "", + selectedResult?.providerId, + ), + queryFn: () => + selectedOrg + ? api.getVisaSponsorOrganization( + selectedOrg, + selectedResult?.providerId, + ) + : Promise.resolve([]), + enabled: Boolean(selectedOrg), + }); + const orgDetails = orgDetailsQuery.data ?? []; + useQueryErrorToast(orgDetailsQuery.error, "Failed to fetch details"); + // Auto-select first result useEffect(() => { if (results.length === 0) { - setSelectedOrg(null); + setSelectedResultKey(null); return; } if ( - !selectedOrg || - !results.some((r) => r.sponsor.organisationName === selectedOrg) + !selectedResultKey || + !results.some((r) => getResultKey(r) === selectedResultKey) ) { - const firstOrg = results[0].sponsor.organisationName; - setSelectedOrg(firstOrg); + setSelectedResultKey(getResultKey(results[0])); } - }, [results, selectedOrg]); + }, [results, selectedResultKey]); useEffect(() => { - if (!selectedOrg) { + if (!selectedResultKey) { setIsDetailDrawerOpen(false); } - }, [selectedOrg]); + }, [selectedResultKey]); useEffect(() => { if (typeof window === "undefined") return; @@ -170,6 +227,7 @@ export const VisaSponsorsPage: React.FC = () => { debouncedSearchQuery.trim(), 100, 20, + selectedCountry ?? 
undefined, ), }); } @@ -185,25 +243,27 @@ export const VisaSponsorsPage: React.FC = () => { await updateListMutation.mutateAsync(); }; - const handleSelectOrg = (orgName: string) => { - setSelectedOrg(orgName); + const handleSelectOrg = (resultKey: string) => { + setSelectedResultKey(resultKey); if (!isDesktop) { setIsDetailDrawerOpen(true); } }; - const selectedResult = useMemo( - () => - results.find((r) => r.sponsor.organisationName === selectedOrg) ?? null, - [results, selectedOrg], - ); + const handleCountryChange = (value: string) => { + setSelectedCountry(value === ALL_SOURCES_VALUE ? null : value); + setSelectedResultKey(null); + setIsDetailDrawerOpen(false); + }; - const isUpdateInProgress = updateListMutation.isPending || status?.isUpdating; + const isUpdateInProgress = + updateListMutation.isPending || + statusProviders.some((provider) => provider.isUpdating); const isLoadingStatus = statusQuery.isLoading; const isSearching = searchQueryResult.isFetching; const isLoadingDetails = orgDetailsQuery.isLoading; - const detailPanelContent = !selectedOrg ? ( + const detailPanelContent = !selectedResult ? (
Select a company

@@ -235,6 +295,9 @@ export const VisaSponsorsPage: React.FC = () => { )}

{selectedOrg}

+

+ Source: {formatCountryLabel(selectedResult.countryKey)} +

{/* Location */} @@ -286,9 +349,9 @@ export const VisaSponsorsPage: React.FC = () => { What does this mean?

- This organisation is licensed by the UK Home Office to sponsor workers - on the routes listed above. An "A rating" means they're fully - compliant. + This organisation appears in the selected sponsor source and may be + able to sponsor workers on the routes listed above. Always verify the + latest source entry before relying on it.

@@ -299,21 +362,21 @@ export const VisaSponsorsPage: React.FC = () => { : undefined } + subtitle="Search sponsor data across available sources" actions={ <> {status && (
- {status.totalSponsors.toLocaleString()} sponsors + {totalSponsors.toLocaleString()} sponsors - {formatDateTime(status.lastUpdated) || "Never"} + {formatDateTime(latestUpdatedAt) || "Never"}
)} @@ -337,37 +400,66 @@ export const VisaSponsorsPage: React.FC = () => { {/* Search section */}
-
- -
- - setSearchQuery(e.target.value)} - className="pl-10 pr-10 h-10" - autoFocus - /> - {searchQuery && ( - - )} + Company name + +
+ + setSearchQuery(e.target.value)} + className="pl-10 pr-10 h-10" + autoFocus + /> + {searchQuery && ( + + )} +
+

+ Enter a company name to check if they're a licensed visa + sponsor in {searchScopeLabel}. +

+
+ +
-

- Enter a company name to check if they're a licensed UK visa - sponsor. -

@@ -387,7 +479,7 @@ export const VisaSponsorsPage: React.FC = () => { ) : null } > - {!isLoadingStatus && status?.totalSponsors === 0 && ( + {!isLoadingStatus && status && totalSponsors === 0 && ( { /> )} - {status && status.totalSponsors > 0 && !searchQuery && ( + {status && totalSponsors > 0 && !searchQuery && ( )} @@ -431,13 +523,11 @@ export const VisaSponsorsPage: React.FC = () => { )} {results.length > 0 && - results.map((result, index) => ( + results.map((result) => ( - handleSelectOrg(result.sponsor.organisationName) - } + key={getResultKey(result)} + selected={selectedResultKey === getResultKey(result)} + onClick={() => handleSelectOrg(getResultKey(result))} className="gap-3" >
@@ -450,11 +540,22 @@ export const VisaSponsorsPage: React.FC = () => { {(result.sponsor.townCity || result.sponsor.county) && (
- {[result.sponsor.townCity, result.sponsor.county] + {[ + formatCountryLabel(result.countryKey), + result.sponsor.townCity, + result.sponsor.county, + ] .filter(Boolean) .join(", ")}
)} + {!result.sponsor.townCity && + !result.sponsor.county && + result.countryKey && ( +
+ {formatCountryLabel(result.countryKey)} +
+ )}
diff --git a/orchestrator/src/lib/analytics.ts b/orchestrator/src/lib/analytics.ts index d210a91..a2bbfff 100644 --- a/orchestrator/src/lib/analytics.ts +++ b/orchestrator/src/lib/analytics.ts @@ -119,6 +119,7 @@ type ProductEventMap = { query_length_bucket: string; limit?: number; min_score?: number; + country?: string; }; }; @@ -158,9 +159,7 @@ function getAnalyticsUserId(): string | null { function getAnalyticsAppVersion(): string | null { try { - return typeof __APP_VERSION__ !== "undefined" && __APP_VERSION__?.trim() - ? __APP_VERSION__ - : null; + return __APP_VERSION__?.trim() || null; } catch { return null; } diff --git a/orchestrator/src/server/api/routes/jobs.test.ts b/orchestrator/src/server/api/routes/jobs.test.ts index 0aec8b5..8b95044 100644 --- a/orchestrator/src/server/api/routes/jobs.test.ts +++ b/orchestrator/src/server/api/routes/jobs.test.ts @@ -885,8 +885,10 @@ describe.sequential("Jobs API routes", () => { const { searchSponsors } = await import( "@server/services/visa-sponsors/index" ); - vi.mocked(searchSponsors).mockReturnValue([ + vi.mocked(searchSponsors).mockResolvedValue([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "ACME CORP SPONSOR" } as any, score: 100, matchedName: "acme corp sponsor", diff --git a/orchestrator/src/server/api/routes/jobs.ts b/orchestrator/src/server/api/routes/jobs.ts index aaa616d..7c366b3 100644 --- a/orchestrator/src/server/api/routes/jobs.ts +++ b/orchestrator/src/server/api/routes/jobs.ts @@ -1164,7 +1164,7 @@ jobsRouter.post("/:id/check-sponsor", async (req: Request, res: Response) => { } // Search for sponsor matches - const sponsorResults = visaSponsors.searchSponsors(job.employer, { + const sponsorResults = await visaSponsors.searchSponsors(job.employer, { limit: 10, minScore: 50, }); diff --git a/orchestrator/src/server/api/routes/visa-sponsors.test.ts b/orchestrator/src/server/api/routes/visa-sponsors.test.ts index af9fdba..6579e28 100644 --- 
a/orchestrator/src/server/api/routes/visa-sponsors.test.ts +++ b/orchestrator/src/server/api/routes/visa-sponsors.test.ts @@ -20,36 +20,136 @@ describe.sequential("Visa sponsors API routes", () => { const { getStatus, downloadLatestCsv } = await import( "@server/services/visa-sponsors/index" ); - vi.mocked(getStatus).mockReturnValue({ - lastUpdated: null, - csvPath: null, - totalSponsors: 0, - isUpdating: false, - nextScheduledUpdate: null, - error: null, + vi.mocked(getStatus).mockResolvedValue({ + providers: [ + { + providerId: "uk", + countryKey: "united kingdom", + lastUpdated: null, + csvPath: null, + totalSponsors: 0, + isUpdating: false, + nextScheduledUpdate: null, + error: null, + }, + ], }); vi.mocked(downloadLatestCsv).mockResolvedValue({ success: false, message: "failed", + code: "ALL_PROVIDER_UPDATES_FAILED", }); const statusRes = await fetch(`${baseUrl}/api/visa-sponsors/status`); const statusBody = await statusRes.json(); expect(statusBody.ok).toBe(true); - expect(statusBody.data.totalSponsors).toBe(0); + expect(typeof statusBody.meta.requestId).toBe("string"); + expect(statusBody.data.providers).toHaveLength(1); + expect(statusBody.data.providers[0].totalSponsors).toBe(0); const updateRes = await fetch(`${baseUrl}/api/visa-sponsors/update`, { method: "POST", }); expect(updateRes.status).toBe(500); + const updateBody = await updateRes.json(); + expect(updateBody.ok).toBe(false); + expect(updateBody.error.code).toBe("INTERNAL_ERROR"); + expect(typeof updateBody.meta.requestId).toBe("string"); + }); + + it("returns service unavailable when no visa sponsor providers are registered", async () => { + const { downloadLatestCsv } = await import( + "@server/services/visa-sponsors/index" + ); + vi.mocked(downloadLatestCsv).mockResolvedValue({ + success: false, + message: "No providers registered", + code: "NO_PROVIDERS_REGISTERED", + }); + + const res = await fetch(`${baseUrl}/api/visa-sponsors/update`, { + method: "POST", + headers: { "x-request-id": 
"req-visa-sponsors-empty" }, + }); + const body = await res.json(); + + expect(res.status).toBe(503); + expect(res.headers.get("x-request-id")).toBe("req-visa-sponsors-empty"); + expect(body.ok).toBe(false); + expect(body.error.code).toBe("SERVICE_UNAVAILABLE"); + expect(body.meta.requestId).toBe("req-visa-sponsors-empty"); + }); + + it("updates an individual provider and returns its refreshed status", async () => { + const { downloadLatestCsv, getStatus } = await import( + "@server/services/visa-sponsors/index" + ); + vi.mocked(downloadLatestCsv).mockResolvedValue({ + success: true, + message: "Updated 1/1 providers", + }); + vi.mocked(getStatus).mockResolvedValue({ + providers: [ + { + providerId: "uk", + countryKey: "united kingdom", + lastUpdated: "2026-03-09T12:00:00.000Z", + csvPath: "/tmp/uk/visa_sponsors_2026-03-09.csv", + totalSponsors: 123, + isUpdating: false, + nextScheduledUpdate: "2026-03-10T02:00:00.000Z", + error: null, + }, + ], + }); + + const res = await fetch(`${baseUrl}/api/visa-sponsors/update/uk`, { + method: "POST", + headers: { "x-request-id": "req-visa-sponsors-uk" }, + }); + const body = await res.json(); + + expect(res.status).toBe(200); + expect(res.headers.get("x-request-id")).toBe("req-visa-sponsors-uk"); + expect(vi.mocked(downloadLatestCsv)).toHaveBeenCalledWith("uk"); + expect(body.ok).toBe(true); + expect(body.data.message).toBe("Updated 1/1 providers"); + expect(body.data.status.providers).toHaveLength(1); + expect(body.meta.requestId).toBe("req-visa-sponsors-uk"); + }); + + it("returns not found when updating an unknown provider", async () => { + const { downloadLatestCsv } = await import( + "@server/services/visa-sponsors/index" + ); + vi.mocked(downloadLatestCsv).mockResolvedValue({ + success: false, + message: "Provider 'au' not found", + code: "PROVIDER_NOT_FOUND", + }); + + const res = await fetch(`${baseUrl}/api/visa-sponsors/update/au`, { + method: "POST", + headers: { "x-request-id": "req-visa-sponsors-au" }, + }); + 
const body = await res.json(); + + expect(res.status).toBe(404); + expect(res.headers.get("x-request-id")).toBe("req-visa-sponsors-au"); + expect(body.ok).toBe(false); + expect(body.error.code).toBe("NOT_FOUND"); + expect(body.error.message).toBe("Provider 'au' not found"); + expect(body.meta.requestId).toBe("req-visa-sponsors-au"); }); it("validates search payloads and handles missing organizations", async () => { const { searchSponsors, getOrganizationDetails } = await import( "@server/services/visa-sponsors/index" ); - vi.mocked(searchSponsors).mockReturnValue([ + vi.mocked(searchSponsors).mockResolvedValue([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "Acme", townCity: "London", @@ -61,7 +161,7 @@ describe.sequential("Visa sponsors API routes", () => { matchedName: "acme", }, ]); - vi.mocked(getOrganizationDetails).mockReturnValue([]); + vi.mocked(getOrganizationDetails).mockResolvedValue([]); const badRes = await fetch(`${baseUrl}/api/visa-sponsors/search`, { method: "POST", @@ -77,11 +177,36 @@ describe.sequential("Visa sponsors API routes", () => { }); const body = await res.json(); expect(body.ok).toBe(true); + expect(typeof body.meta.requestId).toBe("string"); expect(body.data.total).toBe(1); const orgRes = await fetch( - `${baseUrl}/api/visa-sponsors/organization/Acme`, + `${baseUrl}/api/visa-sponsors/organization/Acme?providerId=uk`, ); expect(orgRes.status).toBe(404); }); + + it("rejects invalid provider ids before organization lookup", async () => { + const { getOrganizationDetails } = await import( + "@server/services/visa-sponsors/index" + ); + + const res = await fetch( + `${baseUrl}/api/visa-sponsors/organization/Acme?providerId=../secrets`, + { + headers: { "x-request-id": "req-visa-sponsors-invalid-provider" }, + }, + ); + const body = await res.json(); + + expect(res.status).toBe(400); + expect(res.headers.get("x-request-id")).toBe( + "req-visa-sponsors-invalid-provider", + ); + expect(body.ok).toBe(false); 
+ expect(body.error.code).toBe("INVALID_REQUEST"); + expect(body.error.message).toBe("Unknown provider '../secrets'"); + expect(body.meta.requestId).toBe("req-visa-sponsors-invalid-provider"); + expect(vi.mocked(getOrganizationDetails)).not.toHaveBeenCalled(); + }); }); diff --git a/orchestrator/src/server/api/routes/visa-sponsors.ts b/orchestrator/src/server/api/routes/visa-sponsors.ts index 1e1f01d..c7a23ca 100644 --- a/orchestrator/src/server/api/routes/visa-sponsors.ts +++ b/orchestrator/src/server/api/routes/visa-sponsors.ts @@ -1,66 +1,69 @@ -import { notFound } from "@infra/errors"; -import { fail } from "@infra/http"; +import { + badRequest, + notFound, + serviceUnavailable, + toAppError, +} from "@infra/errors"; +import { fail, ok } from "@infra/http"; import * as visaSponsors from "@server/services/visa-sponsors/index"; +import { getVisaSponsorProviderRegistry } from "@server/services/visa-sponsors/providers/registry"; +import { normalizeCountryKey } from "@shared/location-support.js"; import type { - ApiResponse, VisaSponsorSearchResponse, VisaSponsorStatusResponse, } from "@shared/types"; +import { isVisaSponsorProviderId } from "@shared/visa-sponsor-providers"; import { type Request, type Response, Router } from "express"; import { z } from "zod"; export const visaSponsorsRouter = Router(); /** - * GET /api/visa-sponsors/status - Get status of the visa sponsor service + * GET /api/visa-sponsors/status - Get status of all registered providers */ visaSponsorsRouter.get("/status", async (_req: Request, res: Response) => { try { - const status = visaSponsors.getStatus(); - const response: ApiResponse = { - ok: true, - data: status, - }; - res.json(response); + const status = await visaSponsors.getStatus(); + ok(res, status); } catch (error) { - const message = error instanceof Error ? 
error.message : "Unknown error"; - res.status(500).json({ success: false, error: message }); + fail(res, toAppError(error)); } }); /** * POST /api/visa-sponsors/search - Search for visa sponsors + * Optional `country` field restricts results to a specific provider. */ const visaSponsorSearchSchema = z.object({ query: z.string().min(1), limit: z.number().int().min(1).max(200).optional(), minScore: z.number().int().min(0).max(100).optional(), + country: z.string().optional(), }); visaSponsorsRouter.post("/search", async (req: Request, res: Response) => { try { const input = visaSponsorSearchSchema.parse(req.body); + const countryKey = input.country + ? normalizeCountryKey(input.country) + : undefined; - const results = visaSponsors.searchSponsors(input.query, { + const results = await visaSponsors.searchSponsors(input.query, { limit: input.limit, minScore: input.minScore, + countryKey, }); - const response: ApiResponse = { - ok: true, - data: { - results, - query: input.query, - total: results.length, - }, - }; - res.json(response); + ok(res, { + results, + query: input.query, + total: results.length, + }); } catch (error) { if (error instanceof z.ZodError) { - return res.status(400).json({ success: false, error: error.message }); + return fail(res, badRequest(error.message, error.flatten())); } - const message = error instanceof Error ? error.message : "Unknown error"; - res.status(500).json({ success: false, error: message }); + fail(res, toAppError(error)); } }); @@ -71,44 +74,106 @@ visaSponsorsRouter.get( "/organization/:name", async (req: Request, res: Response) => { try { - const name = decodeURIComponent(req.params.name); - const entries = visaSponsors.getOrganizationDetails(name); + const name = req.params.name; + const providerId = + typeof req.query.providerId === "string" + ? 
req.query.providerId + : undefined; + + if (providerId) { + if (!isVisaSponsorProviderId(providerId)) { + return fail(res, badRequest(`Unknown provider '${providerId}'`)); + } + + const registry = await getVisaSponsorProviderRegistry(); + if (!registry.manifests.has(providerId)) { + return fail(res, notFound(`Provider '${providerId}' not found`)); + } + } + + const entries = await visaSponsors.getOrganizationDetails( + name, + providerId, + ); if (entries.length === 0) { return fail(res, notFound("Organization not found")); } - res.json({ - success: true, - data: entries, - }); + ok(res, entries); } catch (error) { - const message = error instanceof Error ? error.message : "Unknown error"; - res.status(500).json({ success: false, error: message }); + fail(res, toAppError(error)); } }, ); /** - * POST /api/visa-sponsors/update - Trigger a manual update of the visa sponsor list + * POST /api/visa-sponsors/update - Trigger a manual update for all providers */ visaSponsorsRouter.post("/update", async (_req: Request, res: Response) => { try { const result = await visaSponsors.downloadLatestCsv(); if (!result.success) { - return res.status(500).json({ success: false, error: result.message }); + return fail( + res, + result.code === "NO_PROVIDERS_REGISTERED" + ? serviceUnavailable(result.message) + : toAppError(new Error(result.message)), + ); } - res.json({ - success: true, - data: { - message: result.message, - status: visaSponsors.getStatus(), - }, + ok(res, { + message: result.message, + status: await visaSponsors.getStatus(), }); } catch (error) { - const message = error instanceof Error ? 
error.message : "Unknown error"; - res.status(500).json({ success: false, error: message }); + fail(res, toAppError(error)); } }); + +function mapUpdateProviderError(message: string) { + return toAppError(new Error(message)); +} + +function mapUpdateProviderErrorCode(input: { code?: string; message: string }) { + if (input.code === "PROVIDER_NOT_FOUND") { + return notFound(input.message); + } + + if (input.code === "NO_PROVIDERS_REGISTERED") { + return serviceUnavailable(input.message); + } + + return mapUpdateProviderError(input.message); +} + +/** + * POST /api/visa-sponsors/update/:providerId - Trigger a manual update for a specific provider + */ +visaSponsorsRouter.post( + "/update/:providerId", + async (req: Request, res: Response) => { + try { + const { providerId } = req.params; + const result = await visaSponsors.downloadLatestCsv(providerId); + + if (!result.success) { + return fail( + res, + mapUpdateProviderErrorCode({ + code: result.code, + message: result.message, + }), + ); + } + + ok(res, { + message: result.message, + status: await visaSponsors.getStatus(), + }); + } catch (error) { + fail(res, toAppError(error)); + } + }, +); diff --git a/orchestrator/src/server/app.ts b/orchestrator/src/server/app.ts index d244632..506ccbe 100644 --- a/orchestrator/src/server/app.ts +++ b/orchestrator/src/server/app.ts @@ -24,7 +24,7 @@ import { resolveTracerRedirect } from "./services/tracer-links"; const __dirname = dirname(fileURLToPath(import.meta.url)); -function createBasicAuthGuard() { +export function createBasicAuthGuard() { function getAuthConfig() { const user = process.env.BASIC_AUTH_USER || ""; const pass = process.env.BASIC_AUTH_PASSWORD || ""; diff --git a/orchestrator/src/server/basic-auth.test.ts b/orchestrator/src/server/basic-auth.test.ts index face46b..1d9740f 100644 --- a/orchestrator/src/server/basic-auth.test.ts +++ b/orchestrator/src/server/basic-auth.test.ts @@ -1,9 +1,6 @@ -import { mkdtemp, rm } from "node:fs/promises"; -import type { 
Server } from "node:http"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import { createApp } from "./app"; +import type { NextFunction, Request, Response } from "express"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { createBasicAuthGuard } from "./app"; const originalEnv = { ...process.env }; @@ -12,112 +9,126 @@ function buildAuthHeader(user: string, pass: string): string { return `Basic ${token}`; } -async function startServer(): Promise<{ server: Server; baseUrl: string }> { - const app = createApp(); - const server = app.listen(0); - await new Promise((resolve) => - server.once("listening", () => resolve()), - ); - const address = server.address(); - if (!address || typeof address === "string") { - throw new Error("Failed to resolve server address"); - } - return { server, baseUrl: `http://127.0.0.1:${address.port}` }; +function createMockRequest(input: { + method: string; + path: string; + authorization?: string; +}): Request { + return { + method: input.method, + path: input.path, + headers: input.authorization ? 
{ authorization: input.authorization } : {}, + } as Request; +} + +function createMockResponse(): Response & { + statusCode: number; + jsonBody: unknown; +} { + return { + statusCode: 200, + jsonBody: null, + getHeader: vi.fn(() => undefined), + setHeader: vi.fn(), + status: vi.fn(function status( + this: Response & { statusCode: number }, + code: number, + ) { + this.statusCode = code; + return this; + }), + json: vi.fn(function json( + this: Response & { jsonBody: unknown }, + body: unknown, + ) { + this.jsonBody = body; + return this; + }), + } as unknown as Response & { statusCode: number; jsonBody: unknown }; } describe.sequential("Basic Auth read-only enforcement", () => { - let server: Server | null = null; - let baseUrl = ""; - let tempDir = ""; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), "job-ops-auth-test-")); - process.env.DATA_DIR = tempDir; - process.env.NODE_ENV = "test"; - }); - - afterEach(async () => { - if (server) { - await new Promise((resolve) => server?.close(() => resolve())); - server = null; - } - if (tempDir) { - await rm(tempDir, { recursive: true, force: true }); - tempDir = ""; - } + afterEach(() => { process.env = { ...originalEnv }; }); - it("allows read-only GETs without auth when Basic Auth is enabled", async () => { + it("allows read-only GETs without auth when Basic Auth is enabled", () => { process.env.BASIC_AUTH_USER = "user"; process.env.BASIC_AUTH_PASSWORD = "pass"; - ({ server, baseUrl } = await startServer()); + const { middleware } = createBasicAuthGuard(); + const req = createMockRequest({ method: "GET", path: "/health" }); + const res = createMockResponse(); + const next = vi.fn() as NextFunction; - const healthRes = await fetch(`${baseUrl}/health`); - expect(healthRes.status).toBe(200); + middleware(req, res, next); - const pdfRes = await fetch(`${baseUrl}/pdfs/does-not-exist.pdf`); - expect(pdfRes.status).toBe(404); + expect(next).toHaveBeenCalledOnce(); + 
expect(res.status).not.toHaveBeenCalled(); }); - it("blocks POST/PATCH/DELETE without auth when Basic Auth is enabled", async () => { + it("blocks POST/PATCH/DELETE without auth when Basic Auth is enabled", () => { process.env.BASIC_AUTH_USER = "user"; process.env.BASIC_AUTH_PASSWORD = "pass"; - ({ server, baseUrl } = await startServer()); + const { middleware } = createBasicAuthGuard(); - const postRes = await fetch(`${baseUrl}/api/jobs/actions`, { + for (const request of [ + createMockRequest({ method: "POST", path: "/api/jobs/actions" }), + createMockRequest({ method: "PATCH", path: "/api/jobs/123" }), + createMockRequest({ method: "DELETE", path: "/api/jobs/status/skipped" }), + ]) { + const res = createMockResponse(); + const next = vi.fn() as NextFunction; + + middleware(request, res, next); + + expect(next).not.toHaveBeenCalled(); + expect(res.statusCode).toBe(401); + expect(res.jsonBody).toMatchObject({ + ok: false, + error: { + code: "UNAUTHORIZED", + message: "Authentication required", + }, + }); + } + }); + + it("allows writes with valid Basic Auth when enabled", () => { + process.env.BASIC_AUTH_USER = "user"; + process.env.BASIC_AUTH_PASSWORD = "pass"; + + const { middleware } = createBasicAuthGuard(); + const req = createMockRequest({ method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ action: "skip", jobIds: ["123"] }), + path: "/api/jobs/actions", + authorization: buildAuthHeader("user", "pass"), }); - expect(postRes.status).toBe(401); - expect(postRes.headers.get("www-authenticate")).toBeNull(); + const res = createMockResponse(); + const next = vi.fn() as NextFunction; - const patchRes = await fetch(`${baseUrl}/api/jobs/123`, { - method: "PATCH", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ status: "ready" }), - }); - expect(patchRes.status).toBe(401); + middleware(req, res, next); - const deleteRes = await fetch(`${baseUrl}/api/jobs/status/skipped`, { - method: "DELETE", - }); 
- expect(deleteRes.status).toBe(401); + expect(next).toHaveBeenCalledOnce(); + expect(res.status).not.toHaveBeenCalled(); }); - it("allows writes with valid Basic Auth when enabled", async () => { - process.env.BASIC_AUTH_USER = "user"; - process.env.BASIC_AUTH_PASSWORD = "pass"; - - ({ server, baseUrl } = await startServer()); - - const authHeader = buildAuthHeader("user", "pass"); - const res = await fetch(`${baseUrl}/api/jobs/actions`, { - method: "POST", - headers: { - Authorization: authHeader, - "Content-Type": "application/json", - }, - body: JSON.stringify({ action: "skip", jobIds: ["123"] }), - }); - - expect(res.status).not.toBe(401); - }); - - it("does not require auth when Basic Auth is disabled", async () => { + it("does not require auth when Basic Auth is disabled", () => { delete process.env.BASIC_AUTH_USER; delete process.env.BASIC_AUTH_PASSWORD; - ({ server, baseUrl } = await startServer()); - - const res = await fetch(`${baseUrl}/api/jobs/actions`, { + const { middleware } = createBasicAuthGuard(); + const req = createMockRequest({ method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ action: "skip", jobIds: ["123"] }), + path: "/api/jobs/actions", }); - expect(res.status).not.toBe(401); + const res = createMockResponse(); + const next = vi.fn() as NextFunction; + + middleware(req, res, next); + + expect(next).toHaveBeenCalledOnce(); + expect(res.status).not.toHaveBeenCalled(); }); }); diff --git a/orchestrator/src/server/pipeline/sponsor-matching.test.ts b/orchestrator/src/server/pipeline/sponsor-matching.test.ts index 911a7cb..057f645 100644 --- a/orchestrator/src/server/pipeline/sponsor-matching.test.ts +++ b/orchestrator/src/server/pipeline/sponsor-matching.test.ts @@ -117,8 +117,10 @@ describe("Sponsor Match Calculation", () => { getUnscoredDiscoveredJobs.mockResolvedValue([mockJob]); // Mock sponsor search returning a match - searchSponsors.mockReturnValue([ + searchSponsors.mockResolvedValue([ { + 
providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "ACME CORPORATION LIMITED" }, score: 85, matchedName: "acme corporation", @@ -152,18 +154,24 @@ describe("Sponsor Match Calculation", () => { getUnscoredDiscoveredJobs.mockResolvedValue([mockJob]); // Mock sponsor search returning perfect matches - searchSponsors.mockReturnValue([ + searchSponsors.mockResolvedValue([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "MICROSOFT UK LIMITED" }, score: 100, matchedName: "microsoft uk", }, { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "MICROSOFT UK LTD" }, score: 100, matchedName: "microsoft uk", }, { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "MICROSOFT LIMITED" }, score: 80, matchedName: "microsoft", @@ -191,13 +199,17 @@ describe("Sponsor Match Calculation", () => { getUnscoredDiscoveredJobs.mockResolvedValue([mockJob]); // Mock sponsor search returning partial matches only - searchSponsors.mockReturnValue([ + searchSponsors.mockResolvedValue([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "TECH CORPORATION" }, score: 75, matchedName: "tech corporation", }, { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "TECHNO CORP" }, score: 60, matchedName: "techno corp", @@ -222,7 +234,7 @@ describe("Sponsor Match Calculation", () => { getUnscoredDiscoveredJobs.mockResolvedValue([mockJob]); // Mock sponsor search returning no matches - searchSponsors.mockReturnValue([]); + searchSponsors.mockResolvedValue([]); const { runPipeline } = await import("./orchestrator"); await runPipeline({ sources: [], enableCrawling: false }); @@ -279,7 +291,7 @@ describe("Sponsor Match Calculation", () => { it("should use correct limit and minScore options", async () => { const mockJob = createJob({ employer: "Test Company" }); getUnscoredDiscoveredJobs.mockResolvedValue([mockJob]); - 
searchSponsors.mockReturnValue([]); + searchSponsors.mockResolvedValue([]); const { runPipeline } = await import("./orchestrator"); await runPipeline({ sources: [], enableCrawling: false }); @@ -294,8 +306,10 @@ describe("Sponsor Match Calculation", () => { const mockJob = createJob({ employer: "Google UK" }); getUnscoredDiscoveredJobs.mockResolvedValue([mockJob]); - searchSponsors.mockReturnValue([ + searchSponsors.mockResolvedValue([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "GOOGLE UK LIMITED" }, score: 100, matchedName: "google uk", @@ -329,15 +343,19 @@ describe("Sponsor Match Calculation", () => { // Different results for each employer searchSponsors - .mockReturnValueOnce([ + .mockResolvedValueOnce([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "AMAZON UK SERVICES LTD" }, score: 90, matchedName: "amazon uk", }, ]) - .mockReturnValueOnce([ + .mockResolvedValueOnce([ { + providerId: "uk", + countryKey: "united kingdom", sponsor: { organisationName: "META PLATFORMS IRELAND LIMITED" }, score: 80, matchedName: "meta platforms", diff --git a/orchestrator/src/server/pipeline/steps/score-jobs.test.ts b/orchestrator/src/server/pipeline/steps/score-jobs.test.ts index 1c3bd56..7a3a353 100644 --- a/orchestrator/src/server/pipeline/steps/score-jobs.test.ts +++ b/orchestrator/src/server/pipeline/steps/score-jobs.test.ts @@ -60,7 +60,7 @@ describe("scoreJobsStep auto-skip behavior", () => { score: 40, reason: "Low fit", }); - vi.mocked(visaSponsors.searchSponsors).mockReturnValue([]); + vi.mocked(visaSponsors.searchSponsors).mockResolvedValue([]); vi.mocked(visaSponsors.calculateSponsorMatchSummary).mockReturnValue({ sponsorMatchScore: 0, sponsorMatchNames: null, diff --git a/orchestrator/src/server/pipeline/steps/score-jobs.ts b/orchestrator/src/server/pipeline/steps/score-jobs.ts index d74302f..f6db508 100644 --- a/orchestrator/src/server/pipeline/steps/score-jobs.ts +++ 
b/orchestrator/src/server/pipeline/steps/score-jobs.ts @@ -70,7 +70,7 @@ export async function scoreJobsStep(args: { let sponsorMatchNames: string | undefined; if (job.employer) { - const sponsorResults = visaSponsors.searchSponsors(job.employer, { + const sponsorResults = await visaSponsors.searchSponsors(job.employer, { limit: 10, minScore: 50, }); diff --git a/orchestrator/src/server/services/visa-sponsors/index.test.ts b/orchestrator/src/server/services/visa-sponsors/index.test.ts index 8852340..7446e22 100644 --- a/orchestrator/src/server/services/visa-sponsors/index.test.ts +++ b/orchestrator/src/server/services/visa-sponsors/index.test.ts @@ -14,11 +14,15 @@ describe("calculateSponsorMatchSummary", () => { it("should report the top match when it is not a perfect match", () => { const results: VisaSponsorSearchResult[] = [ { + providerId: "uk", + countryKey: "united kingdom", score: 85, sponsor: { organisationName: "Tech Corp" } as any, matchedName: "tech corp", }, { + providerId: "uk", + countryKey: "united kingdom", score: 60, sponsor: { organisationName: "Other Ltd" } as any, matchedName: "other", @@ -34,11 +38,15 @@ describe("calculateSponsorMatchSummary", () => { it("should report a single perfect match", () => { const results: VisaSponsorSearchResult[] = [ { + providerId: "uk", + countryKey: "united kingdom", score: 100, sponsor: { organisationName: "Exact Match Ltd" } as any, matchedName: "exact match", }, { + providerId: "uk", + countryKey: "united kingdom", score: 90, sponsor: { organisationName: "Close Match" } as any, matchedName: "close", @@ -54,21 +62,29 @@ describe("calculateSponsorMatchSummary", () => { it("should report exactly two 100% matches when two or more exist", () => { const results: VisaSponsorSearchResult[] = [ { + providerId: "uk", + countryKey: "united kingdom", score: 100, sponsor: { organisationName: "First PerfectMatch" } as any, matchedName: "match", }, { + providerId: "uk", + countryKey: "united kingdom", score: 100, sponsor: 
{ organisationName: "Second PerfectMatch" } as any, matchedName: "match", }, { + providerId: "uk", + countryKey: "united kingdom", score: 100, sponsor: { organisationName: "Third PerfectMatch" } as any, matchedName: "match", }, { + providerId: "uk", + countryKey: "united kingdom", score: 50, sponsor: { organisationName: "Common Co" } as any, matchedName: "common", @@ -88,11 +104,15 @@ describe("calculateSponsorMatchSummary", () => { it("should only report the single top result if no 100% matches exist", () => { const results: VisaSponsorSearchResult[] = [ { + providerId: "uk", + countryKey: "united kingdom", score: 99, sponsor: { organisationName: "Almost Perfect" } as any, matchedName: "almost", }, { + providerId: "uk", + countryKey: "united kingdom", score: 98, sponsor: { organisationName: "Second Best" } as any, matchedName: "best", diff --git a/orchestrator/src/server/services/visa-sponsors/index.ts b/orchestrator/src/server/services/visa-sponsors/index.ts index 6a62683..7a7ce7e 100644 --- a/orchestrator/src/server/services/visa-sponsors/index.ts +++ b/orchestrator/src/server/services/visa-sponsors/index.ts @@ -1,7 +1,11 @@ /** - * UK Visa Sponsors Service + * Visa Sponsors Service * - * Manages downloading, storing, and searching the UK visa sponsor list. + * Multi-provider facade that manages downloading, storing, and searching + * visa sponsor lists across different countries. + * + * Country-specific logic lives in visa-sponsor-providers/{country}/manifest.ts. + * This service handles storage, caching, scheduling, and search — all shared concerns. 
*/ import fs from "node:fs"; @@ -10,22 +14,55 @@ import { getDataDir } from "@server/config/dataDir"; import { createScheduler } from "@server/utils/scheduler"; import type { VisaSponsor, + VisaSponsorProviderManifest, + VisaSponsorProviderStatus, VisaSponsorSearchResult, VisaSponsorStatusResponse, } from "@shared/types"; import { normalizeWhitespace } from "@shared/utils/string"; - -const DATA_DIR = path.join(getDataDir(), "visa-sponsors"); - -// Ensure data directory exists -if (!fs.existsSync(DATA_DIR)) { - fs.mkdirSync(DATA_DIR, { recursive: true }); -} +import { isVisaSponsorProviderId } from "@shared/visa-sponsor-providers"; +import { parseVisaSponsorsCsv } from "@shared/visa-sponsors/csv"; +import { + getVisaSponsorProviderRegistry, + initializeVisaSponsorProviderRegistry, +} from "./providers/registry"; export type { VisaSponsor, VisaSponsorSearchResult }; export type VisaSponsorStatus = VisaSponsorStatusResponse; -// Common company suffixes to strip during comparison +// ============================================================================ +// Per-provider in-memory state +// ============================================================================ + +interface ProviderState { + cache: VisaSponsor[] | null; + cacheLoadedAt: Date | null; + isUpdating: boolean; + updateError: string | null; + scheduler: ReturnType | null; +} + +const providerState = new Map(); + +function getOrCreateProviderState(providerId: string): ProviderState { + let state = providerState.get(providerId); + if (!state) { + state = { + cache: null, + cacheLoadedAt: null, + isUpdating: false, + updateError: null, + scheduler: null, + }; + providerState.set(providerId, state); + } + return state; +} + +// ============================================================================ +// Company name normalization and similarity (shared across all providers) +// ============================================================================ + const COMPANY_SUFFIXES = [ "limited", 
"ltd", @@ -49,38 +86,16 @@ const COMPANY_SUFFIXES = [ "the", ]; -// Cache for loaded sponsors -let sponsorsCache: VisaSponsor[] | null = null; -let cacheLoadedAt: Date | null = null; -let isUpdating = false; -let updateError: string | null = null; - -/** - * Normalize a company name for comparison (strips suffixes, punctuation, etc.) - */ export function normalizeCompanyName(name: string): string { let normalized = name.toLowerCase().trim(); - - // Remove common punctuation and special chars normalized = normalized.replace(/[.,'"()[\]{}!?@#$%^&*+=|\\/<>:;`~]/g, " "); - - // Remove suffixes for (const suffix of COMPANY_SUFFIXES) { - // Word boundary matching const regex = new RegExp(`\\b${suffix}\\b`, "gi"); normalized = normalized.replace(regex, ""); } - - // Collapse whitespace - normalized = normalizeWhitespace(normalized); - - return normalized; + return normalizeWhitespace(normalized); } -/** - * Calculate similarity score between two strings (0-100) - * Uses Levenshtein distance with some optimizations - */ export function calculateSimilarity(str1: string, str2: string): number { const s1 = str1.toLowerCase(); const s2 = str2.toLowerCase(); @@ -88,130 +103,62 @@ export function calculateSimilarity(str1: string, str2: string): number { if (s1 === s2) return 100; if (s1.length === 0 || s2.length === 0) return 0; - // Check if one contains the other if (s1.includes(s2) || s2.includes(s1)) { const longerLen = Math.max(s1.length, s2.length); const shorterLen = Math.min(s1.length, s2.length); return Math.round((shorterLen / longerLen) * 100); } - // Levenshtein distance const matrix: number[][] = []; - - for (let i = 0; i <= s1.length; i++) { - matrix[i] = [i]; - } - for (let j = 0; j <= s2.length; j++) { - matrix[0][j] = j; - } + for (let i = 0; i <= s1.length; i++) matrix[i] = [i]; + for (let j = 0; j <= s2.length; j++) matrix[0][j] = j; for (let i = 1; i <= s1.length; i++) { for (let j = 1; j <= s2.length; j++) { const cost = s1[i - 1] === s2[j - 1] ? 
0 : 1; matrix[i][j] = Math.min( - matrix[i - 1][j] + 1, // deletion - matrix[i][j - 1] + 1, // insertion - matrix[i - 1][j - 1] + cost, // substitution + matrix[i - 1][j] + 1, + matrix[i][j - 1] + 1, + matrix[i - 1][j - 1] + cost, ); } } const distance = matrix[s1.length][s2.length]; const maxLen = Math.max(s1.length, s2.length); - return Math.round(((maxLen - distance) / maxLen) * 100); } -/** - * Parse CSV content into VisaSponsor array - */ -export function parseCsv(content: string): VisaSponsor[] { - const lines = content.split("\n"); - const sponsors: VisaSponsor[] = []; +// ============================================================================ +// CSV parsing (generic 5-column format used for stored files) +// ============================================================================ - // Skip header - for (let i = 1; i < lines.length; i++) { - const line = lines[i].trim(); - if (!line) continue; +export const parseCsv = parseVisaSponsorsCsv; - // Parse CSV with proper quote handling - const fields = parseCSVLine(line); - if (fields.length >= 5) { - sponsors.push({ - organisationName: fields[0] || "", - townCity: fields[1] || "", - county: fields[2] || "", - typeRating: fields[3] || "", - route: fields[4] || "", - }); - } +// ============================================================================ +// Per-provider storage helpers +// ============================================================================ + +function getProviderDataDir(providerId: string): string { + return path.join(getDataDir(), "visa-sponsors", providerId); +} + +function ensureProviderDir(providerId: string): void { + const dir = getProviderDataDir(providerId); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); } - - return sponsors; } -/** - * Parse a single CSV line handling quoted fields - */ -function parseCSVLine(line: string): string[] { - const fields: string[] = []; - let current = ""; - let inQuotes = false; - - for (let i = 0; i < line.length; 
i++) { - const char = line[i]; - const nextChar = line[i + 1]; - - if (char === '"' && !inQuotes) { - inQuotes = true; - } else if (char === '"' && inQuotes) { - if (nextChar === '"') { - // Escaped quote - current += '"'; - i++; - } else { - inQuotes = false; - } - } else if (char === "," && !inQuotes) { - fields.push(current.trim()); - current = ""; - } else { - current += char; - } - } - - fields.push(current.trim()); - return fields; +function getMetadataPath(providerId: string): string { + return path.join(getProviderDataDir(providerId), "metadata.json"); } -/** - * Get list of CSV files sorted by date (newest first) - */ -function getCsvFiles(): string[] { - if (!fs.existsSync(DATA_DIR)) return []; - - return fs - .readdirSync(DATA_DIR) - .filter((f) => f.endsWith(".csv")) - .sort() - .reverse(); -} - -/** - * Get metadata file path - */ -function getMetadataPath(): string { - return path.join(DATA_DIR, "metadata.json"); -} - -/** - * Read metadata - */ -function readMetadata(): { +function readMetadata(providerId: string): { lastUpdated: string | null; csvFile: string | null; } { - const metaPath = getMetadataPath(); + const metaPath = getMetadataPath(providerId); if (!fs.existsSync(metaPath)) { return { lastUpdated: null, csvFile: null }; } @@ -222,214 +169,304 @@ function readMetadata(): { } } -/** - * Write metadata - */ -function writeMetadata(data: { lastUpdated: string; csvFile: string }): void { - fs.writeFileSync(getMetadataPath(), JSON.stringify(data, null, 2)); +function writeMetadata( + providerId: string, + data: { lastUpdated: string; csvFile: string }, +): void { + fs.writeFileSync(getMetadataPath(providerId), JSON.stringify(data, null, 2)); } -/** - * Clean up old CSV files (keep only 2) - */ -function cleanupOldCsvFiles(): void { - const files = getCsvFiles(); +function getCsvFiles(providerId: string): string[] { + const dir = getProviderDataDir(providerId); + if (!fs.existsSync(dir)) return []; + return fs + .readdirSync(dir) + .filter((f) 
=> f.endsWith(".csv")) + .sort() + .reverse(); +} + +function cleanupOldCsvFiles(providerId: string): void { + const dir = getProviderDataDir(providerId); + const files = getCsvFiles(providerId); if (files.length > 2) { for (const file of files.slice(2)) { - const filePath = path.join(DATA_DIR, file); + const filePath = path.join(dir, file); try { fs.unlinkSync(filePath); - console.log(`🗑️ Removed old visa sponsor CSV: ${file}`); + console.log(`🗑️ Removed old CSV for ${providerId}: ${file}`); } catch (err) { - console.warn(`⚠️ Failed to remove old CSV: ${file}`, err); + console.warn( + `⚠️ Failed to remove old CSV for ${providerId}: ${file}`, + err, + ); } } } } -/** - * Extract the CSV download URL from the gov.uk page - */ -async function extractCsvUrl(): Promise { - const pageUrl = - "https://www.gov.uk/government/publications/register-of-licensed-sponsors-workers"; +// ============================================================================ +// Core per-provider operations +// ============================================================================ - console.log("📄 Fetching gov.uk page to find CSV link..."); - const response = await fetch(pageUrl); +export type VisaSponsorDownloadErrorCode = + | "PROVIDER_NOT_FOUND" + | "NO_PROVIDERS_REGISTERED" + | "UPDATE_IN_PROGRESS" + | "ALL_PROVIDER_UPDATES_FAILED"; - if (!response.ok) { - throw new Error( - `Failed to fetch gov.uk page: ${response.status} ${response.statusText}`, - ); +export type VisaSponsorDownloadResult = + | { success: true; message: string } + | { + success: false; + message: string; + code: VisaSponsorDownloadErrorCode; + }; + +async function downloadLatestDataForProvider( + manifest: VisaSponsorProviderManifest, +): Promise { + const { id } = manifest; + const state = getOrCreateProviderState(id); + + if (state.isUpdating) { + return { + success: false, + message: `Update already in progress for ${id}`, + code: "UPDATE_IN_PROGRESS", + }; } - const html = await response.text(); - - // Look 
for the Worker and Temporary Worker CSV link - const csvMatch = html.match( - /href="(https:\/\/assets\.publishing\.service\.gov\.uk\/media\/[^"]+Worker_and_Temporary_Worker\.csv)"/, - ); - - if (!csvMatch) { - throw new Error( - "Could not find Worker and Temporary Worker CSV link on gov.uk page", - ); - } - - return csvMatch[1]; -} - -/** - * Download the latest visa sponsor CSV - */ -export async function downloadLatestCsv(): Promise<{ - success: boolean; - message: string; -}> { - if (isUpdating) { - return { success: false, message: "Update already in progress" }; - } - - isUpdating = true; - updateError = null; + state.isUpdating = true; + state.updateError = null; + ensureProviderDir(id); try { - // Extract the CSV URL from the page - const csvUrl = await extractCsvUrl(); - console.log(`📥 Downloading CSV from: ${csvUrl}`); + console.log(`📥 Fetching sponsor data for provider: ${id}`); + const sponsors = await manifest.fetchSponsors(); - const response = await fetch(csvUrl); - - if (!response.ok) { - throw new Error( - `Failed to download CSV: ${response.status} ${response.statusText}`, - ); - } - - const csvContent = await response.text(); - - // Validate CSV has content - const sponsors = parseCsv(csvContent); if (sponsors.length === 0) { - throw new Error("Downloaded CSV appears to be empty or invalid"); + throw new Error(`Provider ${id} returned an empty sponsor list`); } - // Generate filename with date + // Serialise to canonical CSV for storage + const csvContent = [ + "Organisation Name,Town/City,County,Type & Rating,Route", + ...sponsors.map((s) => + [s.organisationName, s.townCity, s.county, s.typeRating, s.route] + .map((f) => `"${f.replace(/"/g, '""')}"`) + .join(","), + ), + ].join("\n"); + const dateStr = new Date().toISOString().split("T")[0]; const filename = `visa_sponsors_${dateStr}.csv`; - const filepath = path.join(DATA_DIR, filename); + const dir = getProviderDataDir(id); + fs.writeFileSync(path.join(dir, filename), csvContent); - // Save 
the CSV - fs.writeFileSync(filepath, csvContent); - - // Update metadata - writeMetadata({ + writeMetadata(id, { lastUpdated: new Date().toISOString(), csvFile: filename, }); + cleanupOldCsvFiles(id); - // Cleanup old files - cleanupOldCsvFiles(); - - // Clear cache so next search loads new data - sponsorsCache = null; - cacheLoadedAt = null; - - console.log(`✅ Downloaded visa sponsor list: ${sponsors.length} sponsors`); + // Bust cache + state.cache = null; + state.cacheLoadedAt = null; + console.log( + `✅ Downloaded ${sponsors.length} sponsors for provider: ${id}`, + ); return { success: true, message: `Successfully downloaded ${sponsors.length} sponsors`, }; } catch (error) { const message = error instanceof Error ? error.message : "Unknown error"; - updateError = message; - console.error("❌ Failed to download visa sponsor list:", message); - return { success: false, message }; + state.updateError = message; + console.error( + `❌ Failed to download sponsors for provider ${id}:`, + message, + ); + return { + success: false, + message, + code: "ALL_PROVIDER_UPDATES_FAILED", + }; } finally { - isUpdating = false; + state.isUpdating = false; } } -/** - * Load sponsors from the latest CSV file - */ -export function loadSponsors(): VisaSponsor[] { - // Return cache if valid (less than 1 hour old) - if (sponsorsCache && cacheLoadedAt) { - const cacheAge = Date.now() - cacheLoadedAt.getTime(); - if (cacheAge < 60 * 60 * 1000) { - return sponsorsCache; +function loadSponsorsForProvider(providerId: string): VisaSponsor[] { + const state = getOrCreateProviderState(providerId); + + // Return valid cache (< 1 hour old) + if (state.cache && state.cacheLoadedAt) { + if (Date.now() - state.cacheLoadedAt.getTime() < 60 * 60 * 1000) { + return state.cache; } } - const metadata = readMetadata(); - if (!metadata.csvFile) { - return []; - } + const metadata = readMetadata(providerId); + if (!metadata.csvFile) return []; - const csvPath = path.join(DATA_DIR, metadata.csvFile); - if 
(!fs.existsSync(csvPath)) { - return []; - } + const csvPath = path.join(getProviderDataDir(providerId), metadata.csvFile); + if (!fs.existsSync(csvPath)) return []; try { const content = fs.readFileSync(csvPath, "utf-8"); - sponsorsCache = parseCsv(content); - cacheLoadedAt = new Date(); - return sponsorsCache; + const sponsors = parseCsv(content); + state.cache = sponsors; + state.cacheLoadedAt = new Date(); + return sponsors; } catch (error) { - console.error("Failed to load sponsors:", error); + console.error(`Failed to load sponsors for provider ${providerId}:`, error); return []; } } -/** - * Search for sponsors by company name - */ -export function searchSponsors( - query: string, - options: { limit?: number; minScore?: number } = {}, -): VisaSponsorSearchResult[] { - const { limit = 50, minScore = 30 } = options; - - const sponsors = loadSponsors(); - if (sponsors.length === 0 || !query.trim()) { - return []; +async function getRegisteredProviderManifest( + providerId: string, +): Promise { + if (!isVisaSponsorProviderId(providerId)) { + return null; } + const reg = await getVisaSponsorProviderRegistry(); + return reg.manifests.get(providerId) ?? null; +} + +// ============================================================================ +// Public API +// These entry points are async and preserve the legacy responsibilities +// (download, search, status, load) while operating across multiple providers. +// ============================================================================ + +/** + * Download the latest sponsor data. + * If providerId is omitted, updates all registered providers. + */ +export async function downloadLatestCsv( + providerId?: string, +): Promise { + const reg = await getVisaSponsorProviderRegistry(); + const validatedProvider = providerId + ? await getRegisteredProviderManifest(providerId) + : null; + + const manifests = providerId + ? 
([validatedProvider].filter(Boolean) as VisaSponsorProviderManifest[]) + : [...reg.manifests.values()]; + + if (manifests.length === 0) { + return { + success: false, + message: providerId + ? `Provider '${providerId}' not found` + : "No providers registered", + code: providerId ? "PROVIDER_NOT_FOUND" : "NO_PROVIDERS_REGISTERED", + }; + } + + const results = await Promise.allSettled( + manifests.map((m) => downloadLatestDataForProvider(m)), + ); + + const failures = results.filter( + (r) => + r.status === "rejected" || (r.status === "fulfilled" && !r.value.success), + ); + + if (failures.length === manifests.length) { + const firstFailure = failures[0]; + if (firstFailure?.status === "fulfilled") { + return firstFailure.value; + } + return { + success: false, + message: "All provider updates failed", + code: "ALL_PROVIDER_UPDATES_FAILED", + }; + } + + const succeeded = manifests.length - failures.length; + return { + success: true, + message: `Updated ${succeeded}/${manifests.length} providers`, + }; +} + +/** + * Load sponsors across all registered providers, optionally filtered by countryKey. + */ +async function loadAllSponsors(countryKey?: string): Promise< + { + providerId: VisaSponsorProviderManifest["id"]; + countryKey: string; + sponsors: VisaSponsor[]; + }[] +> { + const reg = await getVisaSponsorProviderRegistry(); + const manifests = countryKey + ? ([reg.manifestByCountryKey.get(countryKey)].filter( + Boolean, + ) as VisaSponsorProviderManifest[]) + : [...reg.manifests.values()]; + + return manifests.map((m) => ({ + providerId: m.id, + countryKey: m.countryKey, + sponsors: loadSponsorsForProvider(m.id), + })); +} + +/** + * Search for sponsors by company name. + * Pass countryKey to restrict to a specific provider; omit to search all. 
+ */ +export async function searchSponsors( + query: string, + options: { limit?: number; minScore?: number; countryKey?: string } = {}, +): Promise { + const { limit = 50, minScore = 30, countryKey } = options; + + if (!query.trim()) return []; + + const providerData = await loadAllSponsors(countryKey); const normalizedQuery = normalizeCompanyName(query); const results: VisaSponsorSearchResult[] = []; - const seen = new Set(); // Dedupe by org name + const seen = new Set(); - for (const sponsor of sponsors) { - // Skip if we've already seen this org name - if (seen.has(sponsor.organisationName)) continue; - seen.add(sponsor.organisationName); + for (const { + providerId, + countryKey: providerCountryKey, + sponsors, + } of providerData) { + for (const sponsor of sponsors) { + const dedupeKey = `${providerId}::${sponsor.organisationName}`; + if (seen.has(dedupeKey)) continue; + seen.add(dedupeKey); - const normalizedSponsor = normalizeCompanyName(sponsor.organisationName); + const normalizedSponsor = normalizeCompanyName(sponsor.organisationName); + const score = calculateSimilarity(normalizedQuery, normalizedSponsor); - // Calculate similarity - const score = calculateSimilarity(normalizedQuery, normalizedSponsor); - - if (score >= minScore) { - results.push({ - sponsor, - score, - matchedName: normalizedSponsor, - }); + if (score >= minScore) { + results.push({ + providerId, + countryKey: providerCountryKey, + sponsor, + score, + matchedName: normalizedSponsor, + }); + } } } - // Sort by score descending results.sort((a, b) => b.score - a.score); - return results.slice(0, limit); } -/** - * Calculate match summary from search results - */ export function calculateSponsorMatchSummary( results: VisaSponsorSearchResult[], ): { sponsorMatchScore: number; sponsorMatchNames: string | null } { @@ -438,7 +475,6 @@ export function calculateSponsorMatchSummary( } const topScore = results[0].score; - // Get all 100% matches, or just the top match const perfectMatches = 
results.filter((r) => r.score === 100); const matchesToReport = perfectMatches.length >= 2 ? perfectMatches.slice(0, 2) : [results[0]]; @@ -451,78 +487,93 @@ export function calculateSponsorMatchSummary( }; } -/** - * Get status of the visa sponsor service - */ -export function getStatus(): VisaSponsorStatus { - const metadata = readMetadata(); - const sponsors = loadSponsors(); +export async function getStatus(): Promise { + const reg = await getVisaSponsorProviderRegistry(); - return { - lastUpdated: metadata.lastUpdated, - csvPath: metadata.csvFile ? path.join(DATA_DIR, metadata.csvFile) : null, - totalSponsors: sponsors.length, - isUpdating, - nextScheduledUpdate: getNextScheduledUpdate(), - error: updateError, - }; + const providers: VisaSponsorProviderStatus[] = [ + ...reg.manifests.values(), + ].map((manifest) => { + const state = getOrCreateProviderState(manifest.id); + const metadata = readMetadata(manifest.id); + const dir = getProviderDataDir(manifest.id); + const sponsors = loadSponsorsForProvider(manifest.id); + + return { + providerId: manifest.id, + countryKey: manifest.countryKey, + lastUpdated: metadata.lastUpdated, + csvPath: metadata.csvFile ? path.join(dir, metadata.csvFile) : null, + totalSponsors: sponsors.length, + isUpdating: state.isUpdating, + nextScheduledUpdate: state.scheduler?.getNextRun() ?? null, + error: state.updateError, + }; + }); + + return { providers }; } -/** - * Get all entries for a specific organization (they may have multiple routes) - */ -export function getOrganizationDetails( +export async function getOrganizationDetails( organisationName: string, -): VisaSponsor[] { - const sponsors = loadSponsors(); - return sponsors.filter((s) => s.organisationName === organisationName); + providerId?: string, +): Promise { + const validatedProvider = providerId + ? await getRegisteredProviderManifest(providerId) + : null; + const providerData = providerId + ? [ + { + providerId: validatedProvider?.id ?? 
providerId, + countryKey: validatedProvider?.countryKey ?? "", + sponsors: validatedProvider + ? loadSponsorsForProvider(validatedProvider.id) + : [], + }, + ] + : await loadAllSponsors(); + return providerData + .flatMap(({ sponsors }) => sponsors) + .filter((s) => s.organisationName === organisationName); +} + +/** + * Load sponsors from the latest CSV file (kept for backwards compatibility). + * Returns all sponsors across all providers. + */ +export async function loadSponsors(): Promise { + const providerData = await loadAllSponsors(); + return providerData.flatMap(({ sponsors }) => sponsors); } // ============================================================================ -// Scheduled Updates (Cron-style) - Uses shared scheduler utility +// Initialization // ============================================================================ -const scheduler = createScheduler("visa-sponsors", async () => { - await downloadLatestCsv(); -}); - -/** - * Get the next scheduled update time as ISO string - */ -export function getNextScheduledUpdate(): string | null { - return scheduler.getNextRun(); -} - -/** - * Start the scheduler - */ -export function startScheduler(hour = 2): void { - scheduler.start(hour); -} - -/** - * Stop the scheduler - */ -export function stopScheduler(): void { - scheduler.stop(); -} - -/** - * Initialize the service (download if no data exists) - */ export async function initialize(): Promise { - const metadata = readMetadata(); + const reg = await initializeVisaSponsorProviderRegistry(); - if (!metadata.csvFile) { - console.log("📥 No visa sponsor data found, downloading..."); - await downloadLatestCsv(); - } else { - const sponsors = loadSponsors(); - console.log( - `✅ Visa sponsor service initialized with ${sponsors.length} sponsors`, - ); + for (const manifest of reg.manifests.values()) { + ensureProviderDir(manifest.id); + const metadata = readMetadata(manifest.id); + + if (!metadata.csvFile) { + console.log( + `📥 No data found for 
provider ${manifest.id}, downloading...`, + ); + await downloadLatestDataForProvider(manifest); + } else { + const sponsors = loadSponsorsForProvider(manifest.id); + console.log( + `✅ Provider ${manifest.id} initialized with ${sponsors.length} sponsors`, + ); + } + + // Start per-provider scheduler + const state = getOrCreateProviderState(manifest.id); + const schedulerName = `visa-sponsors-${manifest.id}`; + state.scheduler = createScheduler(schedulerName, async () => { + await downloadLatestDataForProvider(manifest); + }); + state.scheduler.start(manifest.scheduledUpdateHour ?? 2); } - - // Start the scheduler for automatic daily updates at 2 AM - startScheduler(2); } diff --git a/orchestrator/src/server/services/visa-sponsors/providers/discovery.test.ts b/orchestrator/src/server/services/visa-sponsors/providers/discovery.test.ts new file mode 100644 index 0000000..5187fe7 --- /dev/null +++ b/orchestrator/src/server/services/visa-sponsors/providers/discovery.test.ts @@ -0,0 +1,120 @@ +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { + discoverProviderManifestPaths, + loadProviderManifestFromFile, +} from "./discovery"; + +const tempRoots: string[] = []; +const originalCwd = process.cwd(); + +async function makeTempRepoRoot(): Promise { + const testTmpBase = join(originalCwd, "orchestrator", ".tmp"); + await mkdir(testTmpBase, { recursive: true }); + const tempDir = await mkdtemp(join(testTmpBase, "visa-sponsor-discovery-")); + tempRoots.push(tempDir); + return tempDir; +} + +afterEach(async () => { + process.chdir(originalCwd); + for (const root of tempRoots.splice(0)) { + await rm(root, { recursive: true, force: true }); + } +}); + +describe("visa sponsor provider discovery", () => { + it("finds provider manifests in the repo-local providers directory", async () => { + const repoRoot = await makeTempRepoRoot(); + const providersRoot = 
join(repoRoot, "visa-sponsor-providers"); + await mkdir(join(providersRoot, "uk"), { recursive: true }); + await writeFile( + join(providersRoot, "uk", "manifest.ts"), + [ + "export const manifest = {", + " id: 'uk',", + " displayName: 'United Kingdom',", + " countryKey: 'united kingdom',", + " async fetchSponsors() {", + " return [];", + " },", + "};", + ].join("\n"), + "utf8", + ); + + process.chdir(repoRoot); + + await expect(discoverProviderManifestPaths()).resolves.toEqual([ + join(providersRoot, "uk", "manifest.ts"), + ]); + }); + + it("loads provider manifests from named exports", async () => { + const repoRoot = await makeTempRepoRoot(); + const manifestPath = join(repoRoot, "provider-manifest.mjs"); + await writeFile( + manifestPath, + [ + "export const manifest = {", + " id: 'uk',", + " displayName: 'United Kingdom',", + " countryKey: 'united kingdom',", + " async fetchSponsors() {", + " return [];", + " },", + "};", + ].join("\n"), + "utf8", + ); + + const manifest = await loadProviderManifestFromFile(manifestPath); + + expect(manifest.id).toBe("uk"); + expect(manifest.countryKey).toBe("united kingdom"); + }); + + it("loads provider manifests from default exports", async () => { + const repoRoot = await makeTempRepoRoot(); + const manifestPath = join(repoRoot, "provider-manifest-default.mjs"); + await writeFile( + manifestPath, + [ + "export default {", + " id: 'uk',", + " displayName: 'United Kingdom',", + " countryKey: 'united kingdom',", + " async fetchSponsors() {", + " return [];", + " },", + "};", + ].join("\n"), + "utf8", + ); + + const manifest = await loadProviderManifestFromFile(manifestPath); + + expect(manifest.id).toBe("uk"); + expect(manifest.countryKey).toBe("united kingdom"); + }); + + it("rejects invalid manifest export shapes", async () => { + const repoRoot = await makeTempRepoRoot(); + const manifestPath = join(repoRoot, "provider-manifest-invalid.mjs"); + await writeFile( + manifestPath, + [ + "export default {", + " id: 'uk',", + " 
displayName: 'United Kingdom',", + "};", + ].join("\n"), + "utf8", + ); + + await expect(loadProviderManifestFromFile(manifestPath)).rejects.toThrow( + `Invalid visa sponsor provider manifest in ${manifestPath}`, + ); + }); +}); diff --git a/orchestrator/src/server/services/visa-sponsors/providers/discovery.ts b/orchestrator/src/server/services/visa-sponsors/providers/discovery.ts new file mode 100644 index 0000000..d42a22d --- /dev/null +++ b/orchestrator/src/server/services/visa-sponsors/providers/discovery.ts @@ -0,0 +1,168 @@ +import type { Dirent } from "node:fs"; +import { access, readdir, stat } from "node:fs/promises"; +import { basename, dirname, join, resolve } from "node:path"; +import { fileURLToPath, pathToFileURL } from "node:url"; +import { logger } from "@infra/logger"; +import { sanitizeUnknown } from "@infra/sanitize"; +import type { VisaSponsorProviderManifest } from "@shared/types"; + +const moduleDir = dirname(fileURLToPath(import.meta.url)); + +function getProvidersRootCandidates(): string[] { + return [ + resolve(process.cwd(), "visa-sponsor-providers"), + resolve(process.cwd(), "../visa-sponsor-providers"), + resolve(moduleDir, "../../../../../../visa-sponsor-providers"), + ]; +} + +const MANIFEST_CANDIDATES = ["manifest.ts", "src/manifest.ts"] as const; + +async function fileExists(path: string): Promise { + try { + await access(path); + return true; + } catch { + return false; + } +} + +async function directoryExists(path: string): Promise { + try { + const info = await stat(path); + return info.isDirectory(); + } catch { + return false; + } +} + +async function resolveProvidersRoot(): Promise { + const candidates = getProvidersRootCandidates(); + + for (const candidate of candidates) { + if (await directoryExists(candidate)) { + logger.info("Resolved visa sponsor providers root", { + selectedRoot: candidate, + candidates, + }); + return candidate; + } + } + + logger.warn( + "No visa sponsor providers root exists; using default candidate", 
+ { + selectedRoot: candidates[0], + candidates, + }, + ); + return candidates[0]; +} + +export async function discoverProviderManifestPaths( + providersRoot?: string, +): Promise { + const root = providersRoot ?? (await resolveProvidersRoot()); + if (basename(root) !== "visa-sponsor-providers") { + logger.warn( + "Visa sponsor providers root rejected due to unexpected basename", + { + root, + }, + ); + return []; + } + + let entries: Dirent[] = []; + try { + entries = await readdir(root, { withFileTypes: true }); + } catch (error) { + const known = error as NodeJS.ErrnoException; + if (known.code === "ENOENT") return []; + logger.warn("Failed to read visa sponsor providers root", { + root, + error: sanitizeUnknown(error), + }); + throw error; + } + + const paths: string[] = []; + for (const entry of entries) { + if (!entry.isDirectory()) continue; + for (const candidate of MANIFEST_CANDIDATES) { + const fullPath = join(root, entry.name, candidate); + if (await fileExists(fullPath)) { + paths.push(fullPath); + break; + } + } + } + + const sortedPaths = paths.sort(); + logger.info("Discovered visa sponsor provider manifest paths", { + root, + manifestCount: sortedPaths.length, + manifestPaths: sortedPaths, + }); + + return sortedPaths; +} + +function isProviderManifest( + value: unknown, +): value is VisaSponsorProviderManifest { + if (!value || typeof value !== "object") return false; + const m = value as Partial; + return ( + typeof m.id === "string" && + typeof m.displayName === "string" && + typeof m.countryKey === "string" && + typeof m.fetchSponsors === "function" + ); +} + +export async function loadProviderManifestFromFile( + path: string, +): Promise { + const fileUrl = pathToFileURL(path).href; + logger.info("Loading visa sponsor provider manifest", { + path, + fileUrl, + }); + + let loaded: unknown; + try { + loaded = await import(fileUrl); + } catch (error) { + logger.warn("Failed to import visa sponsor provider manifest", { + path, + fileUrl, + error: 
sanitizeUnknown(error), + }); + throw error; + } + + const candidateManifest = (loaded as { manifest?: unknown }).manifest; + const candidateDefault = (loaded as { default?: unknown }).default; + const manifest = isProviderManifest(candidateManifest) + ? candidateManifest + : candidateDefault; + + if (!isProviderManifest(manifest)) { + logger.warn("Visa sponsor provider manifest export shape is invalid", { + path, + fileUrl, + exportedKeys: + loaded && typeof loaded === "object" ? Object.keys(loaded) : [], + }); + throw new Error(`Invalid visa sponsor provider manifest in ${path}`); + } + + logger.info("Loaded visa sponsor provider manifest", { + path, + id: manifest.id, + countryKey: manifest.countryKey, + }); + + return manifest; +} diff --git a/orchestrator/src/server/services/visa-sponsors/providers/registry.ts b/orchestrator/src/server/services/visa-sponsors/providers/registry.ts new file mode 100644 index 0000000..5060feb --- /dev/null +++ b/orchestrator/src/server/services/visa-sponsors/providers/registry.ts @@ -0,0 +1,109 @@ +import { logger } from "@infra/logger"; +import { sanitizeUnknown } from "@infra/sanitize"; +import type { VisaSponsorProviderManifest } from "@shared/types"; +import { + isVisaSponsorProviderId, + VISA_SPONSOR_PROVIDER_IDS, + type VisaSponsorProviderId, +} from "@shared/visa-sponsor-providers"; +import { + discoverProviderManifestPaths, + loadProviderManifestFromFile, +} from "./discovery"; + +export interface VisaSponsorProviderRegistry { + manifests: Map; + manifestByCountryKey: Map; + availableProviderIds: VisaSponsorProviderId[]; +} + +let registry: VisaSponsorProviderRegistry | null = null; +let initPromise: Promise | null = null; + +export function __resetVisaSponsorRegistryForTests(): void { + registry = null; + initPromise = null; +} + +async function createRegistry(): Promise { + const manifestPaths = await discoverProviderManifestPaths(); + const manifests = new Map< + VisaSponsorProviderId, + VisaSponsorProviderManifest + 
>(); + const manifestByCountryKey = new Map(); + + for (const path of manifestPaths) { + try { + const manifest = await loadProviderManifestFromFile(path); + + if (manifests.has(manifest.id)) { + logger.warn("Duplicate visa sponsor provider id — skipping", { + id: manifest.id, + path, + }); + continue; + } + + if (!isVisaSponsorProviderId(manifest.id)) { + logger.warn("Visa sponsor provider id not in catalog — skipping", { + id: manifest.id, + path, + knownIds: VISA_SPONSOR_PROVIDER_IDS, + }); + continue; + } + + if (manifestByCountryKey.has(manifest.countryKey)) { + logger.warn( + "Duplicate countryKey in visa sponsor providers — skipping", + { + countryKey: manifest.countryKey, + path, + }, + ); + continue; + } + + manifests.set(manifest.id, manifest); + manifestByCountryKey.set(manifest.countryKey, manifest); + } catch (error) { + logger.warn("Skipping invalid visa sponsor provider manifest", { + path, + error: sanitizeUnknown(error), + }); + } + } + + const availableProviderIds = [...manifests.keys()]; + logger.info("Visa sponsor provider registry initialized", { + count: availableProviderIds.length, + providers: availableProviderIds, + }); + + return { manifests, manifestByCountryKey, availableProviderIds }; +} + +export async function initializeVisaSponsorProviderRegistry(): Promise { + if (registry) return registry; + if (!initPromise) { + initPromise = createRegistry() + .then((created) => { + registry = created; + return created; + }) + .catch((error) => { + logger.error("Failed to initialize visa sponsor provider registry", { + error: sanitizeUnknown(error), + }); + registry = null; + initPromise = null; + throw error; + }); + } + return initPromise; +} + +export async function getVisaSponsorProviderRegistry(): Promise { + return initializeVisaSponsorProviderRegistry(); +} diff --git a/orchestrator/vite.config.ts b/orchestrator/vite.config.ts index 8b63b57..86e0ca8 100644 --- a/orchestrator/vite.config.ts +++ b/orchestrator/vite.config.ts @@ -28,6 +28,9 
@@ export default defineConfig({ globals: true, environment: "jsdom", setupFiles: "./src/setupTests.ts", + maxWorkers: 1, + testTimeout: 30_000, + hookTimeout: 30_000, include: [ "src/**/*.test.ts", "src/**/*.test.tsx", diff --git a/shared/src/types/visa-sponsors.ts b/shared/src/types/visa-sponsors.ts index 5fb5300..669d715 100644 --- a/shared/src/types/visa-sponsors.ts +++ b/shared/src/types/visa-sponsors.ts @@ -1,3 +1,5 @@ +import type { VisaSponsorProviderId } from "../visa-sponsor-providers"; + export interface VisaSponsor { organisationName: string; townCity: string; @@ -7,6 +9,8 @@ export interface VisaSponsor { } export interface VisaSponsorSearchResult { + providerId: VisaSponsorProviderId; + countryKey: string; sponsor: VisaSponsor; score: number; matchedName: string; @@ -18,7 +22,9 @@ export interface VisaSponsorSearchResponse { total: number; } -export interface VisaSponsorStatusResponse { +export interface VisaSponsorProviderStatus { + providerId: VisaSponsorProviderId; + countryKey: string; lastUpdated: string | null; csvPath: string | null; totalSponsors: number; @@ -26,3 +32,25 @@ export interface VisaSponsorStatusResponse { nextScheduledUpdate: string | null; error: string | null; } + +export interface VisaSponsorStatusResponse { + providers: VisaSponsorProviderStatus[]; +} + +/** + * Implemented by each country-specific visa sponsor provider. + * Providers only own what is country-specific: HTTP fetching and parsing. + * Storage, scheduling, caching, and search are handled by the service layer. + */ +export interface VisaSponsorProviderManifest { + /** Unique slug, must be in VISA_SPONSOR_PROVIDER_IDS catalog. e.g. "uk", "au" */ + id: VisaSponsorProviderId; + /** Human-readable display name. e.g. "United Kingdom" */ + displayName: string; + /** normalizeCountryKey()-compatible string. e.g. "united kingdom", "australia" */ + countryKey: string; + /** UTC hour (0-23) for daily scheduled refresh. Defaults to 2. 
*/ + scheduledUpdateHour?: number; + /** Fetch and return the full sponsor list. Throws on failure. */ + fetchSponsors(): Promise; +} diff --git a/shared/src/visa-sponsor-providers/index.ts b/shared/src/visa-sponsor-providers/index.ts new file mode 100644 index 0000000..d4568ae --- /dev/null +++ b/shared/src/visa-sponsor-providers/index.ts @@ -0,0 +1,24 @@ +export const VISA_SPONSOR_PROVIDER_IDS = ["uk"] as const; + +export type VisaSponsorProviderId = (typeof VISA_SPONSOR_PROVIDER_IDS)[number]; + +export interface VisaSponsorProviderMetadata { + label: string; + countryKey: string; +} + +export const VISA_SPONSOR_PROVIDER_METADATA: Record< + VisaSponsorProviderId, + VisaSponsorProviderMetadata +> = { + uk: { + label: "United Kingdom", + countryKey: "united kingdom", + }, +}; + +export function isVisaSponsorProviderId( + value: string, +): value is VisaSponsorProviderId { + return (VISA_SPONSOR_PROVIDER_IDS as readonly string[]).includes(value); +} diff --git a/shared/src/visa-sponsors/csv.test.ts b/shared/src/visa-sponsors/csv.test.ts new file mode 100644 index 0000000..e86b0a4 --- /dev/null +++ b/shared/src/visa-sponsors/csv.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest"; +import { parseVisaSponsorsCsv } from "./csv"; + +describe("parseVisaSponsorsCsv", () => { + it("parses CRLF files and strips a UTF-8 BOM", () => { + const csv = [ + "\uFEFFOrganisation Name,Town/City,County,Type & Rating,Route", + '"Acme Ltd","London","Greater London","Worker","Skilled Worker"', + '"Beta Corp","Manchester","Greater Manchester","Temporary","Graduate"\r', + ].join("\r\n"); + + expect(parseVisaSponsorsCsv(csv)).toEqual([ + { + organisationName: "Acme Ltd", + townCity: "London", + county: "Greater London", + typeRating: "Worker", + route: "Skilled Worker", + }, + { + organisationName: "Beta Corp", + townCity: "Manchester", + county: "Greater Manchester", + typeRating: "Temporary", + route: "Graduate", + }, + ]); + }); +}); diff --git 
a/shared/src/visa-sponsors/csv.ts b/shared/src/visa-sponsors/csv.ts new file mode 100644 index 0000000..6f001db --- /dev/null +++ b/shared/src/visa-sponsors/csv.ts @@ -0,0 +1,54 @@ +import type { VisaSponsor } from "../types/visa-sponsors"; + +function parseCsvLine(line: string): string[] { + const fields: string[] = []; + let current = ""; + let inQuotes = false; + + for (let i = 0; i < line.length; i++) { + const char = line[i]; + const nextChar = line[i + 1]; + + if (char === '"' && !inQuotes) { + inQuotes = true; + } else if (char === '"' && inQuotes) { + if (nextChar === '"') { + current += '"'; + i++; + } else { + inQuotes = false; + } + } else if (char === "," && !inQuotes) { + fields.push(current.trim()); + current = ""; + } else { + current += char; + } + } + + fields.push(current.trim()); + return fields; +} + +export function parseVisaSponsorsCsv(content: string): VisaSponsor[] { + const lines = content.replace(/^\uFEFF/, "").split(/\r?\n/); + const sponsors: VisaSponsor[] = []; + + for (let i = 1; i < lines.length; i++) { + const line = lines[i].trim(); + if (!line) continue; + + const fields = parseCsvLine(line); + if (fields.length >= 5) { + sponsors.push({ + organisationName: fields[0] || "", + townCity: fields[1] || "", + county: fields[2] || "", + typeRating: fields[3] || "", + route: fields[4] || "", + }); + } + } + + return sponsors; +} diff --git a/visa-sponsor-providers/tsconfig.json b/visa-sponsor-providers/tsconfig.json new file mode 100644 index 0000000..5515a22 --- /dev/null +++ b/visa-sponsor-providers/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "strict": true, + "skipLibCheck": true, + "baseUrl": ".", + "paths": { + "@shared/*": ["../shared/src/*"] + } + }, + "include": ["**/*.ts"] +} diff --git a/visa-sponsor-providers/uk/manifest.ts b/visa-sponsor-providers/uk/manifest.ts new file mode 100644 index 0000000..cfe5885 --- /dev/null +++ 
b/visa-sponsor-providers/uk/manifest.ts @@ -0,0 +1,57 @@ +import type { + VisaSponsor, + VisaSponsorProviderManifest, +} from "@shared/types/visa-sponsors"; +import { parseVisaSponsorsCsv } from "@shared/visa-sponsors/csv"; + +const GOV_UK_PAGE_URL = + "https://www.gov.uk/government/publications/register-of-licensed-sponsors-workers"; + +const CSV_LINK_PATTERN = + /href="(https:\/\/assets\.publishing\.service\.gov\.uk\/media\/[^"]+Worker_and_Temporary_Worker\.csv)"/; + +async function extractCsvUrl(): Promise { + const response = await fetch(GOV_UK_PAGE_URL); + if (!response.ok) { + throw new Error( + `Failed to fetch gov.uk page: ${response.status} ${response.statusText}`, + ); + } + + const html = await response.text(); + const match = html.match(CSV_LINK_PATTERN); + if (!match) { + throw new Error( + "Could not find Worker and Temporary Worker CSV link on gov.uk page", + ); + } + + return match[1]; +} + +export const manifest: VisaSponsorProviderManifest = { + id: "uk", + displayName: "United Kingdom", + countryKey: "united kingdom", + scheduledUpdateHour: 2, + + async fetchSponsors(): Promise { + const csvUrl = await extractCsvUrl(); + const response = await fetch(csvUrl); + if (!response.ok) { + throw new Error( + `Failed to download UK sponsor CSV: ${response.status} ${response.statusText}`, + ); + } + + const content = await response.text(); + const sponsors = parseVisaSponsorsCsv(content); + if (sponsors.length === 0) { + throw new Error("UK sponsor CSV appears empty or invalid"); + } + + return sponsors; + }, +}; + +export default manifest;