"""Tests for provider selection and the concurrency cap in ``backend.llm_client``."""

import asyncio
import os
import unittest
from unittest import mock


class TestProviderSelection(unittest.TestCase):
    """Checks that ``_get_provider_name`` ignores environment configuration."""

    def setUp(self):
        # Snapshot os.environ and restore it when the test finishes.
        # addCleanup guarantees the restore runs however the test ends.
        env_patcher = mock.patch.dict(os.environ)
        env_patcher.start()
        self.addCleanup(env_patcher.stop)

    def test_always_returns_openai_compat(self):
        """Provider is always 'openai_compat' now (OpenRouter removed)."""
        # Imported lazily so an import failure is reported against this test.
        from backend.llm_client import _get_provider_name

        # With whatever environment the test process inherited.
        self.assertEqual(_get_provider_name(), "openai_compat")

        # With the base-URL variable explicitly set.
        os.environ["OPENAI_COMPAT_BASE_URL"] = "http://gpu:8000"
        self.assertEqual(_get_provider_name(), "openai_compat")

        # With the base-URL variable explicitly absent.
        os.environ.pop("OPENAI_COMPAT_BASE_URL", None)
        self.assertEqual(_get_provider_name(), "openai_compat")


class TestParallelConcurrency(unittest.IsolatedAsyncioTestCase):
    """Checks that ``query_models_parallel`` honours ``LLM_MAX_CONCURRENCY``."""

    async def test_query_models_parallel_respects_llm_max_concurrency(self):
        """With LLM_MAX_CONCURRENCY=1, at most one query_model call is in flight."""
        import backend.llm_client as lc

        in_flight = 0
        max_in_flight = 0
        lock = asyncio.Lock()

        async def fake_query_model(
            model, messages, timeout=120.0, max_tokens_override=None
        ):
            """Stand-in for ``lc.query_model`` that records peak overlap."""
            nonlocal in_flight, max_in_flight
            async with lock:
                in_flight += 1
                max_in_flight = max(max_in_flight, in_flight)
            # Stay "in flight" long enough that unthrottled calls would
            # overlap and push max_in_flight above 1.
            await asyncio.sleep(0.02)
            async with lock:
                in_flight -= 1
            return {"content": model}

        models = ["m1", "m2", "m3"]
        messages = [{"role": "user", "content": "hi"}]

        # Both patches are undone on exit from the ``with`` block, even if
        # an assertion fails: os.environ and lc.query_model are restored.
        with mock.patch.dict(os.environ, {"LLM_MAX_CONCURRENCY": "1"}), \
                mock.patch.object(lc, "query_model", fake_query_model):
            out = await lc.query_models_parallel(models, messages)

            self.assertEqual(set(out.keys()), set(models))
            self.assertEqual(max_in_flight, 1)