llm_council/backend/tests/test_llm_client.py

import os
import unittest


class TestProviderSelection(unittest.TestCase):
    def setUp(self):
        self._old_env = dict(os.environ)

    def tearDown(self):
        os.environ.clear()
        os.environ.update(self._old_env)

    def test_always_returns_openai_compat(self):
        """Provider is always 'openai_compat' now (OpenRouter removed)."""
        from backend.llm_client import _get_provider_name

        # Should always return openai_compat regardless of env vars
        self.assertEqual(_get_provider_name(), "openai_compat")

        # Test with different env var combinations
        os.environ["OPENAI_COMPAT_BASE_URL"] = "http://gpu:8000"
        self.assertEqual(_get_provider_name(), "openai_compat")

        os.environ.pop("OPENAI_COMPAT_BASE_URL", None)
        self.assertEqual(_get_provider_name(), "openai_compat")


class TestParallelConcurrency(unittest.IsolatedAsyncioTestCase):
    async def test_query_models_parallel_respects_llm_max_concurrency(self):
        import asyncio
        import backend.llm_client as lc

        old_env = dict(os.environ)
        old_query_model = lc.query_model

        in_flight = 0
        max_in_flight = 0
        lock = asyncio.Lock()

        async def fake_query_model(model, messages, timeout=120.0, max_tokens_override=None):
            nonlocal in_flight, max_in_flight
            async with lock:
                in_flight += 1
                max_in_flight = max(max_in_flight, in_flight)
            # ensure overlap is possible without the semaphore
            await asyncio.sleep(0.02)
            async with lock:
                in_flight -= 1
            return {"content": model}

        try:
            os.environ["LLM_MAX_CONCURRENCY"] = "1"
            lc.query_model = fake_query_model

            models = ["m1", "m2", "m3"]
            out = await lc.query_models_parallel(models, [{"role": "user", "content": "hi"}])

            self.assertEqual(set(out.keys()), set(models))
            self.assertEqual(max_in_flight, 1)
        finally:
            lc.query_model = old_query_model
            os.environ.clear()
            os.environ.update(old_env)