Reducing startup time/cost

2025-10-08 23:21:22 -04:00
parent 51b9932389
commit c978b1a255
3 changed files with 47 additions and 14 deletions

llm.py (15 changed lines)

@@ -16,7 +16,7 @@ class LLMClient:
         self.ollama_client = Client(
             host=os.getenv("OLLAMA_URL", "http://localhost:11434")
         )
-        client.chat(
+        self.ollama_client.chat(
             model="gemma3:4b", messages=[{"role": "system", "content": "test"}]
         )
         self.PROVIDER = "ollama"
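
For context, the constructor this first hunk touches presumably reads roughly as sketched below; only the probe call itself appears in the hunk, and the try/except fallback to OpenAI is an assumption inferred from the PROVIDER branches later in the diff. The one-message chat against gemma3:4b is presumably the startup cost the commit title refers to.

import os

from ollama import Client
from openai import OpenAI  # assumed fallback, per the PROVIDER == "openai" branch below


class LLMClient:
    def __init__(self):
        try:
            self.ollama_client = Client(
                host=os.getenv("OLLAMA_URL", "http://localhost:11434")
            )
            # One small chat call as a startup connectivity probe against the
            # local Ollama server; if it fails, fall back to OpenAI (assumed).
            self.ollama_client.chat(
                model="gemma3:4b", messages=[{"role": "system", "content": "test"}]
            )
            self.PROVIDER = "ollama"
        except Exception:
            self.openai_client = OpenAI()
            self.PROVIDER = "openai"
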
@@ -35,9 +35,16 @@ class LLMClient:
         if self.PROVIDER == "ollama":
             response = self.ollama_client.chat(
                 model="gemma3:4b",
-                prompt=prompt,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                    {"role": "user", "content": prompt},
+                ],
             )
-            output = response["response"]
+            print(response)
+            output = response.message.content
         elif self.PROVIDER == "openai":
             response = self.openai_client.responses.create(
                 model="gpt-4o-mini",
@@ -51,6 +58,8 @@ class LLMClient:
             )
             output = response.output_text
         return output
+if __name__ == "__main__":
+    client = Client()
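
Continuing the sketch above, the second and third hunks presumably leave the generation path looking roughly like the following. The method name generate, its signature, and the instructions/input arguments to responses.create are assumptions (the diff only shows the edited lines), and the smoke test here constructs LLMClient rather than the bare ollama Client() that the last hunk adds, on the assumption that the point is to exercise the class's startup path.

    def generate(self, prompt: str, system_prompt: str = "") -> str:
        if self.PROVIDER == "ollama":
            # The old prompt= keyword is replaced by a system + user messages list,
            # and the reply is read from response.message.content.
            response = self.ollama_client.chat(
                model="gemma3:4b",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
            )
            output = response.message.content
        elif self.PROVIDER == "openai":
            # OpenAI Responses API fallback; only model= and output_text appear in the diff.
            response = self.openai_client.responses.create(
                model="gpt-4o-mini",
                instructions=system_prompt,
                input=prompt,
            )
            output = response.output_text
        return output


if __name__ == "__main__":
    # Rough startup-time check: constructing LLMClient runs the probe chat,
    # then one generation exercises the selected provider.
    client = LLMClient()
    print(client.generate("Say hello.", system_prompt="You are a terse assistant."))
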