@@ -1,4 +1,4 @@
-# --- Braincell Orchestrator (Middleman Proxy) ---
+# --- bitnet Orchestrator (Middleman Proxy) ---
 from pydantic import BaseModel

 from fastapi import FastAPI, HTTPException, Query, Depends
@@ -10,6 +10,11 @@
 import time
 import httpx

+from typing import List
+from pydantic import BaseModel, Field
+from fastapi import HTTPException
+import asyncio
+
 # --- Server Process Management ---
 # Each server instance is tracked by a unique (host, port) key
 server_processes = {}
@@ -40,12 +45,11 @@ def _max_threads():
     return os.cpu_count() or 1

 async def initialize_server_endpoint(
-    model: ModelEnum,
-    threads: int = Query(os.cpu_count() // 2, gt=0, le=os.cpu_count()),
+    threads: int = Query(1, gt=0, le=os.cpu_count()),
     ctx_size: int = Query(2048, gt=0),
-    port: int = Query(8081, gt=1023),
+    port: int = Query(8081, gt=8080, le=65535),
     system_prompt: str = Query("You are a helpful assistant.", description="Unique system prompt for this server instance"),
-    n_predict: int = Query(4096, gt=0, description="Number of tokens to predict for the server instance"),
+    n_predict: int = Query(256, gt=0, description="Number of tokens to predict for the server instance."),
     temperature: float = Query(0.8, gt=0.0, le=2.0, description="Temperature for sampling")
 ):
     """
@@ -71,7 +75,7 @@ async def initialize_server_endpoint(
         raise HTTPException(status_code=429, detail=f"Cannot start server: would oversubscribe CPU threads (in use: {threads_in_use}, requested: {threads}, max: {max_threads})")
     command = [
         server_path,
-        '-m', model.value,
+        '-m', "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf",
         '-c', str(ctx_size),
         '-t', str(threads),
         '-n', str(n_predict),
@@ -96,7 +100,7 @@ async def initialize_server_endpoint(
         raise HTTPException(status_code=500, detail=f"Server failed to start. Stderr: {stderr_output}")
     server_processes[key] = proc
     server_configs[key] = {
-        "model": model.value,
+        "model": "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf",
         "threads": threads,
         "ctx_size": ctx_size,
         "host": host,
@@ -241,43 +245,70 @@ def get_model_sizes():

 class ChatRequest(BaseModel):
     message: str
-    port: int
-    # Optionally add user/session id, etc.
+    port: int = 8081
+    threads: int = 1
+    ctx_size: int = 2048
+    n_predict: int = 256
+    temperature: float = 0.8

-def chat_with_braincell(
+def chat_with_bitnet(
     chat: ChatRequest
 ):
     """
-    Middleman endpoint: receives a chat message and forwards it to the specified braincell (llama server instance) by port.
-    Returns the response from the braincell.
+    Middleman endpoint: receives a chat message and forwards it to the specified bitnet (llama server instance) by port.
+    Returns the response from the bitnet.
     """
     host = "127.0.0.1"
     key = (host, chat.port)
     proc = server_processes.get(key)
     cfg = server_configs.get(key)
     if not (proc and proc.poll() is None and cfg):
-        raise HTTPException(status_code=503, detail=f"Braincell server not running on {host}:{chat.port}. Initialize it first.")
+        raise HTTPException(status_code=503, detail=f"bitnet server not running on {host}:{chat.port}. Initialize it first.")
     server_url = f"http://{host}:{chat.port}/completion"
     payload = {
-        "prompt": chat.message
+        "prompt": chat.message,
+        "threads": chat.threads,
+        "ctx_size": chat.ctx_size,
+        "n_predict": chat.n_predict,
+        "temperature": chat.temperature
     }
     async def _chat():
         async with httpx.AsyncClient() as client:
             try:
-                response = await client.post(server_url, json=payload, timeout=120.0)
+                response = await client.post(server_url, json=payload, timeout=180.0)
                 response.raise_for_status()
                 result_data = response.json()
                 content = result_data.get("content", result_data)
                 return {"result": content}
             except httpx.TimeoutException:
-                raise HTTPException(status_code=504, detail="Request to braincell server timed out.")
+                raise HTTPException(status_code=504, detail="Request to bitnet server timed out.")
             except httpx.ConnectError:
-                raise HTTPException(status_code=503, detail=f"Could not connect to braincell server at {server_url}. Is it running?")
+                raise HTTPException(status_code=503, detail=f"Could not connect to bitnet server at {server_url}. Is it running?")
             except httpx.RequestError as e:
-                raise HTTPException(status_code=500, detail=f"Error during request to braincell server: {str(e)}")
+                raise HTTPException(status_code=500, detail=f"Error during request to bitnet server: {str(e)}")
             except httpx.HTTPStatusError as e:
                 error_detail = e.response.text or str(e)
-                raise HTTPException(status_code=e.response.status_code, detail=f"Braincell server returned error: {error_detail}")
+                raise HTTPException(status_code=e.response.status_code, detail=f"bitnet server returned error: {error_detail}")
             except Exception as e:
                 raise HTTPException(status_code=500, detail=f"Unexpected error during chat: {str(e)}")
     return _chat
+
+class MultiChatRequest(BaseModel):
+    requests: List[ChatRequest]
+
+async def multichat_with_bitnet(multichat: MultiChatRequest):
+    async def run_chat(chat_req: ChatRequest):
+        chat_fn = chat_with_bitnet(chat_req)
+        return await chat_fn()
+    results = await asyncio.gather(*(run_chat(req) for req in multichat.requests), return_exceptions=True)
+    # Format results: if exception, return error message
+    formatted = []
+    for res in results:
+        if isinstance(res, Exception):
+            if isinstance(res, HTTPException):
+                formatted.append({"error": res.detail, "status_code": res.status_code})
+            else:
+                formatted.append({"error": str(res)})
+        else:
+            formatted.append(res)
+    return {"results": formatted}
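A minimal driver sketch for the new multichat path follows (hypothetical, not part of the commit; the prompts and port 8082 below are placeholders). It shows the calling convention the diff implies: chat_with_bitnet returns the inner _chat coroutine function, so a caller must invoke and await it, which is what multichat_with_bitnet does per request via run_chat.

# Hypothetical driver, not part of the commit: fan one prompt out to two
# bitnet instances and print the aggregated results.
import asyncio

async def demo():
    batch = MultiChatRequest(requests=[
        ChatRequest(message="Summarize BitNet b1.58 in one sentence.", port=8081),
        ChatRequest(message="Summarize BitNet b1.58 in one sentence.", port=8082, temperature=0.2),
    ])
    # Each entry in "results" is either {"result": ...} or {"error": ..., "status_code": ...}
    print(await multichat_with_bitnet(batch))

asyncio.run(demo())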