
    *itL                       % S SK Jr  S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SKJ
r
JrJrJr  S SKrS SKrS SKJr  S SKJrJr  S SKJrJrJrJrJrJrJr  S S	KJr  S S
KJr  S SK J!r!  S SK"J#r#  S SK$J%r%  S SK&J'r'  S SK(J)r)  S SK*J+r+J,r,J-r-J.r.  S SK/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8  S SK9J:r:  \" \:S9r;Sq<S\=S'   SsS jr>S r?Sq@S\=S'   \" 5       rA\" 5       rBStS jrCS rDSqES\=S'   SuS jrF   Sv     SwS jjrG        SxS  jrH        SyS! jrI      SzS" jrJ\!" S#S$9rK\" \?5      \" \K5      4   S{S% jjrLS&rM\;R                  S'S(\" \L5      /\\R                  \P4   S)S*S+S,S-0/S.S/.0S+S0S1S2S3.0S4.S5.0\M/S69\;R                  S7S#\" \L5      /\M/S89      S|S9 j5       5       rQ\;R                  S:S;\" \L5      /\M/S<9\" \D5      4   S}S= jj5       rR\;R                  S>S?\" \L5      /\\R                  \P4   S)S*S+S,S@0/S.S/.0S+S0SASBS3.0S4.S5.0\M/S69\" SCSDSESFSG.SHSISG./SJ.SK.SLSDSESFSG.SHSMSG./SNSO0SP.SK.SQSDSESFSG.SHSRSG./SSSTSUSVSNS00SNSW0SX.SYSZ/S[.S\.S]./SSSYST0S].S^.SK.S_SDSESFSG.SHSISG./S`SaSb.SK.Sc.Sd94     S~Se jj5       rT\;R                  SfSg\" \L5      /\M/S<9\" \D5      4   SSh jj5       rVSirW\;R                  SjSk\" \L5      /\W/S<9\" \D5      4     SSl jj5       rX\;R                  SmSn\" \L5      /\W/S<9\" \D5      4     SSo jj5       rY\;R                  SpSq\" \L5      /\W/S<9\" \D5      4     SSr jj5       rZg)    )annotationsN)Lock)partial)ListOptionalUnionDict)MemoryObjectSendStream)run_in_threadpooliterate_in_threadpool)DependsFastAPI	APIRouterRequestHTTPExceptionstatusBody)
Middleware)CORSMiddleware)
HTTPBearer)EventSourceResponse)RequestIdPlugin)RawContextMiddleware)
LlamaProxy)ConfigFileSettingsSettingsModelSettingsServerSettings)	CreateCompletionRequestCreateEmbeddingRequestCreateChatCompletionRequest	ModelListTokenizeInputRequestTokenizeInputResponseTokenizeInputCountResponseDetokenizeInputRequestDetokenizeInputResponse)RouteErrorHandler)route_classzOptional[ServerSettings]_server_settingsc                    U q g Nr*   )server_settingss    R/var/www/html/ai-backend/venv/lib/python3.13/site-packages/llama_cpp/server/app.pyset_server_settingsr0   5   s    &    c               #     #    [         v   g 7fr,   r-    r1   r/   get_server_settingsr4   :   s     
s   
zOptional[LlamaProxy]_llama_proxyc                    [        U S9qg )N)models)r   r5   model_settingss    r/   set_llama_proxyr:   D   s    ^4Lr1   c                  #    [         R                  5       I S h  vN   Sn  [        R                  5       I S h  vN    [         R                  5         Sn [        7v   [        R                  5          U (       a  [         R                  5         g g  Nu NX! [        R                  5         f = f! U (       a  [         R                  5         f f = f7f)NTF)llama_outer_lockacquirellama_inner_lockreleaser5   )release_outer_locks    r/   get_llama_proxyrA   I   s      
"
"
$$$
'&&(((	'$$&!&$$&$$&  % 	) $$&$$& sM   CBCB- BB- B B- 3CB- B**B- -CCz+typing.Optional[typing.Callable[[], bytes]]_ping_message_factoryc                    U q g r,   )rB   )factorys    r/   set_ping_message_factoryrE   _   s    #r1   c                :   [         R                  R                  SS 5      nUb  [         R                  R	                  U5      (       d  [        SU S35      e[        US5       nUR                  S5      (       d  UR                  S5      (       a>  SS Kn[        R                  " [        R                  " UR                  U5      5      5      nO$[        R                  " UR                  5       5      n[        R                   " U5      nUR"                  nS S S 5        Uc=  Uc:  U c
  [%        5       n [        R                   " U 5      n[&        R                   " U 5      /nUb  Uc   S5       e[)        U5        [+        [,        [/        5       4S	9/n[1        US
[2        R4                  UR6                  S9nUR9                  [:        S/SS/S/S9  UR=                  [>        5        Uc   e[A        US9  URB                  (       a  [E        S 5        U$ ! , (       d  f       N= f)NCONFIG_FILEzConfig file z not found!rbz.yamlz.ymlr   z<server_settings and model_settings must be provided together)pluginsu   🦙 llama.cpp Python API)
middlewaretitleversion	root_path*T)allow_originsallow_credentialsallow_methodsallow_headersr8   c                     [        5       $ r,   )bytesr3   r1   r/   <lambda>create_app.<locals>.<lambda>   s    r1   )#osenvirongetpathexists
ValueErroropenendswithyamlr   model_validate_jsonjsondumps	safe_loadreadr   model_validater7   r   r   r0   r   r   r   r   	llama_cpp__version__rM   add_middlewarer   include_routerrouterr:   disable_ping_eventsrE   )	settingsr.   r9   config_filefr_   config_file_settingsrJ   apps	            r/   
create_apprq   d   s   
 **..5Kww~~k**|K=DEE+t$##G,,0D0DV0L0L'9'M'MJJt~~a01($ (:'M'Maffh'W$,;;<PQO188N % >#9zH(77A'66x@A 	#(BFEFB (1O<M;OPQJ
)%%!++	C eee   v%%%>2** 1J] %$s   #B1H
Hc                (   Uc  [        [        R                  SS9eU" U5      nU R                  b4  U R                  S:X  a  [        X@R                  5      OU R                  US'   U R                  b,  [        R                  R                  U R                  5      US'   U R                  S:  ad  [        R                  " [        R                  " U R                  UR                  5       5      /5      nSU;  a  XSS'   U$ US   R                  U5        U$ )NzService is not availablestatus_codedetailtokens
logit_biasgrammarr   logits_processor)r   r   HTTP_503_SERVICE_UNAVAILABLErw   logit_bias_type_logit_bias_tokens_to_input_idsrx   rf   LlamaGrammarfrom_string
min_tokensLogitsProcessorListMinTokensLogitsProcessor	token_eosextend)bodyllama_proxy
body_modelkwargsllama_min_tokens_logits_processors         r/   prepare_request_resourcesr      s    ;;-
 	
 
#E" ##x/ ,E??C 	| ||%22>>t||Ly'0'D'D//ARST(
$ V+)E%& L %&--.JKLr1   c           
       #    [        [        5       5      nU(       a  UR                  OSn[        R                  " [
        5      " 5        IS h  vN n[        X(X55      n	U IS h  vN    [        XI40 UD6I S h  vN n
[        U
5        S h  vN nUR                  [        [        R                  " U5      S95      I S h  vN   U R                  5       I S h  vN (       a  [        R                  " 5       " 5       eU(       d  M}  [         R#                  5       (       d  M  UR                  [        SS95      I S h  vN   [        R                  " 5       " 5       e GN N N N N N N+
 UR                  [        SS95      I S h  vN    Oq! [        R                  " 5        aU  n[%        S5        [        R&                  " SSS9   [%        SU R(                   35        Ue! , (       d  f        S nAO= fS nAff = fS S S 5      IS h  vN    O! , IS h  vN  (       d  f       O= fS S S 5      IS h  vN    g ! , IS h  vN  (       d  f       g = f7f)	NF)dataz[DONE]disconnected   T)shieldz-Disconnected from client (via refresh/close) )nextr4   interrupt_requests
contextlibasynccontextmanagerrA   r   r   r   senddictra   rb   is_disconnectedanyioget_cancelled_exc_classr<   lockedprintmove_on_afterclient)requestinner_send_chanr   r   
llama_callr   r.   r   r   r   iteratorchunkes                r/   get_event_publisherr      s     .01O.=**5  --o>@@K)$ZP"?!2:!O!OO#8#B @%)..tE9J/KLLL$44666#;;=??)).>.E.E.G.G-224X3FGGG#;;=?? A"O@L6 H $C &**4X+>???002 n%((48GGWX G	 988 #????? A@@@@@sP  AIEIH=%E&H=)H+F:E!;F
E+E#
E+0FE%
FE'
(FF"F=E)
>FIH=!F#E+%F'F)F+FF
FHG<' G7G"	"
G4,G7/H4G77G<<H?H=
HH=H(	HH(	$H=+I6H97I=IIIIc                    0 nUR                  5        H;  u  p4UR                  S5      nU R                  USSS9 H  nXB[        U5      '   M     M=     U$ )Nutf-8FT)add_bosspecial)itemsencodetokenizestr)r   rw   to_biastokenscoreinput_ids         r/   r|   r|      sY     !#G"((*W%ueTJH%*CM" K + Nr1   F)
auto_errorc                   #    U R                   c  gU(       a&  UR                  U R                   :X  a  UR                  $ [        [        R                  SS9e7f)NTzInvalid API keyrs   )api_keycredentialsr   r   HTTP_401_UNAUTHORIZED)rl   authorizations     r/   authenticater      sU     
  22h6F6FF((( 00  s   AAz	OpenAI V1z/v1/completions
Completion200zSuccessful Responseschemaz$refz-#/components/schemas/CreateCompletionResponsez&Completion response, when stream=False)anyOfrK   stringzServer Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_formatzQdata: {... see CreateCompletionResponse ...} \n\n data: ... \n\n ... data: [DONE])typerK   example)zapplication/jsonztext/event-stream)descriptioncontent)summarydependenciesresponse_model	responsestags%/v1/engines/copilot-codex/completions)include_in_schemar   r   c                  #    [        UR                  [        5      (       aJ  [        UR                  5      S::  d   e[        UR                  5      S:  a  UR                  S   OSUl        U R                  R
                  S:w  a  UR                  OSn1 SknUR                  US9nUR                  SS	5      (       aQ  [        R                  " S
5      u  pV[        U[        [        U UUU[        R                  R                   US9S["        S9$ [$        R&                  " [(        5      " 5        IS h  vN n[+        XX$5      nU R-                  5       I S h  vN (       a0  [/        SU R0                   35        [3        [4        R6                  SS9e[9        U40 UD6I S h  vN sS S S 5      IS h  vN   $  N N` N N! , IS h  vN  (       d  f       g = f7f)Nr   r    r   zcopilot-codex>   nuserbest_ofr   r{   excludestreamF
   r   r   r   r   r   r   
data_sender_callablesepping_message_factory@Disconnected from client (via refresh/close) before llm invoked Client closed requestrs   )
isinstancepromptlistlenurlrZ   model
model_dumprY   r   create_memory_object_streamr   r   r   rf   Llama__call__rB   r   r   rA   r   r   r   r   r   r   HTTP_400_BAD_REQUESTr   	r   r   r   r   r   	send_chan	recv_chanr   r   s	            r/   create_completionr     s    V $++t$$4;;1$$$(+DKK(81(<dkk!n" ;;FF 	

 G __W_-F zz(E""$@@D	"!(# )%$??33" !6
 	
  --o>@@K)$ZP((***RSZSaSaRbc  "77. 
 'u777 A@@ + 8 A@@@sm   D0G2F53G6 F=F7AF=F9 F=#G/F;0G7F=9F=;G=GGGGz/v1/embeddings	Embedding)r   r   r   c                   #    [        U" U R                  5      R                  40 U R                  S1S9D6I S h  vN $  N7f)Nr   r   )r   r   create_embeddingr   )r   r   s     r/   r   r   h  sH      #GMM"33


fX

.   s   8A?Az/v1/chat/completionsChatz1#/components/schemas/CreateChatCompletionResponsezServer Side Streaming response, when stream=TrueSee SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_formatzUdata: {... see CreateChatCompletionResponse ...} \n\n data: ... \n\n ... data: [DONE]zChat Completionzgpt-3.5-turbosystemzYou are a helpful assistant.)roler   r   zWhat is the capital of France?)r   messages)r   valuez	JSON Modez Who won the world series in 2020r   json_object)r   r   response_formatzTool CallingzExtract Jason is 30 years old.functionUserzUser recordobjectnumber)nameager   r   )r   
propertiesrequired)r   r   
parameters)r   r   )r   r   toolstool_choiceLogprobsTr   )r   r   logprobstop_logprobs)normal	json_modetool_callingr   )openapi_examplesc                  #    UR                   n1 SknUR                  US9nUR                  SS5      (       aQ  [        R                  " S5      u  pV[        U[        [        U UUU[        R                  R                  US9S[        S9$ [        R                  " [        5      " 5        IS h  vN n[        XX$5      nU R!                  5       I S h  vN (       a0  [#        S	U R$                   35        ['        [(        R*                  S
S9e[-        UR                  40 UD6I S h  vN sS S S 5      IS h  vN   $  N Nj N N! , IS h  vN  (       d  f       g = f7f)N>   r   r   r   r{   r   r   Fr   r   r   r   r   r   rs   )r   r   rY   r   r   r   r   r   rf   r   create_chat_completionrB   r   r   rA   r   r   r   r   r   r   r   r   r   s	            r/   r  r  x  s8    \ JG __W_-F zz(E""$@@D	"!(# )%$??AA" !6
 	
  --o>@@K)$ZP((***RSZSaSaRbc  "77. 
 'u'C'CNvNN A@@ + O A@@@sm   B+E-D:.E1 ED<AE$D>%E(E4E 5E<E>E EEE	EEz
/v1/modelsModelsc           	     J   #    SU  Vs/ s H
  nUSS/ S.PM     snS.$ s  snf 7f)Nr   r   me)idr   owned_bypermissions)r   r   r3   )r   model_aliass     r/   
get_modelsr    sH        +
  + "! !	  +
 
s   #
#Extrasz/extras/tokenizeTokenizec                   #    U" U R                   5      R                  U R                  R                  S5      SS9n[	        US9$ 7f)Nr   Tr   )rv   )r   r   inputr   r$   r   r   rv   s      r/   r   r   +  s@      $--djj.?.?.HRV-WF //s   AAz/extras/tokenize/countzTokenize Countc                   #    U" U R                   5      R                  U R                  R                  S5      SS9n[	        [        U5      S9$ 7f)Nr   Tr  )count)r   r   r  r   r%   r   r  s      r/   count_query_tokensr  :  sD      $--djj.?.?.HRV-WF%CK88s   AAz/extras/detokenize
Detokenizec                   #    U" U R                   5      R                  U R                  5      R                  S5      n[	        US9$ 7f)Nr   )text)r   
detokenizerv   decoder'   )r   r   r  s      r/   r  r  I  s:      tzz"--dkk:AA'JD"--s   AA)r.   r   )r9   zList[ModelSettings])rD   ztyping.Callable[[], bytes])NNN)rl   zSettings | Noner.   zServerSettings | Noner9   zList[ModelSettings] | None)r   5CreateCompletionRequest | CreateChatCompletionRequestr   r   r   
str | Nonereturnllama_cpp.Llama)r   r   r   z"MemoryObjectSendStream[typing.Any]r   r  r   r  )r   r  rw   Dict[str, float]r  r  )rl   r   r   zOptional[str])r   r   r   r   r  zllama_cpp.Completion)r   r    r   r   )r   r   r   r!   r  zllama_cpp.ChatCompletion)r   r   r  r"   )r   r#   r   r   r  r$   )r   r#   r   r   r  r%   )r   r&   r   r   r  r'   )[
__future__r   rW   ra   typingr   r   r   	functoolsr   r   r   r   r	   rf   anyio.streams.memoryr
   starlette.concurrencyr   r   fastapir   r   r   r   r   r   r   fastapi.middlewarer   fastapi.middleware.corsr   fastapi.securityr   sse_starlette.sser   starlette_context.pluginsr   starlette_context.middlewarer   llama_cpp.server.modelr   llama_cpp.server.settingsr   r   r   r   llama_cpp.server.typesr   r    r!   r"   r#   r$   r%   r&   r'   llama_cpp.server.errorsr(   rj   r*   __annotations__r0   r4   r5   r<   r>   r:   rA   rB   rE   rq   r   r   r|   bearer_schemer   openai_v1_tagpostCreateCompletionResponser   r   r   ChatCompletionr  rY   r  
extras_tagr   r  r  r3   r1   r/   <module>r7     sD   " 	      . .   7 J U U U ) 2 ' 1 5 = 
 
 
 6 
0	1-1 * 1'
 &*" )6 6 5
'& FJ B I$ !%-11577*7 /7t
? 
 B7 @ 	D		 	 	 e, !!45#*=#9 (  ,'(**	
 	0 #%TU" "J	%  ("]#~	&
. ?   B +,'(
	  6868
!68 68C N68r ,'(
	   &o6# ,'(113670  !'(["
 "J	%  ("] $C	&
2 =  D )- -,!)6TU!'4TU!	 ',!)6TU!'4VW! )/'>
 *,!)6TU!'4TU! %/(./<,4170B06/A3& 28/")$ !+"F%$/ D &,!)6TU!'4TU! !%$&oC
E)vOvO
%vOP QvOA@vOr ,'(
	   &o6" 
 ,'(
	   &o60
00 00 ,'(
	   &o69
99  99 ,'(
	   &o6.
 .. ..r1   