
    *izs                    \   S SK Jr  S SKrS SKrS SKJrJrJrJrJ	r	J
r
Jr  S SKJrJr  S SKJr  S SKrS SKJr  SSK7  SSKJr  SS	KJr  S SKJr   " S
 S5      r " S S5      r " S S5      r " S S5      rS r \ " S S5      5       r!\ " S S5      5       r" " S S5      r# " S S5      r$g)    )annotationsN)DictListTupleOptionalSequenceCallableUnion)	dataclassfield)	ExitStack   )*)LlamaGrammar)suppress_stdout_stderrc                  f   \ rS rSrSrSS.     S&S jjrS rS rS'S jrS'S	 jr	S'S
 jr
S'S jrS(S jrS)S jrS'S jrS'S jrS*S jrS+S jrS,S jrS-S jrS'S jrS'S jrS'S jrS'S jrS'S jrS'S jrS'S jrS'S jrS'S jrS.S jrS.S jrS/S jrS0S1S  jjr S0S2S! jjr!S3S" jr"\#S# 5       r$S$r%g%)4
LlamaModel   z~Intermediate Python wrapper for a llama.cpp llama_model.
NOTE: For stability it's recommended you use the Llama class instead.Tverbosec               L  ^  UT l         UT l        UT l        [        5       T l        S n[
        R                  R                  U5      (       d  [        SU 35      e[        US9   [        R                  " T R                   R                  S5      T R                  5      nS S S 5        Uc  [        SU 35      e[        R                  " U5      nUc  [        SU 35      eUT l        UT l        S T l        U 4S jnT R                  R#                  U5        g ! , (       d  f       N}= f)NzModel path does not exist: )disableutf-8z Failed to load model from file: z Failed to get vocab from model: c                 p   > T R                   c  g [        R                  " T R                   5        S T l         g N)model	llama_cppllama_model_freeselfs   R/var/www/html/ai-backend/venv/lib/python3.13/site-packages/llama_cpp/_internals.py
free_model'LlamaModel.__init__.<locals>.free_modelE   )    zz!&&tzz2DJ    )
path_modelparamsr   r   _exit_stackospathexists
ValueErrorr   r   llama_model_load_from_fileencodellama_model_get_vocabr   vocabsamplercallback)r    r&   r'   r   r   r0   r"   s   `      r!   __init__LlamaModel.__init__#   s     %$;ww~~j))::,GHH#G488&&w/E 5
 =?
|LMM//6=?
|LMM

	 	!!*-/ 54s   #;D
D#c                   U R                   bY  [        U R                  5       H&  u  p[        R                  " U R                   U5        M(     U R                  R                  5         U R                  R                  5         g r   )r1   reversedcustom_samplersr   llama_sampler_chain_removeclearr(   close)r    i_s      r!   r:   LlamaModel.closeM   s[    <<# !5!5644T\\1E 7  &&( r%   c                $    U R                  5         g r   r:   r   s    r!   __del__LlamaModel.__del__U       

r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_typer0   r   s    r!   
vocab_typeLlamaModel.vocab_typeX       ))$**55r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_n_tokensr0   r   s    r!   n_vocabLlamaModel.n_vocab[       --djj99r%   c                B    [         R                  " U R                  5      $ r   )r   llama_model_n_ctx_trainr   r   s    r!   n_ctx_trainLlamaModel.n_ctx_train^       00<<r%   c                B    [         R                  " U R                  5      $ r   )r   llama_model_n_embdr   r   s    r!   n_embdLlamaModel.n_embda   s    ++DJJ77r%   c                B    [         R                  " U R                  5      $ r   )r   !llama_model_rope_freq_scale_trainr   r   s    r!   rope_freq_scale_train LlamaModel.rope_freq_scale_traind   s    ::4::FFr%   c                    [         R                  " S5      n[        R                  " U R                  US5        UR
                  R                  S5      $ )N   r   )ctypescreate_string_bufferr   llama_model_descr   valuedecode)r    bufs     r!   descLlamaModel.descg   s=    ))$/""4::sD9yy((r%   c                B    [         R                  " U R                  5      $ r   )r   llama_model_sizer   r   s    r!   sizeLlamaModel.sizel   rG   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_model_n_paramsr   r   s    r!   n_paramsLlamaModel.n_paramso   rL   r%   c                    [        S5      e)Nz*get_tensor is not implemented in llama.cppNotImplementedError)r    names     r!   
get_tensorLlamaModel.get_tensorr   s    !"NOOr%   c                b    [         R                  " U R                  U5      R                  S5      $ Nr   )r   llama_vocab_get_textr0   r`   r    tokens     r!   token_get_textLlamaModel.token_get_textw   s$    --djj%@GGPPr%   c                D    [         R                  " U R                  U5      $ r   )r   llama_vocab_get_scorer0   ru   s     r!   token_get_scoreLlamaModel.token_get_scorez   s    ..tzz5AAr%   c                D    [         R                  " U R                  U5      $ r   )r   llama_vocab_get_attrr0   ru   s     r!   token_get_attrLlamaModel.token_get_attr}   s    --djj%@@r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_bosr0   r   s    r!   	token_bosLlamaModel.token_bos       ((44r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_eosr0   r   s    r!   	token_eosLlamaModel.token_eos   r   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_clsr0   r   s    r!   	token_clsLlamaModel.token_cls   r   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_sepr0   r   s    r!   	token_sepLlamaModel.token_sep   r   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_nlr0   r   s    r!   token_nlLlamaModel.token_nl   s    ''

33r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_fim_prer0   r   s    r!   token_prefixLlamaModel.token_prefix       ,,TZZ88r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_fim_midr0   r   s    r!   token_middleLlamaModel.token_middle   r   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_fim_sufr0   r   s    r!   token_suffixLlamaModel.token_suffix   r   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_eotr0   r   s    r!   	token_eotLlamaModel.token_eot   r   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_get_add_bosr0   r   s    r!   add_bos_tokenLlamaModel.add_bos_token   rQ   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_vocab_get_add_eosr0   r   s    r!   add_eos_tokenLlamaModel.add_eos_token   rQ   r%   c           	        U R                  5       n[        R                  U-  " 5       n[        R                  " U R                  U[        U5      XTX#5      nUS:  ag  [        U5      n[        R                  U-  " 5       n[        R                  " U R                  U[        U5      XVX#5      nUS:  a  [        SU SU 35      e[        US U 5      $ )Nr   zFailed to tokenize: text="z" n_tokens=)	rO   r   llama_tokenllama_tokenizer0   lenabsRuntimeErrorlist)r    textadd_bosspecialn_ctxtokensn_tokenss          r!   tokenizeLlamaModel.tokenize   s      "''%/2++JJc$i
 a<8}H++h69F //

D#d)VwH !|"0k(L  F9H%&&r%   c                    [         R                  " S5      n[        R                  " U R                  XSSU5        [        U5      $ )N    r   )r\   r]   r   llama_token_to_piecer0   bytes)r    rv   r   ra   s       r!   token_to_pieceLlamaModel.token_to_piece   s6    ))"-&&tzz5r1gNSzr%   c           	     X   SnSn[         R                  U-  " 5       nU HS  n[        R                  " U R                  [        R
                  " U5      XTSU5      nXt::  d   eU[        US U 5      -  nMU     [        U5      S:  a%  US   U R                  5       :X  a  USS S:X  a  USS  $ U$ )Nr%   r   r   r       )	r\   c_charr   r   r0   r   r   r   r   )r    r   r   outputrf   bufferrv   ns           r!   
detokenizeLlamaModel.detokenize   s    --$&)E..

I11%8&7A 99eF2AJ''F  6{Q6!90@#@VAa[TXEX 12J	
 	
r%   c                   0 nSn[         R                  " U5      nSU-  Ul        [        [        R
                  " U R                  5      5       GH  n[        R                  " U R                  XCU5      nXR:  a=  US-   n[         R                  " U5      n[        R                  " U R                  XCU5      nUR                  R                  S5      n[        R                  " U R                  XCU5      nXR:  a=  US-   n[         R                  " U5      n[        R                  " U R                  XCU5      nUR                  R                  S5      nXqU'   GM	     U$ )Nr[       r   r   )
r\   r]   r_   ranger   llama_model_meta_countr   llama_model_meta_key_by_indexr`   !llama_model_meta_val_str_by_index)r    metadatabuffer_sizer   r;   nbyteskeyr_   s           r!   r   LlamaModel.metadata   s/   #%,,[9{*y77

CDA<<

A{F #$qj44[A"@@JJ; ,,%%g.C@@

A{F #$qj44[A"DDJJ; LL''0E!SM+ E, r%   c                 ,    [         R                  " 5       $ )z#Get the default llama_model_params.)r   llama_model_default_params r%   r!   default_paramsLlamaModel.default_params   s     3355r%   )r(   r   r'   r&   r1   r   r0   N)r&   strr'   zllama_cpp.llama_model_paramsr   boolreturnint)r   float)r   r   )ro   r   r   zctypes.c_void_p)rv   r   r   r   )rv   r   r   r   )rv   r   r   r   )r   r   )r   r   r   r   r   r   )F)rv   r   r   r   r   r   )r   	List[int]r   r   r   r   )r   zDict[str, str])&__name__
__module____qualname____firstlineno____doc__r3   r:   r@   rE   rJ   rO   rT   rX   rb   rf   rj   rp   rw   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   staticmethodr   __static_attributes__r   r%   r!   r   r      s    M (. (. -	(.
 (.T!6:=8G)
6:P
QBA
555549995==
'$

&> 6 6r%   r   c                     \ rS rSrSrSS.     S+S jjrS rS rS,S jrS,S	 jr	S
 r
S-S jrS.S jrS/S jrS0S jrS,S jrS1S jrS1S jrS2S jrS rS3S jrS rS3S jrS/S jrS4S jr            S5S jrS6S jrS7S jrS8S jrS8S jr      S8S jrS9S jrS:S  jr             S;S! jr!          S<S" jr"S=S# jr#S=S$ jr$S>S% jr%S& r&S' r'\(S( 5       r)S)r*g*)?LlamaContext   zIntermediate Python wrapper for a llama.cpp llama_context.
NOTE: For stability it's recommended you use the Llama class instead.Tr   c               |  ^  UT l         UT l        UT l        [        5       T l        [
        R                  " T R                   R                   T R                  5      nUc  [        S5      eUT l        [
        R                  " T R                  5      T l
        S T l        U 4S jnT R                  R                  U5        g )NzFailed to create llama_contextc                 p   > T R                   c  g [        R                  " T R                   5        S T l         g r   )ctxr   
llama_freer   s   r!   free_ctx'LlamaContext.__init__.<locals>.free_ctx  s)    xx  *DHr%   )r   r'   r   r   r(   r   llama_init_from_modelr,   r   llama_get_memorymemoryr1   r2   )r    r   r'   r   r   r   s   `     r!   r3   LlamaContext.__init__   s     
$;--djj.>.>L;=>>00:	 	!!(+r%   c                8    U R                   R                  5         g r   r(   r:   r   s    r!   r:   LlamaContext.close       r%   c                $    U R                  5         g r   r?   r   s    r!   r@   LlamaContext.__del__  rB   r%   c                B    [         R                  " U R                  5      $ r   )r   llama_n_ctxr   r   s    r!   r   LlamaContext.n_ctx  s    $$TXX..r%   c                B    [         R                  " U R                  5      $ r   )r   llama_pooling_typer   r   s    r!   pooling_typeLlamaContext.pooling_type  s    ++DHH55r%   c                n    U R                   c   S5       e[        R                  " U R                   S5        g )NMemory is not initializedT)r   r   llama_memory_clearr   s    r!   kv_cache_clearLlamaContext.kv_cache_clear!  s,    {{&C(CC&$$T[[$7r%   c                    U R                   c   S5       eUS:  a  UOSn[        R                  " U R                   XU5        g )Nr  r   )r   r   llama_memory_seq_rm)r    seq_idp0p1s       r!   kv_cache_seq_rmLlamaContext.kv_cache_seq_rm%  s;    {{&C(CC&!Q;A%%dkk6rBr%   c                p    U R                   c   S5       e[        R                  " U R                   XX45        g Nr  )r   r   llama_memory_seq_cp)r    
seq_id_src
seq_id_dstr  r  s        r!   kv_cache_seq_cpLlamaContext.kv_cache_seq_cp*  s.    {{&C(CC&%%dkk:2Rr%   c                n    U R                   c   S5       e[        R                  " U R                   U5        g r  )r   r   llama_memory_seq_keepr    r  s     r!   kv_cache_seq_keepLlamaContext.kv_cache_seq_keep.  s,    {{&C(CC&''V<r%   c                p    U R                   c   S5       e[        R                  " U R                   XX45        g r  )r   r   llama_memory_seq_add)r    r  r  r  shifts        r!   kv_cache_seq_shiftLlamaContext.kv_cache_seq_shift2  s.    {{&C(CC&&&t{{FJr%   c                B    [         R                  " U R                  5      $ r   )r   llama_state_get_sizer   r   s    r!   get_state_sizeLlamaContext.get_state_size6      --dhh77r%   c                    [         R                  " U R                  UR                  5      nUS:w  a  [	        SU 35      eg )Nr   zllama_decode returned )r   llama_decoder   batchr   r    r(  return_codes      r!   r`   LlamaContext.decodeA  B    ,,HHKK
 !!7}EFF r%   c                    [         R                  " U R                  UR                  5      nUS:w  a  [	        SU 35      eg )Nr   zllama_encode returned )r   llama_encoder   r(  r   r)  s      r!   r.   LlamaContext.encodeI  r,  r%   c                F    [         R                  " U R                  X5        g r   )r   llama_set_n_threadsr   )r    	n_threadsn_threads_batchs      r!   set_n_threadsLlamaContext.set_n_threadsQ  s    %%dhh	Kr%   c                B    [         R                  " U R                  5      $ r   )r   llama_get_logitsr   r   s    r!   
get_logitsLlamaContext.get_logitsT  s    ))$((33r%   c                D    [         R                  " U R                  U5      $ r   )r   llama_get_logits_ithr   r    r;   s     r!   get_logits_ithLlamaContext.get_logits_ithW  s    --dhh::r%   c                B    [         R                  " U R                  5      $ r   )r   llama_get_embeddingsr   r   s    r!   get_embeddingsLlamaContext.get_embeddingsZ  r%  r%   c                D    [         R                  " U R                  U5      $ r   )r   llama_get_embeddings_ithr   r<  s     r!   get_embeddings_ithLlamaContext.get_embeddings_ith]  s    11$((A>>r%   c                D    [         R                  " U R                  U5      $ r   )r   llama_get_embeddings_seqr   r  s     r!   get_embeddings_seqLlamaContext.get_embeddings_seq`  s    11$((FCCr%   c                    [        S5      e)Nz4set_rng_seed is deprecated, use LlamaSampler insteadrm   )r    seeds     r!   set_rng_seedLlamaContext.set_rng_seede      !"XYYr%   c                    [        S5      e)NzCsample_repetition_penalties is deprecated, use LlamaSampler insteadrm   )r    
candidateslast_tokens_datapenalty_last_npenalty_repeatpenalty_freqpenalty_presents          r!   sample_repetition_penalties(LlamaContext.sample_repetition_penaltiesh  s     ""ghhr%   c                    [        S5      e)Nz6sample_softmax is deprecated, use LlamaSampler insteadrm   r    rQ  s     r!   sample_softmaxLlamaContext.sample_softmaxs      !"Z[[r%   c                    [        S5      e)Nz4sample_top_k is deprecated, use LlamaSampler insteadrm   )r    rQ  kmin_keeps       r!   sample_top_kLlamaContext.sample_top_kv  rO  r%   c                    [        S5      e)Nz4sample_top_p is deprecated, use LlamaSampler insteadrm   r    rQ  pr`  s       r!   sample_top_pLlamaContext.sample_top_py  rO  r%   c                    [        S5      e)Nz4sample_min_p is deprecated, use LlamaSampler insteadrm   rd  s       r!   sample_min_pLlamaContext.sample_min_p|  rO  r%   c                    [        S5      e)Nz6sample_typical is deprecated, use LlamaSampler insteadrm   rd  s       r!   sample_typicalLlamaContext.sample_typical  s     ""Z[[r%   c                    [        S5      e)Nz3sample_temp is deprecated, use LlamaSampler insteadrm   )r    rQ  temps      r!   sample_tempLlamaContext.sample_temp  s    !"WXXr%   c                    [        S5      e)Nz6sample_grammar is deprecated, use LlamaSampler insteadrm   )r    rQ  grammars      r!   sample_grammarLlamaContext.sample_grammar  r]  r%   c                    [        S5      e)Nz=sample_token_mirostat is deprecated, use LlamaSampler insteadrm   )r    rQ  tauetammus         r!   sample_token_mirostat"LlamaContext.sample_token_mirostat  s     ""abbr%   c                    [        S5      e)Nz@sample_token_mirostat_v2 is deprecated, use LlamaSampler insteadrm   )r    rQ  rw  rx  rz  s        r!   sample_token_mirostat_v2%LlamaContext.sample_token_mirostat_v2  s     ""deer%   c                    [        S5      e)Nz;sample_token_greedy is deprecated, use LlamaSampler insteadrm   rZ  s     r!   sample_token_greedy LlamaContext.sample_token_greedy  s    !"_``r%   c                    [        S5      e)Nz4sample_token is deprecated, use LlamaSampler insteadrm   rZ  s     r!   sample_tokenLlamaContext.sample_token  rO  r%   c                    [        S5      e)Nz<grammar_accept_token is deprecated, use LlamaSampler insteadrm   )r    rs  rv   s      r!   grammar_accept_token!LlamaContext.grammar_accept_token  s    !"`aar%   c                D    [         R                  " U R                  5        g r   )r   llama_perf_context_resetr   r   s    r!   reset_timingsLlamaContext.reset_timings      **4884r%   c                D    [         R                  " U R                  5        g r   )r   llama_perf_context_printr   r   s    r!   print_timingsLlamaContext.print_timings  r  r%   c                 ,    [         R                  " 5       $ )z%Get the default llama_context_params.)r   llama_context_default_paramsr   r%   r!   r   LlamaContext.default_params  s     5577r%   )r(   r   r   r   r'   r1   r   N)r   r   r'   zllama_cpp.llama_context_paramsr   r   r   )r  r   r  r   r  r   )r  r   r  r   r  r   r  r   )r  r   )r  r   r  r   r  r   r  r   )r(  
LlamaBatch)r2  r   r3  r   )r;   r   rL  r   )rQ  '_LlamaTokenDataArray'rR  z('llama_cpp.Array[llama_cpp.llama_token]'rS  r   rT  r   rU  r   rV  r   )rQ  r  )rQ  r  r_  r   r`  r   )rQ  r  re  r   r`  r   )rQ  r  ro  r   )rQ  r  rs  r   )rQ  r  rw  r   rx  r   ry  r   rz  ,llama_cpp.CtypesPointerOrRef[ctypes.c_float]r   r   )
rQ  r  rw  r   rx  r   rz  r  r   r   )rQ  r  r   r   )rs  r   rv   r   )+r   r   r   r   r   r3   r:   r@   r   r  r  r  r  r  r  r#  r`   r.   r4  r8  r=  rA  rE  rI  rM  rW  r[  ra  rf  ri  rl  rp  rt  r{  r~  r  r  r  r  r  r   r   r   r   r%   r!   r   r      s   M , , /	,
 ,:!/68C
S=K8GGL4;8?D
Z	i*	i C	i 		i
 	i 	i 	i\ZZZ\0\5:\FI\
Y\c*c c 	c
 c 9c 
cf*f f 	f
 9f 
faZb55 8 8r%   r   c                  d    \ rS rSrSS.       SS jjrS rS rSS jrS rSS	 jr	SS
 jr
Srg)r  i  Tr   c               B  ^  UT l         UT l        UT l        UT l        [	        5       T l        [        R                  " T R                   T R                  T R                  5      nUc  [        S5      eUT l	        S T l
        U 4S jnT R
                  R                  U5        g )NzFailed to create llama_batchc                 p   > T R                   c  g [        R                  " T R                   5        S T l         g r   )r(  r   llama_batch_freer   s   r!   
free_batch'LlamaBatch.__init__.<locals>.free_batch  r$   r%   )	_n_tokensembd	n_seq_maxr   r   r(   r   llama_batch_initr,   r(  r1   r2   )r    r   r  r  r   r(  r  s   `      r!   r3   LlamaBatch.__init__  s     "	"$;**4>>499dnnU=;<<
	 	!!*-r%   c                8    U R                   R                  5         g r   r   r   s    r!   r:   LlamaBatch.close  r   r%   c                $    U R                  5         g r   r?   r   s    r!   r@   LlamaBatch.__del__  rB   r%   c                .    U R                   R                  $ r   r(  r   r   s    r!   r   LlamaBatch.n_tokens  s    zz"""r%   c                &    SU R                   l        g )Nr   r  r   s    r!   resetLlamaBatch.reset  s    

r%   c                   [        U5      nX@R                  l        [        U5       H  nX   U R                  R                  U'   X%-   U R                  R
                  U'   SU R                  R                  U   S'   SU R                  R                  U'   X0R                  R                  U'   M     SU R                  R                  US-
  '   g Nr   r   T	r   r(  r   r   rv   posr  n_seq_idlogits)r    r(  n_past
logits_allr   r;   s         r!   	set_batchLlamaBatch.set_batch  s    u:&

xA"'(DJJQ &
DJJNN1&'DJJa #%&DJJ"#-JJa  ! +/

(Q,'r%   c                   [        U5      nU R                  R                  nU R                  =R                  U-  sl        [        U5       H  nXV-   nX   U R                  R                  U'   X`R                  R
                  U'   X R                  R                  U   S'   SU R                  R                  U'   X0R                  R                  U'   M     SU R                  R                  US-
  '   g r  r  )r    r(  r  r  r   	n_tokens0r;   js           r!   add_sequenceLlamaBatch.add_sequence  s    u:JJ''	

x'xAA"'(DJJQ !JJNN1&,JJa #%&DJJ"#-JJa  ! +/

(Q,'r%   )r(   r  r(  r  r  r1   r   N)r   r   r  r   r  r   r   r   r   )r(  Sequence[int]r  r   r  r   )r(  r  r  r   r  r   )r   r   r   r   r3   r:   r@   r   r  r  r  r   r   r%   r!   r  r    sH    KO..&).69.DH.2!# 	//r%   r  c                  (    \ rS rSrSS jrSS jrSrg)LlamaTokenDataArrayi  c          	     h   Xl         [        R                  " U R                   4[        R                  " S[        R                  4S[        R
                  4S[        R
                  4/SS9S9U l        [        R                  " U R                  R                  R                  [        R                  5      U R                   SS9U l        [        R                  " U R                   [        R                  S9U l        [        R                  " U R                   [        R
                  S9U l        S U l        g )	Nidlogitre  T)align)dtypeF)datarf   sorted)rJ   nprecarrayr  intcsinglecandidates_datar   llama_token_data_arrayr\   data_asllama_token_data_prQ  arangedefault_candidates_data_idzerosdefault_candidates_data_pr1   )r    rJ   s     r!   r3   LlamaTokenDataArray.__init__  s    !{{\\O((7BII"6bii8HIQU 
 $::%%,,44Y5Q5QR

 +-))DLL*P')+$,,bii)P&r%   c                   U R                   U R                  R                  S S & XR                  R                  S S & U R                  U R                  R
                  S S & SU R                  l        U R                  U R                  l	        g )NF)
r  r  r  r  r  re  rQ  r  rJ   rf   )r    r  s     r!   copy_logitsLlamaTokenDataArray.copy_logits  sj    %)%D%D"(.""1%$($B$Bq!!&#||r%   )rQ  r  r  r  rJ   r1   N)rJ   r   )r  znpt.NDArray[np.single])r   r   r   r   r3   r  r   r   r%   r!   r  r    s    ",r%   r  c                    [        [        R                  R                  U 5      5      nUS:X  a  U $ U  Vs/ s H  o"U-  PM	     sn$ s  snf )N        )r   r  linalgnorm)	embeddingr  vs      r!   normalize_embeddingr    s@    	*+Ds{'(iHi(((s   Ac                  6   \ rS rSr% SrS\S'   SrS\S'   SrS\S'   S	rS
\S'   Sr	S
\S'   Sr
S
\S'   SrS
\S'   SrS
\S'   SrS\S'   SrS
\S'   SrS
\S'   SrS
\S'   SrS\S'   SrS
\S'   SrS
\S'   SrS\S'   S rS!\S"'   S rS!\S#'   SrS
\S$'   \" \S%9rS&\S''   S(rg))*LlamaSamplingParamsi  @   r   n_prevr   n_probs(   top_kgffffff?r   top_pg?min_pg      ?tfs_z	typical_pg?ro  rS  rT  r  rU  rV  mirostatg      @mirostat_taug?mirostat_etaTr   penalize_nl r   rs  cfg_negative_prompt	cfg_scaledefault_factoryzdict[int, float]
logit_biasr   N)r   r   r   r   r  __annotations__r  r  r  r  r  r  ro  rS  rT  rU  rV  r  r  r  r  rs  r  r  r   dictr  r   r   r%   r!   r  r    s    FCGSE3OE5E5E5IuD%NCNEL%!OU!HcL%L%KGS!!Iu#(#>J >r%   r  c                      \ rS rSr% \" \S9rS\S'   \" \R                  S9r
S\S'   SrS\S	'   \" \S9rS
\S'   \" \S9rS\S'   S rS rSS jrSS jr  S     SS jjrSS jrSrg)LlamaSamplingContexti5  r  r  r'   zctypes.c_floatmirostat_muNzOptional[LlamaGrammar]rs  z	list[int]prevz list[llama_cpp.llama_token_data]curc                p    / U l         / U l        U R                  b  U R                  R                  5         g g r   )r  r  rs  r  r   s    r!   r  LlamaSamplingContext.reset>  s/    	<<#LL  $r%   c                    [        U R                  U R                  U R                  U R                  R                  5       U R                  R                  5       S9$ )N)r'   r  rs  r  r  )r  r'   r  rs  r  copyr  r   s    r!   cpLlamaSamplingContext.cpD  sA    #;;((LL!
 	
r%   c                T    [        U R                  5      S:  a  U R                  S   $ g )Nr   )r   r  r   s    r!   lastLlamaSamplingContext.lastM  s#    tyy>A99R= r%   c                r    UR                   R                  U R                  U* S  5      R                  S5      $ rs   )r   r   r  r`   )r    ctx_mainr   s      r!   prev_strLlamaSamplingContext.prev_strS  s.    ~~((A238??HHr%   c                    [        S5      e)NzCLlamaSamplingContext.sample is deprecated, use LlamaSampler insteadrm   )r    r  idxlogits_arrays       r!   sampleLlamaSamplingContext.sampleV  s     ""ghhr%   c                :    U R                   R                  U5        g r   )r  append)r    r  r  apply_grammars       r!   acceptLlamaSamplingContext.accept_  s    		r%   )r  r  )r   zOptional[int])r  r   r   r   r   r   )r   N)r  r   r	  r   r
  z Optional[npt.NDArray[np.single]])r  r   r  r   r  r   )r   r   r   r   r   r  r'   r  r\   c_floatr  rs  r   r  r  r  r  r  r  r  r  r   r   r%   r!   r  r  5  s    "'8K"LFL"'"GKG&*G#*D1D)1,1$,GC	)G!
I 9=	ii i 7	ir%   r  c                  ,    \ rS rSr  SS jrSS jrSrg)CustomSampleric  c                R  ^  UT l             SU 4S jjnSS jn[        R                  " 5       n[        R                  " U5      Ul        UT l        [        R                  " S5      Ul        [        R                  " S5      Ul	        [        R                  " S5      Ul        [        R                  " S5      Ul        [        R                  " S5      Ul        [        R                   " 5       T l        [$        R&                  " U5      T R"                  l        S T R"                  l        g )Nc                (   > TR                  U5        g r   )
apply_func)r1   cur_pr    s     r!   apply_wrapper-CustomSampler.__init__.<locals>.apply_wrapperi  s     OOE"r%   c                    g r   r   )r1   s    r!   free_wrapper,CustomSampler.__init__.<locals>.free_wrappero  s    r%   r   )r1   llama_cpp.llama_sampler_pr  z"llama_cpp.llama_token_data_array_p)r1   r  )r  r   llama_sampler_illama_sampler_i_applyapply_apply_wrapper_refllama_sampler_i_namero   llama_sampler_i_acceptr  llama_sampler_i_resetr  llama_sampler_i_cloneclonellama_sampler_i_freefreellama_samplerr1   r\   pointerifacer   )r    r  r  r  	sampler_is   `    r!   r3   CustomSampler.__init__d  s     %	#.	#5	#	 --/	#99-H	"/"77:	$;;A>	#99!<	#99!<	"77:	 ..0#^^I6r%   c                B    [         R                  " U R                  5      $ r   )r\   r+  r1   r   s    r!   get_samplerCustomSampler.get_sampler  s    ~~dll++r%   )r"  r  r1   Nr  z2Callable[[llama_cpp.llama_token_data_array], None])r   r  )r   r   r   r   r3   r0  r   r   r%   r!   r  r  c  s     L 8,r%   r  c                  h   \ rS rSrS rS rS rS rS S jrS r	S!S jr
S"S#S	 jjrS"S#S
 jjrS"S#S jjrS$S jrS%S jrS&S jrS'S jrS(S jrS)S jrS*S jr        S+S jr        S,S jr              S-S jr    S.S jrS/S jr  S0S jrS1S jrS2S3S jjrS4S jrS rS rSr g)5LlamaSampleri  c                   ^  [         R                  " 5       n[         R                  " U5      T l        / T l        [        5       T l        U 4S jnT R                  R                  U5        g )Nc                    > TR                   bg  [        TR                  5       H&  u  p[        R                  " TR                   U 5        M(     [        R
                  " TR                   5        S Tl         g g r   )r1   r6   r7   r   r8   llama_sampler_free)r;   r<   r    s     r!   free_sampler+LlamaSampler.__init__.<locals>.free_sampler  sV    ||'$T%9%9:DA88qI ;,,T\\:# (r%   )r   "llama_sampler_chain_default_paramsllama_sampler_chain_initr1   r7   r   r(   r2   )r    r'   r8  s   `  r!   r3   LlamaSampler.__init__  sP    ==? 99&A@B$;	$ 	!!,/r%   c                8    U R                   R                  5         g r   r   r   s    r!   r:   LlamaSampler.close  r   r%   c                $    U R                  5         g r   r?   r   s    r!   r@   LlamaSampler.__del__  rB   r%   c                p    [         R                  " 5       n[         R                  " U R                  U5        g r   )r   llama_sampler_init_greedyllama_sampler_chain_addr1   r    r1   s     r!   
add_greedyLlamaSampler.add_greedy  s$    557))$,,@r%   c                r    [         R                  " U5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_distrC  r1   )r    rL  r1   s      r!   add_distLlamaSampler.add_dist  &    33D9))$,,@r%   c                p    [         R                  " 5       n[         R                  " U R                  U5        g r   )r   llama_sampler_init_softmaxrC  r1   rD  s     r!   add_softmaxLlamaSampler.add_softmax  s$    668))$,,@r%   c                r    [         R                  " U5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_top_krC  r1   )r    r_  r1   s      r!   	add_top_kLlamaSampler.add_top_k  s&    44Q7))$,,@r%   c                r    [         R                  " X5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_top_prC  r1   r    re  r`  r1   s       r!   	add_top_pLlamaSampler.add_top_p  &    44QA))$,,@r%   c                r    [         R                  " X5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_min_prC  r1   rV  s       r!   	add_min_pLlamaSampler.add_min_p  rY  r%   c                r    [         R                  " X5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_typicalrC  r1   rV  s       r!   add_typicalLlamaSampler.add_typical  s&    66qC))$,,@r%   c                r    [         R                  " U5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_temprC  r1   )r    ro  r1   s      r!   add_tempLlamaSampler.add_temp  rK  r%   c                t    [         R                  " XU5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_temp_extrC  r1   )r    tdeltaexponentr1   s        r!   add_temp_extLlamaSampler.add_temp_ext  s(    77(K))$,,@r%   c                t    [         R                  " XX45      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_xtcrC  r1   )r    re  rh  r`  rL  r1   s         r!   add_xtcLlamaSampler.add_xtc  s(    221H))$,,@r%   c                r    [         R                  " U5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_top_n_sigmarC  r1   )r    r   r1   s      r!   add_top_n_sigmaLlamaSampler.add_top_n_sigma  s&    ::1=))$,,@r%   c                v    [         R                  " XX4U5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_mirostatrC  r1   )r    rJ   rL  rw  rx  ry  r1   s          r!   add_mirostatLlamaSampler.add_mirostat  s+    77sQRS))$,,@r%   c                t    [         R                  " XU5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_mirostat_v2rC  r1   )r    rL  rw  rx  r1   s        r!   add_mirostat_v2LlamaSampler.add_mirostat_v2  s(    ::4cJ))$,,@r%   c                    [         R                  " UR                  UR                  R	                  S5      UR
                  R	                  S5      5      n[         R                  " U R                  U5        g rs   )r   llama_sampler_init_grammarr0   _grammarr.   _rootrC  r1   )r    r   rs  r1   s       r!   add_grammarLlamaSampler.add_grammar  sT    66KK))0097==;O;OPW;X
 	))$,,@r%   c                   [         R                  [        U5      -  " 5       n[        U5       H  u  pgUR	                  S5      XV'   M     [
        R                  [        U5      -  " U6 n[
        R                  " UR                  UR                  R	                  S5      UR                  R	                  S5      U[        U5      U[        U5      5      n	[
        R                  " U R                  U	5        g rs   )r\   c_char_pr   	enumerater.   r   r   (llama_sampler_init_grammar_lazy_patternsr0   r  r  rC  r1   )
r    r   rs  trigger_patternstrigger_tokenspattern_ptrsr;   patterntoken_arrayr1   s
             r!   add_grammar_lazy_patterns&LlamaSampler.add_grammar_lazy_patterns  s     #.>*??B#$45JA%nnW5LO 6 !,,s>/BB^TDDKK##G,MM  ) !
 	))$,,@r%   c                x    [         R                  " UUUU5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_penaltiesrC  r1   )r    rS  rT  rU  rV  r1   s         r!   add_penaltiesLlamaSampler.add_penalties  s6     88	
 	))$,,@r%   c                6   [         R                  [        U5      -  " 5       n[        U5       H  u  pU
R	                  S5      X'   M     [
        R                  " UR                  UUUUUU[        U5      5      n[
        R                  " U R                  U5        g rs   )
r\   r  r   r  r.   r   llama_sampler_init_dryr0   rC  r1   )r    r   rO   dry_multiplierdry_basedry_allowed_lengthdry_penalty_last_nseq_breakersbreaker_ptrsr;   breakerr1   s               r!   add_dryLlamaSampler.add_dry  s     #l*;;>#L1JA%nnW5LO 2 22KK	
 	))$,,@r%   c                8   [         R                  [        U5      -  " 5       n[        UR	                  5       5       H  u  nu  pVXSU   l        XcU   l        M     [         R                  " U[        U5      U5      n[         R                  " U R                  U5        g r   )
r   llama_logit_biasr   r  itemsrv   biasllama_sampler_init_logit_biasrC  r1   )r    rJ   r  
bias_arrayr;   rv   r  r1   s           r!   add_logit_biasLlamaSampler.add_logit_bias  s      003z?BE
 )**:*:*< =A}"'qM!%qM !> 99
O

 	))$,,@r%   c                    [         R                  " UR                  5      n[         R                  " U R                  U5        g r   )r   llama_sampler_init_infillr0   rC  r1   )r    r   r1   s      r!   
add_infillLlamaSampler.add_infill+  s*    55ekkB))$,,@r%   c                    [        U5      nUR                  5       n[        R                  " U R                  U5        U R
                  R                  [        R                  " U R                  5      S-
  U45        g )Nr   )r  r0  r   rC  r1   r7   r  llama_sampler_chain_n)r    r  custom_samplerr1   s       r!   
add_customLlamaSampler.add_custom/  s`     'z2 ,,.))$,,@##,,T\\:Q>O	
r%   c                B    [         R                  " U R                  5      $ r   )r   llama_sampler_get_seedr1   r   s    r!   get_seedLlamaSampler.get_seed:  s    //==r%   c                Z    [         R                  " U R                  UR                  U5      $ r   )r   llama_sampler_sampler1   r   )r    r   r	  s      r!   r  LlamaSampler.sample=  s    --dllCGGSIIr%   c                F    [         R                  " U R                  U5        g r   )r   llama_sampler_acceptr1   ru   s     r!   r  LlamaSampler.accept@  s    &&t||U;r%   c                D    [         R                  " U R                  5        g r   )r   llama_sampler_resetr1   r   s    r!   r  LlamaSampler.resetC  s    %%dll3r%   c                .  ^ U R                   (       a  [        S5      e[        R                  " U R                  5      n[
        R                  [
        5      mUTl        / Tl         [        5       Tl        U4S jnTR                  R                  U5        T$ )Nz7Cannot clone LlamaSampler that contains custom samplersc                 p   > T R                   b(  [        R                  " T R                   5        S T l         g g r   )r1   r   r7  )new_samplers   r!   r8  (LlamaSampler.clone.<locals>.free_samplerR  s0    "".,,[-@-@A&*# /r%   )
r7   rn   r   llama_sampler_cloner1   r4  __new__r   r(   r2   )r    cloned_samplerr8  r  s      @r!   r'  LlamaSampler.cloneF  sw    %&_``"66t||D"**<8,&(#"++	+
 	((6r%   )r(   r7   r1   Nr  )r_  r   )r   )re  r   r`  r   )ro  r   )rh  r   ri  r   rj  r   )re  r   rh  r   r`  r   rL  r   )r   r   )
rJ   r   rL  r   rw  r   rx  r   ry  r   )rL  r   rw  r   rx  r   )r   r   rs  r   )r   r   rs  r   r  	List[str]r  r   )rS  r   rT  r   rU  r   rV  r   )r   r   rO   r   r  r   r  r   r  r   r  r   r  r  )rJ   r   r  zDict[int, float])r   r   r2  r   )r  )r   r   r	  r   r   r   )rv   r   )!r   r   r   r   r3   r:   r@   rE  rI  rN  rR  rW  r\  r`  rd  rk  ro  rs  rw  r{  r  r  r  r  r  r  r  r  r  r  r  r'  r   r   r%   r!   r4  r4    s]   0 !AAAAAAAAAAAAAAAA A $	A
 "A4AA A 	A
 AAA A 	A
 A  A  A  A6AA %A$A	
L	
>J<4r%   r4  )%
__future__r   r)   r\   typingr   r   r   r   r   r	   r
   dataclassesr   r   
contextlibr   numpyr  numpy.typingnptllama_typesllama_grammarr   _utilsr   llama_cpp.llama_cppr   r   r   r  r  r  r  r  r  r4  r   r%   r!   <module>r     s    " 	    )      ' * 'R6 R6j}8 }8@</ </~, ,:) ? ? ?4 * * *Z, ,BT Tr%   