
    *iT                   4S   S SK Jr  S SKrS SKrS SKrS SKJrJrJrJ	r	J
r
  S SKJrJrJr  \
(       a  S SKJrJrJrJrJrJrJr  Sr\R.                  R1                  S5      r\cG  \R4                  " \R6                  R9                  \R6                  R;                  \5      5      5      S-  O\R4                  " \5      r\" \\5      r \" \ 5      r!S r"S	r#S
r$Sr%Sr&Sr'Sr(Sr)Sr*Sr+Sr,Sr-Sr.Sr/Sr0Sr1Sr2Sr3Sr4Sr5Sr6Sr7Sr8Sr9S r:S!r;S"r<S#r=S$r>\R~                  " \R                  \R                  \R                  \R                  5      rB\R~                  " \R                  \R                  5      rC/ \ R                  lE        \R                  \ R                  lG        \ R                  5       rHS%rIS&rJS'rKS(rLS)rM\LrNSrO\MrPS
rQ\" S*\R5      rS\R                  rT\" S+\R5      rU\R                  rV\" S,\R5      rW\R                  rX\" S-\R5      rY\R                  rZ\" S.\R5      r[\R                  r\\R                  r^\R                  r_\R                  " \_5      ra\R                  rbS rc S	rd S
re Srf S/rg S0rh Sri S rjS	rkS
rlSrmS/rnS0roSrpSrqSrrSrsSrtSruSrvSrwSrxSrySrzSr{Sr|Sr}Sr~SrSrSrSrSrS rS!rS"rS#rS$rS1rS2rS3rS4rS5rS&rS rS
=rrS=rrS=rrS rS	rS
rSrS/rS0rSrS rS	rS
rS/rSrSrS2rS6rS7rS8rS9rS rS	rS
rSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrSrS rS!rS"rS#rS$rS1rS2rS:rS;rS<rS=rS&rS rS	rS
rSr\rS&rS rS	rS
rSrS/rS&rS rS	rS rS	rS
r " S> S?\GR                  5      r\R                  " \5      r " S@ SA\GR                  5      r\R                  " \5      r\R~                  " \R                  \GR                  \R                  5      r " SB SC\GR                  5      rS rS	rS
rSr " SD SE\R                  5      r " SF SG\GR                  5      r " SH SI\GR                  5      r " SJ SK\GR                  5      r\R~                  " S\GR                  \GR                  \R                  5      r  " SL SM\GR                  5      r " SN SO\GR                  5      r\R                  " \5      r " SP SQ\GR                  5      r " SR SS\GR                  5      r\R                  r\R                  " \R                  5      r\!" ST/ \5      GSSU j5       r\!" SV/ \5      GSSW j5       r\!" SX/ \5      GSSY j5       r\!" SZ/ \5      GSS[ j5       r\!" S\/ S5      S] 5       rS rS	rS
rSrS/rS0Gr \!" S^/ S5      S_ 5       Gr\!" S`\GR                  /S5      GSSa j5       Gr\!" Sb\GR                  \/\V5            GSSc j5       Gr\!" Sd\GR                  \/\V5            GSSe j5       Gr\!" Sf\R                  " \GR                  5      \R                  \/\V5              GSSg j5       Gr\!" Sh\V\GR                  /S5      GSSi j5       Gr\!" Sj\V/S5      GS Sk j5       Gr\!" Sl\V/S5      GS Sm j5       Gr\!" Sn\V\/\X5            GS!So j5       Gr	\!" Sp\V\/\X5            GS!Sq j5       Gr
\!" Sr\X/S5      GS"Ss j5       Gr\!" St/ \GR                  5      GS#Su j5       Gr\!" Sv/ \R                  5      GS#Sw j5       rD\!" Sx/ \R                  5      GS#Sy j5       Gr\!" Sz/ \R                  5      GS$S{ j5       Gr\!" S|/ \R                  5      GS$S} j5       Gr\!" S~/ \R                  5      GS$S j5       Gr\!" S/ \R                  5      GS$S j5       Gr\!" S\X/\GR&                  5      GS%S j5       Gr\!" S\X/\GR&                  5      GS%S j5       Gr\!" S\X/\GR&                  5      GS%S j5       Gr\!" S\X/\GR&                  5      GS%S j5       Gr\!" S\V/\R                  5      GS&S j5       Gr\!" S\V/\R                  5      GS&S j5       Gr\!" S\V/\R                  5      GS&S j5       Gr\!" S\V/\R                  5      GS&S j5       Gr\!" S\T/\R                  5      GS'S j5       Gr\!" S\X/\V5      GS(S j5       Gr\!" S\X/\Z5      GS)S j5       Gr\!" S\X/\GR                  5      GS%S j5       Gr\!" S\X/\\5      GS*S j5       Gr \!" S\V/\T5      GS+S j5       Gr!\!" S\V/\GR                  5      GS&S j5       Gr"\!" S\V/\R                  5      GS&S j5       Gr#\!" S\V/\R                  5      GS&S j5       Gr$\!" S\V/\R                  5      GS&S j5       Gr%\!" S\V/\R                  5      GS&S j5       Gr&\!" S\V/\R                  5      GS&S j5       Gr'\!" S\V/\R                  5      GS&S j5       Gr(\!" S\V/\GR                  5      GS,S j5       Gr)\!" S\V/\GR&                  5      GS&S j5       Gr*\!" S\V\GR&                  /\GR                  5      GS-S j5       Gr+\!" S\T/\GR                  5      GS.S j5       Gr,\!" S\T/\R                  5      GS.S j5       Gr-\!" S\V\GR                  \GR                  \R                  /\R                  5                GS/S j5       Gr.\!" S\V/\R                  5      GS&S j5       Gr/\!" S\V\R                  \GR                  \R                  /\R                  5                GS0S j5       Gr0\!" S\V\R                  \GR                  \R                  /\R                  5                GS0S j5       Gr1\!" S\V\GR                  \R                  /\R                  5              GS1S j5       Gr2\!" S\V/\GRf                  5      GS&S j5       Gr4\!" S\V\GR                  /\GR                  5      GS2S j5       Gr5\!" S\V/\GRf                  5      GS&S j5       Gr6\!" S\V/\R                  5      GS3S j5       Gr7\!" S\V/\R                  5      GS3S j5       Gr8\!" S\V/\R                  5      GS&S j5       Gr9\!" S\V/\R                  5      GS3S j5       Gr:\!" S\V/\R                  5      GS3S j5       Gr;\!" S\GR                  \GR                  \R                  " \5      /\GR&                  5              GS4S j5       Gr<\!" S\V\GR                  /\5            GS5S j5       Gr=\!" S\/S5      GS6S j5       Gr>\!" S\X\\GR                  /\R                  5              GS7S j5       Gr?\!" S\X\/\R                  5            GS8S j5       Gr@\!" S\X/S5      GS"S j5       GrA\!" S\X\R                  " \GR                  5      \R                  \R                  \R                  \R                  /\R                  5                    GS9S j5       GrB\!" S\Z\R                  /S5      GS:S j5       GrC\!" S\Z\b\^\^/\R                  5                GS;S j5       GrD\!" S\Z\b\b\^\^/S5                GS<S j5       GrE\!" S\Z\b/S5      GS=S j5       GrF\!" S\Z\b\^\^\^/S5                GS>S j5       GrG\!" S\Z\b\^\^\GR                  /S5                GS?S j5       GrH\!" S\Z\b/\^5            GS@S j5       GrI\!" S\Z\b/\^5            GS@S j5       GrJ\!" S\Z/\R                  5      GSAS j5       GrK\!" S\X/\R                  5      GS%S j5       GrL\!" S\X/\R                  5      GS%S j5       GrM\!" S\X/S5      GS"S j5       GrN\!" S\X\b\^\^/\R                  5                GSBS j5       GrO\!" S\X\b\b\^\^/S5                GSCS j5       GrP\!" S\X\b/S5      GSDS j5       GrQ\!" S\X\b\^\^\^/S5                GSES j5       GrR\!" S\X\b\^\^\GR                  /S5                GSFGS  j5       GrS\!" GS\X\b/\^5            GSGGS j5       GrT\!" GS\X\b/\^5            GSGGS j5       GrU\!" GS\X/S5      GS"GS j5       GrV\!" GS\X/\R                  5      GSHGS j5       GrW\!" GS	\X/S5      GS"GS
 j5       GrX\!" GS\X/\R                  5      GS%GS j5       GrY\!" GS\X/\R                  5      GS%GS j5       GrZ\!" GS\X\R                  " \GR                  5      \R                  /\R                  5              GSIGS j5       Gr\\!" GS\X\R                  " \GR                  5      /\R                  5            GSJGS j5       Gr]\!" GS\X\R                  " \GR                  5      \R                  /\R                  5              GSKGS j5       Gr^\!" GS\X\R                  " \GR                  5      /\R                  5            GSLGS j5       Gr_\!" GS\X\GR                  \a\R                  \R                  " \R                  5      /\R                  5                  GSMGS j5       Gr`\!" GS\X\GR                  \a\R                  \R                  " \R                  5      /\R                  5                  GSMGS j5       Gra\!" GS\X\GR                  \a\R                  /\R                  5                GSNGS j5       Grb\!" GS\X\GR                  \a\R                  /\R                  5                GSNGS j5       Grc\!" GS\X\b/\R                  5      GSOGS  j5       Grd\!" GS!\X\R                  " \GR                  5      \R                  \b/\R                  5                GSPGS" j5       Gre\!" GS#\X\R                  " \GR                  5      \R                  \b/\R                  5                GSQGS$ j5       Grf\!" GS%\X\GR                  \b\a\R                  /\R                  5                  GSRGS& j5       Grg\!" GS'\X\GR                  \b\a\R                  \R                  " \R                  5      /\R                  5                    GSSGS( j5       Grh\!" GS)\a\R                  /\5            GSTGS* j5       Gri\!" GS+\R                  \R                  \R                  /\5              GSUGS, j5       Grj\!" GS-\/S5      GSVGS. j5       Grk\!" GS/\X\/\R                  5      GSWGS0 j5       Grl\!" GS1\X\/\R                  5      GSWGS2 j5       Grm\!" GS3\X\R                  \R                  /S5            GSXGS4 j5       Grn\!" GS5\X/\R                  5      GS%GS6 j5       Gro\!" GS7\X/\R                  5      GS%GS8 j5       Grp\!" GS9\X\R                  /S5      GSYGS: j5       Grq\!" GS;\X\R                  /S5      GSZGS< j5       Grr\!" GS=\X\R                  /S5      GS[GS> j5       Grs\!" GS?\X\C\R                  /S5            GS\GS@ j5       Grt\!" GSA\X/S5      GS"GSB j5       Gru\!" GSC\X/\R                  " \GR                  5      5      GS]GSD j5       Grv\!" GSE\X\R                  /\R                  " \GR                  5      5            GS^GSF j5       Grw\!" GSG\X/\R                  " \GR                  5      5      GS]GSH j5       Grx\!" GSI\X\R                  /\R                  " \GR                  5      5            GS^GSJ j5       Gry\!" GSK\X\b/\R                  " \GR                  5      5            GS_GSL j5       Grz\!" GSM\T\_/\GR                  5            GS`GSN j5       Gr{\!" GSO\T\_/\GR                  5            GSaGSP j5       Gr|\!" GSQ\T\_/\GR                  5            GSbGSR j5       Gr}\!" GSS\T\_/\R                  5      GScGST j5       Gr~\!" GSU\T\_/\R                  5            GScGSV j5       Gr\!" GSW\T/\_5      GSdGSX j5       Gr\!" GSY\T/\_5      GSdGSZ j5       Gr\!" GS[\T/\_5      GSdGS\ j5       Gr\!" GS]\T/\_5      GSdGS^ j5       Gr\!" GS_\T/\_5      GSdGS` j5       Gr\!" GSa\T/\_5      GSdGSb j5       Gr\!" GSc\T/\_5      GSdGSd j5       Gr\!" GSe\T/\R                  5      GSeGSf j5       Gr\!" GSg\T/\R                  5      GSeGSh j5       Gr\!" GSi\T/\R                  5      GSeGSj j5       Gr\!" GSk\T/\_5      GSdGSl j5       Gr\!" GSm\T/\_5      GSdGSn j5       Gr\!" GSo\T/\_5      GSdGSp j5       Gr\!" GSq\T/\_5      GSdGSr j5       Gr\!" GSs\T/\_5      GSdGSt j5       Gr\!" GSu\T/\_5      GSdGSv j5       Gr\!" GSw\T\_/\GR                  5            GS`GSx j5       Gr\!" GSy\T\_/\GR                  5            GSaGSz j5       Gr\!" GS{\T\_/\GR                  5            GSbGS| j5       Gr\!" GS}\T\_/\R                  5            GScGS~ j5       Gr\!" GS\T\_/\R                  5            GScGS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\_5      GS.GS j5       Gr\!" GS\T/\R                  5      GSeGS j5       Gr\!" GS\T/\R                  5      GSeGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T/\_5      GSdGS j5       Gr\!" GS\T\GR                  \R                  \a\R                  \R                  \R                  /\R                  5                      GSfGS j5       Gr\!" GS\T\_\GR                  \R                  \R                  \R                  /\R                  5                    GSgGS j5       Gr\!" GS\T\R                  " \_5      \R                  \GR                  \R                  \R                  \R                  /\R                  5                      GShGS j5       Gr\!" GS\GR                  \R                  " \5      \R                  \R                  \GR                  \R                  /\R                  5                    GSiGS j5       Gr\!" GS\R                  " \GR                  5      \R                  /\R                  5            GSjGS j5       Gr\R                  Gr " GS GS\GR                  5      Gr " GS GS\GR                  5      Gr\
(       a  \G\   Gr\R                  " G\5      Gr\R~                  " \GR                  G\5      Gr\R~                  " SG\\_5      Gr\R~                  " SG\\5      Gr\R~                  " SG\5      Gr\R~                  " G\G\5      Gr\R~                  " SG\5      GrSG\4GSG\4GSG\4GSG\4GSG\4GSG\4/G\Gl        \!" GS\R                  " G\5      G\/G\5            GSkGS j5       Gr\!" GSG\/\GR                  5      GSlGS j5       Gr\!" GSG\\_/S5      GSmGS j5       Gr\!" GSG\\/S5          GSnGS j5       Gr\!" GSG\/S5      GSoGS j5       Gr\!" GSG\/G\5      GSpGS j5       Gr\!" GSG\/S5      GSoGS j5       Gr\!" GS\/G\5      GSqGS j5       Gr\!" GSG\G\/S5      GSrGS j5       Gr\!" GSG\\R                  /G\5            GSsGS j5       Gr\!" GSG\/\GR                  5      GStGS j5       Gr\!" GSG\\R                  /G\5            GSsGS j5       Gr\!" GS/ G\5      GSuGS j5       Gr\!" GS\GR&                  /G\5      GSvGS j5       Gr\!" GS/ G\5      GSuGS j5       Gr\!" GS\R                  /G\5      GSwGS j5       Gr\!" GS\GR                  \R                  /G\5      GSxGS j5       Gr\!" GS\GR                  \R                  /G\5      GSxGS j5       Gr\!" GS\GR                  \R                  /G\5      GSxGS j5       Gr\!" GS\GR                  /G\5      GSyGS j5       Gr\!" GS\GR                  \GR                  \GR                  /G\5              GSzGS j5       Gr\!" GS\GR                  \GR                  \R                  \GR&                  /G\5                GS{GS j5       Gr\!" GS\GR                  /G\5      GS|GS j5       Gr\!" GS\R                  \GR&                  \GR                  \GR                  \R                  /G\5                  GS}GS j5       Gr\!" GS\GR&                  \GR                  \GR                  /G\5              GS~GS j5       Gr\!" GS\T\GR                  \GR                  /G\5              GSGS j5       Gr\!" GS\T\GR                  \GR                  \R                  " \GR                  5      \R                  \R                  " \_5      \R                  /G\5                      GSGS j5       Gr\!" GS\T\GR                  \GR                  \R                  " \GR                  5      \R                  \R                  " \_5      \R                  /G\5                      GSGS j5       Gr\!" GS\R                  \GR                  \GR                  \GR                  /G\5                GSGS j5       Gr\!" GS\T\R                  \GR                  \GR                  \R                  \R                  \R                  " \GR                  5      \R                  /G\5                      GSGS j5       Gr\!" GS\R                  \R                  \/G\5              GSGS j5       Gr\!" GS\T/G\5      GSGS j5       Gr\!" GSG\/\GR&                  5      GSGS j5       Gr\!" GSG\\X\R                  /\_5              GSGS j5       Gr\!" GS\GR                  \R                  \GR                  \GR                  \GR                  /\GR                  5                  GSGS j5       Gr\!" GS\GR                  \R                  \GR                  \GR                  \GR                  /\GR                  5                  GSGS j5       Gr\!" GS/ \GR                  5      GSGS j5       Gr\!" GS\R                  \R                  /S5          GSGS j5       Gr " GS  GS\GR                  5      Gr " GS GS\GR                  5      Gr\!" GS\X/G\5      GSGS j5       Gr\!" GS\X/S5      GS"GS j5       Gr\!" GS\X/S5      GS"GS	 j5       Gr\!" GS
G\/G\5      GSGS j5       Gr\!" GSG\/S5      GSGS j5       Gr\!" GSG\/S5      GSGS j5       Gr\R~                  " \R                  \R                  \R                  5      Gr\!" GS\R                  \R                  /\R                  5      GSGS j5       Gr " GS GS\GR                  5      Gr\!" GS\X\VG\/S5      GSGS j5       Gr\!" GS\X\R                  \R                  \R                  \GR                  \R                  \R                  /S5                    GSGS j5       Grg(      )annotationsN)CallableUnionNewTypeOptionalTYPE_CHECKING)load_shared_librarybyref"ctypes_function_for_shared_library)CtypesCDataCtypesArrayCtypesPointerCtypesVoidPointer	CtypesRefCtypesPointerOrRefCtypesFuncPointerllamaLLAMA_CPP_LIB_PATHlib                  	   
                                                               l    ialgginsggiqsggllama_vocab_pllama_model_pllama_context_pllama_memory_tllama_kv_cache_p             !   "   #   @            $   %   &   i   c                      \ rS rSr% Sr\(       a  S\S'   S\S'   S\S'   S\4S\R                  4S\R                  4/r
Srg	)
llama_token_datai  zUsed to store token data

Attributes:
    id (llama_token): token id
    logit (float): log-odds of the token
    p (float): probability of the tokenllama_tokenidfloatlogitp N__name__
__module____qualname____firstlineno____doc__r   __annotations__rH   ctypesc_float_fields___static_attributes__rM       Q/var/www/html/ai-backend/venv/lib/python3.13/site-packages/llama_cpp/llama_cpp.pyrG   rG     sD    /  
{	&..!	fnnHrY   rG   c                      \ rS rSr% Sr\(       a  S\S'   S\S'   S\S'   S\S	'   S\4S\R                  4S\R                  4S	\R                  4/rS
rg)llama_token_data_arrayrB   zUsed to sample tokens given logits

Attributes:
    data (ctypes.Array[llama_token_data]): token data
    size (int): size of the array
    selected (int): index in the data array (i.e. not the token id)
    sorted (bool): whether the array is sortedzCtypesArray[llama_token_data]dataintsizeselectedboolsortedrM   N)rO   rP   rQ   rR   rS   r   rT   llama_token_data_prU   c_size_tc_int64c_boolrW   rX   rM   rY   rZ   r\   r\      sW    6 ++	 
#$	!	V^^$	6==!	HrY   r\   c                     \ rS rSr% Sr\(       a#  S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\R                  4S\R                  " \
5      4S\R                  " \R                  5      4S
\R                  " \5      4S\R                  " \R                  5      4S\R                  " \R                  " \5      5      4S\R                  " \R                  5      4/rSrg)llama_batchi9  a  Input data for llama_encode/llama_decode

A llama_batch object can contain input about one or many sequences

The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens

Attributes:
    n_tokens (int): number of tokens
    token (ctypes.Array[llama_token]): the token ids of the input (used when embd is NULL)
    embd (ctypes.Array[ctypes.ctypes.c_float]): token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
    pos (ctypes.Array[ctypes.Array[llama_pos]]): the positions of the respective token in the sequence
    seq_id (ctypes.Array[ctypes.Array[llama_seq_id]]): the sequence to which the respective token belongs
    logits (ctypes.Array[ctypes.ctypes.c_int8]): if zero, the logits for the respective token will not be output
r^   n_tokensCtypesArray[llama_token]tokenCtypesArray[ctypes.c_float]embdz#CtypesArray[CtypesArray[llama_pos]]poszCtypesArray[ctypes.c_int]n_seq_idz&CtypesArray[CtypesArray[llama_seq_id]]seq_idzCtypesArray[ctypes.c_int8]logitsrM   N)rO   rP   rQ   rR   rS   r   rT   rU   c_int32POINTERrH   rV   	llama_posllama_seq_idc_int8rW   rX   rM   rY   rZ   rh   rh   9  s     ''))00++66** 
V^^$	&..-.	/0	y)*	V^^FNN34	6>>&..">?@	6>>&--01HrY   rh   c                      \ rS rSr% S\R
                  4S\R                  4S\R                  4S\R                  S-  4/r	\
(       a  S\S'   S\S'   S	\S'   S
\S'   SrgSrg)llama_model_kv_override_valueiv  val_i64val_f64val_boolval_strr@   r^   rJ   ra   bytesrM   N)rO   rP   rQ   rR   rU   re   c_doublerf   c_charrW   r   rT   rX   rM   rY   rZ   rx   rx   v  s\    	FNN#	FOO$	V]]#	FMMC'(	H 	 rY   rx   c                      \ rS rSr% S\R
                  4S\R                  S-  4S\4/r\	(       a  S\
S'   S\
S'   S\
S'   S	rg
S	rg
)llama_model_kv_overridei  tagkeyr@   valuer^   r}   zUnion[int, float, bool, bytes]rM   N)rO   rP   rQ   rR   rU   c_intr   rx   rW   r   rT   rX   rM   rY   rZ   r   r     sI    		#$	/0H 
-- rY   r   c                  <   \ rS rSr% Sr\(       aF  S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\R                  4S\R                  4S\R                  4S	\R                  4S
\R                  4S\R                  " \R                  5      4S\4S\R                  4S\R                  " \5      4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4/rSrg)llama_model_paramsi  a  Parameters for llama_model

Attributes:
    devices (ctypes.Array[ggml_backend_dev_t]): NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
    tensor_buft_overrides (ctypes.Array[llama_model_tensor_buft_override]): NULL-terminated list of buffer types to use for tensors that match a pattern
    n_gpu_layers (int): number of layers to store in VRAM
    split_mode (int): how to split the model across multiple GPUs
    main_gpu (int): the GPU that is used for the entire model when split_mode is LLAMA_SPLIT_MODE_NONE
    tensor_split (ctypes.Array[ctypes.ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
    progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
    progress_callback_user_data (ctypes.ctypes.c_void_p): context pointer passed to the progress callback
    kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
    vocab_only (bool): only load the vocabulary, no weights
    use_mmap (bool): use mmap if possible
    use_mlock (bool): force system to keep model in RAM
    check_tensors (bool): validate model tensor data
    use_extra_bufts (bool): use extra buffer types (used for weight repacking)zCtypesArray[ctypes.c_void_p]devicesz-CtypesArray[llama_model_tensor_buft_override]tensor_buft_overridesr^   n_gpu_layers
split_modemain_gpurl   tensor_splitz(Callable[[float, ctypes.c_void_p], bool]progress_callbackctypes.c_void_pprogress_callback_user_dataz$CtypesArray[llama_model_kv_override]kv_overridesra   
vocab_onlyuse_mmap	use_mlockcheck_tensorsuse_extra_buftsrM   N)rO   rP   rQ   rR   rS   r   rT   rU   c_void_prr   r   rs   rV   llama_progress_callbackr   rf   rW   rX   rM   rY   rZ   r   r     s   V$ --LL11CC%44:: 
FOO$	 &//2	(	v||$	V^^$	78	56	&8	(?@A	v}}%	V]]#	fmm$	&--(	FMM*HrY   r   c                  <   \ rS rSr% Sr\(       a  S\S'   S\S'   S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S '   S\S!'   S\S"'   S\S#'   S\S$'   S\S%'   S\S&'   S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S	\R                  4S
\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\4S\R                  4S\R                  4S\R                  4S\4S\R                  4S \R                  4S!\R                  4S"\R                  4S#\R                  4S$\R                  4S%\R                  4S&\R                  4/rS'rg())llama_context_paramsi   a[  Parameters for llama_context

Attributes:
    n_ctx (int): text context, 0 = from model
    n_batch (int): logical maximum batch size that can be submitted to llama_decode
    n_ubatch (int): physical maximum batch size
    n_seq_max (int): max number of sequences (i.e. distinct states for recurrent models)
    n_threads (int): number of threads to use for generation
    n_threads_batch (int): number of threads to use for batch processing
    rope_scaling_type (int): RoPE scaling type, from `enum llama_rope_scaling_type`
    pooling_type (int): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
    attention_type (int): attention type to use for embeddings
    rope_freq_base (float): RoPE base frequency, 0 = from model
    rope_freq_scale (float): RoPE frequency scaling factor, 0 = from model
    yarn_ext_factor (float): YaRN extrapolation mix factor, negative = from model
    yarn_attn_factor (float): YaRN magnitude scaling factor
    yarn_beta_fast (float): YaRN low correction dim
    yarn_beta_slow (float): YaRN high correction dim
    yarn_orig_ctx (int): YaRN original context size
    defrag_thold (float): defragment the KV cache if holes/size > thold, <= 0 disabled (default)
    cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
    cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
    type_k (int): data type for K cache
    type_v (int): data type for V cache
    abort_callback (ggml_abort_callback): abort callback if it returns true, execution of llama_decode() will be aborted
    abort_callback_data (ctypes.ctypes.c_void_p): data for abort_callback
    embeddings (bool): if true, extract embeddings (together with logits)
    offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
    flash_attn (bool): whether to use flash attention
    no_perf (bool): whether to measure performance timings
    op_offload (bool): offload host tensor operations to device
    swa_full (bool): use full-size SWA cache
    kv_unified (bool): use a unified buffer across the input sequences when computing the attention
r^   n_ctxn_batchn_ubatch	n_seq_max	n_threadsn_threads_batchrope_scaling_typepooling_typeattention_typerJ   rope_freq_baserope_freq_scaleyarn_ext_factoryarn_attn_factoryarn_beta_fastyarn_beta_slowyarn_orig_ctxdefrag_tholdz'Callable[[ctypes.c_void_p, bool], bool]cb_evalr   cb_eval_user_datatype_ktype_vz!Callable[[ctypes.c_void_p], bool]abort_callbackabort_callback_datara   
embeddingsoffload_kqv
flash_attnno_perf
op_offloadswa_full
kv_unifiedrM   N)rO   rP   rQ   rR   rS   r   rT   rU   c_uint32rr   r   rV    ggml_backend_sched_eval_callbackr   ggml_abort_callbackrf   rW   rX   rM   rY   rZ   r   r      s#   !F 
88**99,, 
&//"	FOO$	V__%	foo&	fnn%	FNN+	fll+	&	6<<(	6>>*	FNN+	FNN+	V^^,	6>>*	6>>*	&//*	(	45	foo.	6<< 	6<< 	./	0	v}}%	&	v}}%	FMM"	v}}%	V]]#	v}}%=HrY   r   c                      \ rS rSr% Sr\(       aA  S\S'   S\S'   S\S'   S\S'   S\S	'   S\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\S'   S\R                  4S\R                  4S\R                  4S\R                  4S	\R                  4S
\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4S\R                  4/rSrg)llama_model_quantize_paramsi  a  Parameters for llama_model_quantize

Attributes:
    nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
    ftype (int): quantize to this llama_ftype
    output_tensor_type (int): output tensor type
    token_embedding_type (int): token embeddings tensor type
    allow_requantize (bool): allow quantizing non-f32/f16 tensors
    quantize_output_tensor (bool): quantize output.weight
    only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
    pure (bool): quantize all tensors to the default type
    keep_split (bool): quantize to the same number of shards
    imatrix (ctypes.c_void_p): pointer to importance matrix data
    kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
    tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
    prune_layers (ctypes.c_void_p): pointer to vector containing layer indices to prune
r^   nthreadftypeoutput_tensor_typetoken_embedding_typera   allow_requantizequantize_output_tensor	only_copypure
keep_splitr   imatrixr   tensor_typesprune_layersrM   N)rO   rP   rQ   rR   rS   r   rT   rU   rr   r   rf   r   rW   rX   rM   rY   rZ   r   r     s    $ 
!! $$
  %%%%%% 
FNN#	&,,	v||,	.	V]]+	!6==1	fmm$		v}}%	FOO$	)	)	)HrY   r   c                  `    \ rS rSr% Sr\(       a
  S\S'   S\S'   S\4S\R                  4/r
Srg)	llama_logit_biasi  z^Used to store logit bias

Attributes:
    token (llama_token): token id
    bias (float): biasrH   rk   rJ   biasrM   NrN   rM   rY   rZ   r   r     s4      
+	 HrY   r   c                  P    \ rS rSr% Sr\(       a  S\S'   S\R                  4/r	Sr
g)llama_sampler_chain_paramsi  zjParameters for llama_sampler_chain

Attributes:
    no_perf (bool): whether to measure performance timingsra   r   rM   N)rO   rP   rQ   rR   rS   r   rT   rU   rf   rW   rX   rM   rY   rZ   r   r     s'    B
  
FMM"HrY   r   c                  L    \ rS rSrS\R
                  4S\R
                  4/rSrg)llama_chat_messagei  rolecontentrM   N)rO   rP   rQ   rR   rU   c_char_prW   rX   rM   rY   rZ   r   r     s!    	!	FOO$HrY   r   llama_model_default_paramsc                     g)z&Get default parameters for llama_modelNrM   rM   rY   rZ   r   r          rY   llama_context_default_paramsc                     g)z(Get default parameters for llama_contextNrM   rM   rY   rZ   r   r   !  r   rY   "llama_sampler_chain_default_paramsc                     g)z.Get default parameters for llama_sampler_chainNrM   rM   rY   rZ   r   r   ,  r   rY   #llama_model_quantize_default_paramsc                     g)z/Get default parameters for llama_model_quantizeNrM   rM   rY   rZ   r   r   7  r   rY   llama_backend_initc                     g)zIInitialize the llama + ggml backend
Call once at the start of the programNrM   rM   rY   rZ   r   r   E       rY   llama_backend_freec                     g)zACall once at the end of the program - currently only used for MPINrM   rM   rY   rZ   r   r   c  r   rY   llama_numa_initc                   g NrM   )numas    rZ   r   r   o       rY   llama_load_model_from_filec                   g r   rM   
path_modelparamss     rZ   r   r     r   rY   llama_model_load_from_filec                   g)zLoad the model from a file

If the file is split into multiple parts, the file name must follow this pattern: <name>-%05d-of-%05d.gguf

If the split file name does not follow this pattern, use llama_model_load_from_splitsNrM   r   s     rZ   r   r          rY   llama_model_load_from_splitsc                   g)zjLoad the model from multiple splits (support custom naming scheme)

The paths must be in the correct orderNrM   )pathsn_pathsr   s      rZ   r   r          rY   llama_model_save_to_filec                   g)zSave the model to a fileNrM   )modelr   s     rZ   r   r     r   rY   llama_free_modelc                   g r   rM   r   s    rZ   r   r     r   rY   llama_model_freec                   g r   rM   r   s    rZ   r   r     r   rY   llama_init_from_modelc                   g r   rM   r   r   s     rZ   r   r     r   rY   llama_new_context_with_modelc                   g r   rM   r   s     rZ   r   r     r   rY   
llama_freec                   g)zFrees all allocated memoryNrM   ctxs    rZ   r  r    r   rY   llama_time_usc                     g r   rM   rM   rY   rZ   r  r  	  r   rY   llama_max_devicesc                     g r   rM   rM   rY   rZ   r  r        rY   llama_max_parallel_sequencesc                     g r   rM   rM   rY   rZ   r
  r
    r	  rY   llama_supports_mmapc                     g r   rM   rM   rY   rZ   r  r    r	  rY   llama_supports_mlockc                     g r   rM   rM   rY   rZ   r  r  %  r	  rY   llama_supports_gpu_offloadc                     g r   rM   rM   rY   rZ   r  r  +  r	  rY   llama_supports_rpcc                     g r   rM   rM   rY   rZ   r  r  1  r	  rY   llama_n_ctxc                   g r   rM   r  s    rZ   r  r  7  r	  rY   llama_n_batchc                   g r   rM   r  s    rZ   r  r  =  r	  rY   llama_n_ubatchc                   g r   rM   r  s    rZ   r  r  C  r	  rY   llama_n_seq_maxc                   g r   rM   r  s    rZ   r  r  I  r	  rY   llama_n_ctx_trainc                   g r   rM   r   s    rZ   r  r  O  r	  rY   llama_n_embdc                   g r   rM   r   s    rZ   r  r  U  r	  rY   llama_n_layerc                   g r   rM   r   s    rZ   r   r   [  r	  rY   llama_n_headc                   g r   rM   r   s    rZ   r"  r"  a  r	  rY   llama_n_vocabc                   g r   rM   r   s    rZ   r$  r$  g  r	  rY   llama_get_modelc                   g r   rM   r  s    rZ   r&  r&  m  r	  rY   llama_get_memoryc                   g)zGet the memory for the contextNrM   r  s    rZ   r(  r(  s       rY   llama_pooling_typec                   g r   rM   r  s    rZ   r+  r+  z  r	  rY   llama_get_kv_selfc                   g)z0Get the KV cache for self-attention (DEPRECATED)NrM   r  s    rZ   r-  r-    r   rY   llama_model_get_vocabc                   g r   rM   r   s    rZ   r/  r/    r	  rY   llama_model_rope_typec                   g r   rM   r   s    rZ   r1  r1    r	  rY   llama_model_n_ctx_trainc                   g r   rM   r   s    rZ   r3  r3    r	  rY   llama_model_n_embdc                   g r   rM   r   s    rZ   r5  r5    r	  rY   llama_model_n_layerc                   g r   rM   r   s    rZ   r7  r7    r	  rY   llama_model_n_headc                   g r   rM   r   s    rZ   r9  r9    r	  rY   llama_model_n_head_kvc                   g r   rM   r   s    rZ   r;  r;    r	  rY   llama_model_n_swac                   g r   rM   r   s    rZ   r=  r=    r	  rY   !llama_model_rope_freq_scale_trainc                   g r   rM   r   s    rZ   r?  r?    r	  rY   llama_model_n_cls_outc                   g)zKReturns the number of classifier outputs (only valid for classifier models)NrM   r   s    rZ   rA  rA    r*  rY   llama_model_cls_labelc                   g)zNReturns label of classifier output by index. Returns None if no label providedNrM   )r   is     rZ   rC  rC    r*  rY   llama_vocab_typec                   g r   rM   vocabs    rZ   rF  rF    r	  rY   llama_vocab_n_tokensc                   g r   rM   rH  s    rZ   rJ  rJ    r	  rY   llama_model_meta_val_strc                   g)z*Get metadata value as a string by key nameNrM   )r   r   bufbuf_sizes       rZ   rL  rL        $ rY   llama_model_meta_countc                   g)z*Get the number of metadata key/value pairsNrM   r   s    rZ   rQ  rQ    r*  rY   llama_model_meta_key_by_indexc                   g)zGet metadata key name by indexNrM   r   rE  rN  rO  s       rZ   rS  rS    rP  rY   !llama_model_meta_val_str_by_indexc                   g)z'Get metadata value as a string by indexNrM   rU  s       rZ   rV  rV    rP  rY   llama_model_descc                   g)z&Get a string describing the model typeNrM   )r   rN  rO  s      rZ   rX  rX  4       rY   llama_model_sizec                   g)z?Returns the total size of all the tensors in the model in bytesNrM   r   s    rZ   r[  r[  E  r*  rY   llama_model_chat_templatenamec                   g)zoGet the default chat template. Returns None if not available
If name is None, returns the default chat templateNrM   )r   r^  s     rZ   r]  r]  N       rY   llama_model_n_paramsc                   g)z3Returns the total number of parameters in the modelNrM   r   s    rZ   ra  ra  W  r*  rY   llama_model_has_encoderc                   g)zOReturns true if the model contains an encoder that requires llama_encode() callNrM   r   s    rZ   rc  rc  _  r*  rY   llama_model_has_decoderc                   g)zNReturns true if the model contains a decoder that requires llama_decode() callNrM   r   s    rZ   re  re  g  r*  rY   llama_model_decoder_start_tokenc                   g)zFor encoder-decoder models, this function returns id of the token that must be provided
to the decoder to start generating output sequence. For other models, it returns -1.
NrM   r   s    rZ   rg  rg  p  r   rY   llama_model_is_recurrentc                   g)z?Returns true if the model is recurrent (like Mamba, RWKV, etc.)NrM   r   s    rZ   ri  ri  |  r*  rY   llama_model_is_diffusionc                   g)zFReturns true if the model is diffusion-based (like LLaDA, Dream, etc.)NrM   r   s    rZ   rk  rk    r*  rY   llama_model_quantizec                   g)zReturns 0 on successNrM   )	fname_inp	fname_outr   s      rZ   rm  rm          rY   llama_adapter_lora_initc                   g r   rM   )r   	path_loras     rZ   rr  rr    r   rY   llama_adapter_lora_freec                   g r   rM   )adapters    rZ   ru  ru    r   rY   llama_set_adapter_lorac                   g)zNAdd a loaded LoRA adapter to given context
This will not modify model's weightNrM   )r  rw  scales      rZ   rx  rx         rY   llama_rm_adapter_lorac                   g)zhRemove a specific LoRA adapter from given context
Return -1 if the adapter is not present in the contextNrM   )r  rw  s     rZ   r|  r|    r{  rY   llama_clear_adapter_lorac                   g)z+Remove all LoRA adapters from given contextNrM   r  s    rZ   r~  r~    r   rY   llama_apply_adapter_cvecc                   g)a  Apply a loaded control vector to a llama_context, or if data is NULL, clear
the currently loaded vector.
n_embd should be the size of a single layer's control, and data should point
to an n_embd x n_layers buffer starting from layer 1.
il_start and il_end are the layer range the vector should apply to (both inclusive)
See llama_control_vector_load in common to load a control vector.NrM   )r  r]   lenn_embdil_startil_ends         rZ   r  r    s    6 rY   llama_memory_clearc                   g)zkClear the memory contents
If data == true, the data buffers will also be cleared together with the metadataNrM   )memr]   s     rZ   r  r  )  r   rY   llama_memory_seq_rmc                   g)a  Removes all tokens that belong to the specified sequence and have positions in [p0, p1)

Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails

seq_id < 0 : match any sequence
p0 < 0     : [0,  p1]
p1 < 0     : [p0, inf)NrM   )r  rp   p0p1s       rZ   r  r  >  s    0 rY   llama_memory_seq_cpc                   g)znCopy all tokens that belong to the specified sequence to another sequence
p0 < 0 : [0,  p1]
p1 < 0 : [p0, inf)NrM   )r  
seq_id_src
seq_id_dstr  r  s        rZ   r  r  b      , rY   llama_memory_seq_keepc                   g)z?Removes all tokens that do not belong to the specified sequenceNrM   r  rp   s     rZ   r  r        
 rY   llama_memory_seq_addc                   g)zAdds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
p0 < 0 : [0,  p1]
p1 < 0 : [p0, inf)NrM   )r  rp   r  r  deltas        rZ   r  r    r  rY   llama_memory_seq_divc                   g)z[Integer division of the positions by factor of `d > 1`
p0 < 0 : [0,  p1]
p1 < 0 : [p0, inf)NrM   )r  rp   r  r  ds        rZ   r  r    r  rY   llama_memory_seq_pos_minc                   g)zReturns the smallest position present in the memory for the specified sequence
This is typically non-zero only for SWA caches
Return -1 if the sequence is emptyNrM   r  s     rZ   r  r         rY   llama_memory_seq_pos_maxc                   g)zpReturns the largest position present in the memory for the specified sequence
Return -1 if the sequence is emptyNrM   r  s     rZ   r  r    r   rY   llama_memory_can_shiftc                   g)z%Check if the memory supports shiftingNrM   )r  s    rZ   r  r    r*  rY   llama_kv_self_n_tokensc                   g)zTReturns the number of tokens in the KV cache (slow, use only for debug) (DEPRECATED)NrM   r  s    rZ   r  r    r  rY   llama_kv_self_used_cellsc                   g)z0Returns the number of used KV cells (DEPRECATED)NrM   r  s    rZ   r  r  
  r  rY   llama_kv_self_clearc                   g)zClear the KV cache (DEPRECATED)NrM   r  s    rZ   r  r    r  rY   llama_kv_self_seq_rmc                   g)z(Remove tokens from KV cache (DEPRECATED)NrM   )r  rp   r  r  s       rZ   r  r  )  rP  rY   llama_kv_self_seq_cpc                   g)z$Copy tokens in KV cache (DEPRECATED)NrM   )r  r  r  r  r  s        rZ   r  r  I      ( rY   llama_kv_self_seq_keepc                   g)z5Keep only specified sequence in KV cache (DEPRECATED)NrM   r  rp   s     rZ   r  r  e  r  rY   llama_kv_self_seq_addc                   g)z8Add delta to sequence positions in KV cache (DEPRECATED)NrM   )r  rp   r  r  r  s        rZ   r  r  y  r  rY   llama_kv_self_seq_divc                   g)z2Divide sequence positions in KV cache (DEPRECATED)NrM   )r  rp   r  r  r  s        rZ   r  r    r  rY   llama_kv_self_seq_pos_minc                   g)zCReturns the smallest position in KV cache for sequence (DEPRECATED)NrM   r  s     rZ   r  r    r   rY   llama_kv_self_seq_pos_maxc                   g)zBReturns the largest position in KV cache for sequence (DEPRECATED)NrM   r  s     rZ   r  r    r   rY   llama_kv_self_defragc                   g)z$Defragment the KV cache (DEPRECATED)NrM   r  s    rZ   r  r    r*  rY   llama_kv_self_can_shiftc                   g)z<Check if the context supports KV cache shifting (DEPRECATED)NrM   r  s    rZ   r  r    r*  rY   llama_kv_self_updatec                   g)z'Apply the KV cache updates (DEPRECATED)NrM   r  s    rZ   r  r    r*  rY   llama_state_get_sizec                   g)zNReturns the *actual* size in bytes of the state (logits, embedding and memory)NrM   r  s    rZ   r  r    r*  rY   llama_get_state_sizec                   g)z3Returns the size in bytes of the state (DEPRECATED)NrM   r  s    rZ   r  r  	  r*  rY   llama_state_get_datac                   g)zCopies the state to the specified destination address.
Destination needs to have allocated enough memory.
Returns the number of bytes copiedNrM   )r  dstr_   s      rZ   r  r  	  rP  rY   llama_copy_state_datac                   g)zBCopies the state to the specified destination address (DEPRECATED)NrM   )r  r  s     rZ   r  r  )	  rZ  rY   llama_state_set_datac                   g)zQSet the state reading from the specified address
Returns the number of bytes readNrM   )r  srcr_   s      rZ   r  r  >	  r   rY   llama_set_state_datac                   g)z=Set the state reading from the specified address (DEPRECATED)NrM   )r  r  s     rZ   r  r  R	  r  rY   llama_state_load_filec                   g r   rM   r  path_session
tokens_outn_token_capacityn_token_count_outs        rZ   r  r  e	      & rY   llama_load_session_filec                   g r   rM   r  s        rZ   r  r  	  r  rY   llama_state_save_filec                   g r   rM   r  r  tokensn_token_counts       rZ   r  r  	      " rY   llama_save_session_filec                   g r   rM   r  s       rZ   r  r  	  r  rY   llama_state_seq_get_sizec                   g)z@Get the exact size needed to copy the state of a single sequenceNrM   r  s     rZ   r  r  	  r   rY   llama_state_seq_get_datac                   g)z=Copy the state of a single sequence into the specified bufferNrM   )r  r  r_   rp   s       rZ   r  r  	  rP  rY   llama_state_seq_set_datac                   g)z2Copy the sequence data into the specified sequenceNrM   )r  r  r_   dest_seq_ids       rZ   r  r  	  rP  rY   llama_state_seq_save_filec                   g r   rM   )r  filepathrp   r  r  s        rZ   r  r  
  r  rY   llama_state_seq_load_filec                   g r   rM   )r  r  r  r  r  r  s         rZ   r  r  5
  s    * rY   llama_batch_get_onec                   g)zReturn batch for single sequence of tokens

NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
NrM   )r  ri   s     rZ   r  r  Z
  r  rY   llama_batch_initc                   g)a  Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
Each token can be assigned up to n_seq_max sequence ids
The batch has to be freed with llama_batch_free()
If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
The rest of the llama_batch members are allocated with size n_tokens
All members are left uninitializedNrM   )ri   rm   r   s      rZ   r  r  y
  rq  rY   llama_batch_freec                   g)z9Frees a batch of tokens allocated with llama_batch_init()NrM   )batchs    rZ   r  r  
  r*  rY   llama_encodec                   g)zDProcess a batch of tokens using the encoder.
0 - success
< 0 - errorNrM   r  r  s     rZ   r  r  
  r  rY   llama_decodec                   g)a8  Process a batch of tokens.
0 - success
1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
2 - aborted (processed ubatches will remain in the context's memory)
-1 - invalid input batch
< -1 - fatal error (processed ubatches will remain in the context's memory)NrM   r  s     rZ   r  r  
  r   rY   llama_set_n_threadsc                   g)zSet the number of threads used for decoding
n_threads is the number of threads used for generation (single token)
n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
NrM   )r  r   r   s      rZ   r  r  
  r  rY   llama_n_threadsc                   g)z?Get the number of threads used for generation of a single tokenNrM   r  s    rZ   r  r  
  r*  rY   llama_n_threads_batchc                   g)zOGet the number of threads used for prompt and batch processing (multiple token)NrM   r  s    rZ   r  r  
  r*  rY   llama_set_embeddingsc                   g)z1Set whether the context outputs embeddings or notNrM   )r  r   s     rZ   r  r  
  r*  rY   llama_set_causal_attnc                   g)zhSet whether to use causal attention or not
If set to true, the model will only attend to the past tokensNrM   )r  causal_attns     rZ   r  r  
  r`  rY   llama_set_warmupc                   g)zSet whether the model is in warmup mode or not
If true, all model tensors are activated during llama_decode() to load and cache their weights.NrM   )r  warmups     rZ   r  r  
  r`  rY   llama_set_abort_callbackc                   g)zSet abort callbackNrM   )r  r   r   s      rZ   r  r    rZ  rY   llama_synchronizec                   g)zWait until all computations are finished
This is automatically done when using one of the functions below to obtain the computation results
and is not necessary to call it explicitly in most casesNrM   r  s    rZ   r  r    r  rY   llama_get_logitsc                   g)aB  Token logits obtained from the last call to llama_decode()
The logits for which llama_batch.logits[i] != 0 are stored contiguously
in the order they have appeared in the batch.
Rows: number of tokens for which llama_batch.logits[i] != 0
Cols: n_vocab

Returns:
    Pointer to the logits buffer of shape (n_tokens, n_vocab)NrM   r  s    rZ   r	  r	  *  rZ  rY   llama_get_logits_ithc                   g)zJLogits for the ith token. Equivalent to:
llama_get_logits(ctx) + i*n_vocabNrM   r  rE  s     rZ   r  r  >  r{  rY   llama_get_embeddingsc                   g)z@Get the embeddings for the input
shape: [n_embd] (1-dimensional)NrM   r  s    rZ   r  r  S  r   rY   llama_get_embeddings_ithc                   g)zLGet the embeddings for the ith sequence
llama_get_embeddings(ctx) + i*n_embdNrM   r  s     rZ   r  r  b  r{  rY   llama_get_embeddings_seqc                   g)z|Get the embeddings for a sequence id
Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
shape: [n_embd] (1-dimensional)NrM   r  s     rZ   r  r  t  r   rY   llama_vocab_get_textc                   g r   rM   rI  rk   s     rZ   r  r    r   rY   llama_vocab_get_scorec                   g r   rM   r  s     rZ   r  r    r   rY   llama_vocab_get_attrc                   g r   rM   r  s     rZ   r  r    r   rY   llama_vocab_is_eogc                   g)zXCheck if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)NrM   r  s     rZ   r  r    r  rY   llama_vocab_is_controlc                   g)z>Identify if Token Id is a control token or a render-able tokenNrM   r  s     rZ   r  r    r   rY   llama_vocab_bosc                   g)zbeginning-of-sentenceNrM   rH  s    rZ   r  r    r*  rY   llama_vocab_eosc                   g)zend-of-sentenceNrM   rH  s    rZ   r!  r!    r*  rY   llama_vocab_eotc                   g)zend-of-turnNrM   rH  s    rZ   r#  r#    r*  rY   llama_vocab_sepc                   g)zsentence separatorNrM   rH  s    rZ   r%  r%    r*  rY   llama_vocab_nlc                   g)z	next-lineNrM   rH  s    rZ   r'  r'    r*  rY   llama_vocab_padc                   g)paddingNrM   rH  s    rZ   r)  r)    r*  rY   llama_vocab_maskc                   g)maskNrM   rH  s    rZ   r,  r,    r*  rY   llama_vocab_get_add_bosc                   g r   rM   rH  s    rZ   r/  r/    r   rY   llama_vocab_get_add_eosc                   g r   rM   rH  s    rZ   r1  r1    r   rY   llama_vocab_get_add_sepc                   g r   rM   rH  s    rZ   r3  r3    r   rY   llama_vocab_fim_prec                   g r   rM   rH  s    rZ   r5  r5    r   rY   llama_vocab_fim_sufc                   g r   rM   rH  s    rZ   r7  r7    r   rY   llama_vocab_fim_midc                   g r   rM   rH  s    rZ   r9  r9    r   rY   llama_vocab_fim_padc                   g r   rM   rH  s    rZ   r;  r;  )  r   rY   llama_vocab_fim_repc                   g r   rM   rH  s    rZ   r=  r=  3  r   rY   llama_vocab_fim_sepc                   g r   rM   rH  s    rZ   r?  r?  =  r   rY   llama_token_get_textc                   g r   rM   r  s     rZ   rA  rA  H  r   rY   llama_token_get_scorec                   g r   rM   r  s     rZ   rC  rC  T  r   rY   llama_token_get_attrc                   g r   rM   r  s     rZ   rE  rE  _  r   rY   llama_token_is_eogc                   g r   rM   r  s     rZ   rG  rG  j  r   rY   llama_token_is_controlc                   g r   rM   r  s     rZ   rI  rI  u  r   rY   llama_token_bosc                   g r   rM   rH  s    rZ   rK  rK    r   rY   llama_token_eosc                   g r   rM   rH  s    rZ   rM  rM    r   rY   llama_token_eotc                   g r   rM   rH  s    rZ   rO  rO    r   rY   llama_token_clsc                   g r   rM   rH  s    rZ   rQ  rQ    r   rY   llama_token_sepc                   g r   rM   rH  s    rZ   rS  rS    r   rY   llama_token_nlc                   g r   rM   rH  s    rZ   rU  rU    r   rY   llama_token_padc                   g r   rM   rH  s    rZ   rW  rW    r   rY   llama_add_bos_tokenc                   g r   rM   rH  s    rZ   rY  rY    r   rY   llama_add_eos_tokenc                   g r   rM   rH  s    rZ   r[  r[    r   rY   llama_token_fim_prec                   g r   rM   rH  s    rZ   r]  r]    r   rY   llama_token_fim_sufc                   g r   rM   rH  s    rZ   r_  r_    r   rY   llama_token_fim_midc                   g r   rM   rH  s    rZ   ra  ra    r   rY   llama_token_fim_padc                   g r   rM   rH  s    rZ   rc  rc    r   rY   llama_token_fim_repc                   g r   rM   rH  s    rZ   re  re    r   rY   llama_token_fim_sepc                   g r   rM   rH  s    rZ   rg  rg    r   rY   llama_vocab_clsc                   g r   rM   rH  s    rZ   ri  ri    r   rY   llama_tokenizec                   g)ae  Convert the provided text into tokens.

Args:
    vocab: The vocabulary to use for tokenization.
    text: The text to tokenize.
    text_len: The length of the text.
    tokens: The tokens pointer must be large enough to hold the resulting tokens.
    n_max_tokens: The maximum number of tokens to return.
    add_special: Allow adding special tokens if the model is configured to do so.
    parse_special: Allow parsing special tokens.

Returns:
    Returns the number of tokens on success, no more than n_tokens_max
    Returns a negative number on failure - the number of tokens that would have been returned
NrM   )rI  texttext_lenr  n_tokens_maxadd_specialparse_specials          rZ   rk  rk  ,  s    L rY   llama_token_to_piecec                   g)a   Token Id -> Piece.
Uses the vocabulary in the provided context.
Does not write null terminator to the buffer.
User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.

Args:
    vocab: The vocabulary to use for tokenization.
    token: The token to convert.
    buf: The buffer to write the token to.
    length: The length of the buffer.
    lstrip: The number of leading spaces to skip.
    special: If true, special tokens are rendered in the output.NrM   )rI  rk   rN  lengthlstripspecials         rZ   rr  rr  a      B rY   llama_detokenizec                   g)a  Convert the provided tokens into text (inverse of llama_tokenize()).

Args:
    vocab: The vocabulary to use for tokenization.
    tokens: The tokens to convert.
    n_tokens: The number of tokens.
    text: The buffer to write the text to.
    text_len_max: The length of the buffer.
    remove_special: Allow to remove BOS and EOS tokens if model is configured to do so.
    unparse_special: If true, special tokens are rendered in the output.NrM   )rI  r  ri   rm  text_len_maxremove_specialunparse_specials          rZ   rx  rx    rw  rY   llama_chat_apply_templatec                   g)aB  Apply chat template.

Args:
    tmpl: Template to use. If None, uses model's default
    chat: Array of chat messages
    n_msg: Number of messages
    add_ass: Whether to end prompt with assistant token
    buf: Output buffer
    length: Buffer length

Returns:
    Number of bytes written, or needed if buffer too small
NrM   )tmplchatn_msgadd_assrN  rt  s         rZ   r}  r}    s    D rY   llama_chat_builtin_templatesc                   g)zGet list of built-in chat templates.

Args:
    output: Output buffer to store template names.
    len: Length of the output buffer.

Returns:
    Number of templates available.
    Returns a negative number on error.
NrM   )outputr  s     rZ   r  r        . rY   c                      \ rS rSrSrg)llama_sampler_ii!  rM   N)rO   rP   rQ   rR   rX   rM   rY   rZ   r  r  !  s    rY   r  c                  D    \ rS rSrS\R
                  " \5      4S\4/rSr	g)llama_sampleri)  ifacer  rM   N)
rO   rP   rQ   rR   rU   rs   r  llama_sampler_context_trW   rX   rM   rY   rZ   r  r  )  s#    	&..12	'(HrY   r  acceptapplyresetclonefreellama_sampler_initc                   g r   rM   )r  r  s     rZ   r  r  J  r   rY   llama_sampler_namec                   g r   rM   smpls    rZ   r  r  V  r   rY   llama_sampler_acceptc                   g r   rM   )r  rk   s     rZ   r  r  `  r   rY   llama_sampler_applyc                   g r   rM   )r  cur_ps     rZ   r  r  j  r   rY   llama_sampler_resetc                   g r   rM   r  s    rZ   r  r  v  r   rY   llama_sampler_clonec                   g r   rM   r  s    rZ   r  r    r   rY   llama_sampler_freec                   g r   rM   r  s    rZ   r  r    r   rY   llama_sampler_chain_initc                   g r   rM   )r   s    rZ   r  r    r   rY   llama_sampler_chain_addc                   g r   rM   )chainr  s     rZ   r  r    r   rY   llama_sampler_chain_getc                   g r   rM   r  rE  s     rZ   r  r    r   rY   llama_sampler_chain_nc                   g r   rM   r  s    rZ   r  r    r   rY   llama_sampler_chain_removec                   g r   rM   r  s     rZ   r  r    r   rY   llama_sampler_init_greedyc                     g r   rM   rM   rY   rZ   r  r    r	  rY   llama_sampler_init_distc                    g r   rM   )seeds    rZ   r  r    r	  rY   llama_sampler_init_softmaxc                     g r   rM   rM   rY   rZ   r  r    r	  rY   llama_sampler_init_top_kc                    g r   rM   )ks    rZ   r  r    r	  rY   llama_sampler_init_top_pc                    g r   rM   rL   min_keeps     rZ   r  r    r   rY   llama_sampler_init_min_pc                    g r   rM   r  s     rZ   r  r    r   rY   llama_sampler_init_typicalc                    g r   rM   r  s     rZ   r  r    r   rY   llama_sampler_init_tempc                    g r   rM   )ts    rZ   r  r    r	  rY   llama_sampler_init_temp_extc                    g r   rM   )r  r  exponents      rZ   r  r    r   rY   llama_sampler_init_xtcc                   g r   rM   )rL   r  r  r  s       rZ   r  r  %  r   rY   llama_sampler_init_top_n_sigmac                   g r   rM   )ns    rZ   r  r  2  r   rY   llama_sampler_init_mirostatc                   g r   rM   )n_vocabr  tauetams        rZ   r  r  B  r   rY   llama_sampler_init_mirostat_v2c                   g r   rM   )r  r  r  s      rZ   r  r  R  r   rY   llama_sampler_init_grammarc                   g r   rM   )rI  grammar_strgrammar_roots      rZ   r  r  b  r   rY   llama_sampler_init_grammar_lazyc                   g r   rM   )rI  r  r  trigger_wordsnum_trigger_wordstrigger_tokensnum_trigger_tokenss          rZ   r  r  v  r  rY   (llama_sampler_init_grammar_lazy_patternsc                   g r   rM   )rI  r  r  trigger_patternsnum_trigger_patternsr  r  s          rZ   r  r    r  rY   llama_sampler_init_penaltiesc                   g r   rM   )penalty_last_npenalty_repeatpenalty_freqpenalty_presents       rZ   r  r    rZ  rY   llama_sampler_init_dryc                   g r   rM   )rI  n_ctx_traindry_multiplierdry_basedry_allowed_lengthdry_penalty_last_nseq_breakersnum_breakerss           rZ   r  r    s    2 rY   llama_sampler_init_logit_biasc                   g r   rM   )r  n_logit_bias
logit_biass      rZ   r  r    r   rY   llama_sampler_init_infillc                   g r   rM   rH  s    rZ   r  r    r   rY   llama_sampler_get_seedc                   g r   rM   r  s    rZ   r  r  
  r   rY   llama_sampler_samplec                   g r   rM   )r  r  idxs      rZ   r  r    r   rY   llama_split_pathc                   g)z-Build a split GGUF final path for this chunk.NrM   )
split_pathmaxlenpath_prefixsplit_nosplit_counts        rZ   r  r  &       rY   llama_split_prefixc                   g)z^Extract the path prefix from the split_path if and only if the split_no and split_count match.NrM   )split_prefixr   r  r  r  s        rZ   r  r  9  r  rY   llama_print_system_infoc                     g r   rM   rM   rY   rZ   r  r  L  r	  rY   llama_log_setc                   g)zxSet callback for all future logging events.

If this is not called, or NULL is supplied, everything is output on stderr.NrM   )log_callback	user_datas     rZ   r
  r
  T  r   rY   c                      \ rS rSrS\R
                  4S\R
                  4S\R
                  4S\R
                  4S\R                  4S\R                  4S\R                  4/rS	rg
)llama_perf_context_datair  
t_start_ms	t_load_mst_p_eval_ms	t_eval_msn_p_evaln_evaln_reusedrM   N	rO   rP   rQ   rR   rU   r~   rr   rW   rX   rM   rY   rZ   r  r  r  s]    	v'	foo&	(	foo&	V^^$	6>>"	V^^$HrY   r  c                  L    \ rS rSrS\R
                  4S\R                  4/rSrg)llama_perf_sampler_datai  t_sample_msn_samplerM   Nr  rM   rY   rZ   r  r    s!    	(	V^^$HrY   r  llama_perf_contextc                   g r   rM   r  s    rZ   r  r    r   rY   llama_perf_context_printc                   g r   rM   r  s    rZ   r  r    r   rY   llama_perf_context_resetc                   g r   rM   r  s    rZ   r   r     r   rY   llama_perf_samplerc                   g r   rM   r  s    rZ   r"  r"    r   rY   llama_perf_sampler_printc                   g r   rM   r  s    rZ   r$  r$    r   rY   llama_perf_sampler_resetc                   g r   rM   r  s    rZ   r&  r&    r   rY   llama_opt_param_filter_allc                   g r   rM   )tensoruserdatas     rZ   r(  r(    r   rY   c                      \ rS rSrS\R
                  4S\4S\R                  4S\R                  4S\R                  4/rSr	g)	llama_opt_paramsi  r  param_filterparam_filter_udget_opt_parsget_opt_pars_udrM   N)
rO   rP   rQ   rR   rU   r   llama_opt_param_filterr   rW   rX   rM   rY   rZ   r-  r-    sB    	(	/0	FOO,	)	FOO,HrY   r-  llama_opt_initc                   g r   rM   )lctxr   lopt_paramss      rZ   r3  r3    r   rY   llama_opt_epochc                   g r   rM   )r5  datasetresult_trainresult_evalidata_splitcallback_traincallback_evals          rZ   r7  r7    r  rY   )returnr   )r?  r   )r?  r   )r?  r   )r   r^   )r   r}   r   r   r?  Optional[llama_model_p])r   zList[bytes]r   r^   r   r   r?  r@  )r   r4   r   r}   )r   r4   )r   r4   r   r   r?  zOptional[llama_context_p])r  r5   )r?  r^   )r?  ra   )r  r5   r?  r^   )r   r4   r?  r^   )r   r3   r?  r^   )r  r5   r?  r@  )r  r5   r?  zOptional[llama_memory_t])r  r5   r?  zOptional[llama_kv_cache_p])r   r4   r?  zOptional[llama_vocab_p])r   r4   r?  rJ   )r   r4   rE  r^   r?  Optional[bytes])rI  r3   r?  r^   )
r   r4   r   zUnion[ctypes.c_char_p, bytes]rN  r}   rO  r^   r?  r^   )
r   r4   rE  Union[ctypes.c_int, int]rN  (Union[bytes, CtypesArray[ctypes.c_char]]rO  r^   r?  r^   )r   r4   rN  rC  rO  Union[ctypes.c_size_t, int]r?  r^   )r   r4   r^  rA  r?  rA  )r   r4   r?  ra   )ro  r}   rp  r}   r   z/CtypesPointerOrRef[llama_model_quantize_params]r?  r^   )r   r4   rt  r}   r?  zOptional[llama_adapter_lora_p])rw  llama_adapter_lora_p)r  r5   rw  rE  rz  rJ   r?  r^   )r  r5   rw  rE  r?  r^   )r  r5   r]   z"CtypesPointerOrRef[ctypes.c_float]r  r^   r  r^   r  r^   r  r^   r?  r^   )r  r6   r]   ra   )
r  r6   rp   Union[llama_seq_id, int]r  Union[llama_pos, int]r  rG  r?  ra   )
r  r6   r  rF  r  rF  r  rG  r  rG  )r  r6   rp   rF  )
r  r6   rp   rF  r  rG  r  rG  r  rG  )
r  r6   rp   rF  r  rG  r  rG  r  rB  )r  r6   rp   rF  r?  r^   )r  r6   r?  ra   )
r  r5   rp   rF  r  rG  r  rG  r?  ra   )
r  r5   r  rF  r  rF  r  rG  r  rG  )r  r5   rp   rF  )
r  r5   rp   rF  r  rG  r  rG  r  rG  )
r  r5   rp   rF  r  rG  r  rG  r  rB  )r  r5   rp   rF  r?  r^   )r  r5   r?  ra   )r  r5   r  CtypesArray[ctypes.c_uint8]r_   rD  r?  r^   )r  r5   r  rH  r?  r^   )r  r5   r  rH  r_   rD  r?  r^   )r  r5   r  rH  r?  r^   )r  r5   r  r}   r  rj   r  rD  r  #CtypesPointerOrRef[ctypes.c_size_t]r?  ra   )
r  r5   r  r}   r  rj   r  rD  r?  ra   )r  r5   rp   ru   r?  r^   )
r  r5   r  rH  r_   rD  rp   ru   r?  r^   )
r  r5   r  rH  r_   rD  r  ru   r?  r^   )r  r5   r  r}   rp   ru   r  rj   r  rD  r?  r^   )r  r5   r  r}   r  ru   r  rj   r  rD  r  rI  r?  r^   )r  rj   ri   rB  r?  rh   )ri   Union[ctypes.c_int32, int]rm   rJ  r   rJ  r?  rh   )r  rh   )r  r5   r  rh   r?  r^   )r  r5   r   rJ  r   rJ  )r  r5   r   ra   )r  r5   r  ra   )r  r5   r  ra   )r  r5   r   z!Callable[[ctypes.c_void_p], None]r   r   )r  r5   r?  rl   )r  r5   rE  rJ  r?  rl   )r  r5   rp   rF  r?  rl   )rI  r3   rk   Union[llama_token, int]r?  r}   )rI  r3   rk   rK  r?  rJ   )rI  r3   rk   rK  r?  r^   )rI  r3   rk   rK  r?  ra   )rI  r3   r?  rH   )rI  r3   r?  ra   )rI  r3   rm  r}   rn  rB  r  rj   ro  rB  rp  Union[ctypes.c_bool, bool]rq  rL  r?  r^   )rI  r3   rk   rK  rN  z9Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]]rt  rB  ru  rB  rv  rL  r?  r^   )rI  r3   r  rj   ri   rB  rm  r}   rz  rB  r{  rL  r|  rL  r?  r^   )r  r}   r  zCtypesArray[llama_chat_message]r  r^   r  ra   rN  r}   rt  r^   r?  r^   )r  CtypesArray[bytes]r  rD  r?  r^   )r  zctypes.POINTER(llama_sampler_i)r  r  r?  llama_sampler_p)r  rN  r?  r}   )r  rN  rk   rK  )r  rN  r  z#CtypesArray[llama_token_data_array])r  rN  )r  rN  r?  rN  )r   r   r?  rN  )r  rN  r  rN  )r  rN  rE  rJ  r?  rN  )r  rN  r?  r^   )r?  rN  )r  r^   r?  rN  )r  r^   r?  rN  )rL   rJ   r  r^   r?  rN  )r  rJ   r?  rN  )r  rJ   r  rJ   r  rJ   r?  rN  )
rL   rJ   r  rJ   r  r^   r  r^   r?  rN  )r  rJ   r?  rN  )r  r^   r  r^   r  rJ   r  rJ   r  r^   r?  rN  )r  r^   r  rJ   r  rJ   r?  rN  )rI  r3   r  r}   r  r}   r?  rN  )rI  r3   r  r}   r  r}   r  rM  r  r^   r  rj   r  r^   r?  rN  )rI  r3   r  r}   r  r}   r  rM  r  r^   r  rj   r  r^   r?  rN  )
r  r^   r  rJ   r  rJ   r  rJ   r?  rN  )rI  r3   r  r^   r  rJ   r  rJ   r  r^   r  r^   r  r^   r?  rN  )r  r^   r  r^   r  zCtypesArray[llama_logit_bias]r?  rN  )rI  r3   r?  rN  )r  rN  r?  r^   )r  rN  r  r5   r  r^   r?  r^   )r  r}   r   rD  r  r}   r  rB  r  rB  r?  r^   )r  r}   r   rD  r  r}   r  rB  r  rB  r?  r^   )r?  r}   )r  zOptional[CtypesFuncPointer]r  r   )r  r5   r?  r  )r  rN  r?  r  )r  rN  )r*  r   r+  r   r?  ra   )r5  r5   r   r4   r6  r-  )r5  r5   r9  r   r:  r   r;  r   r<  r^   r=  r   r>  r   (  
__future__r   osrU   pathlibtypingr   r   r   r   r   llama_cpp._ctypes_extensionsr	   r
   r   r   r   r   r   r   r   r   _lib_base_nameenvironget_override_base_pathPathpathabspathdirname__file__
_base_path_libctypes_functionGGML_TYPE_F32GGML_TYPE_F16GGML_TYPE_Q4_0GGML_TYPE_Q4_1GGML_TYPE_Q5_0GGML_TYPE_Q5_1GGML_TYPE_Q8_0GGML_TYPE_Q8_1GGML_TYPE_Q2_KGGML_TYPE_Q3_KGGML_TYPE_Q4_KGGML_TYPE_Q5_KGGML_TYPE_Q6_KGGML_TYPE_Q8_KGGML_TYPE_IQ2_XXSGGML_TYPE_IQ2_XSGGML_TYPE_IQ3_XXSGGML_TYPE_IQ1_SGGML_TYPE_IQ4_NLGGML_TYPE_IQ3_SGGML_TYPE_IQ2_SGGML_TYPE_IQ4_XSGGML_TYPE_I8GGML_TYPE_I16GGML_TYPE_I32GGML_TYPE_I64GGML_TYPE_F64GGML_TYPE_IQ1_MGGML_TYPE_COUNT	CFUNCTYPErf   r   r   r   r  argtypesrd   restypeLLAMA_MAX_DEVICESLLAMA_DEFAULT_SEEDLLAMA_TOKEN_NULLLLAMA_FILE_MAGIC_GGLALLAMA_FILE_MAGIC_GGSNLLAMA_FILE_MAGIC_GGSQLLAMA_SESSION_MAGICLLAMA_SESSION_VERSIONLLAMA_STATE_SEQ_MAGICLLAMA_STATE_SEQ_VERSIONr^   r3   llama_vocab_p_ctypesr4   llama_model_p_ctypesr5   llama_context_p_ctypesr6   llama_memory_t_ctypesr7   llama_kv_cache_p_ctypesrr   rt   rH   rs   llama_token_pru   LLAMA_VOCAB_TYPE_NONELLAMA_VOCAB_TYPE_SPMLLAMA_VOCAB_TYPE_BPELLAMA_VOCAB_TYPE_WPMLLAMA_VOCAB_TYPE_UGMLLAMA_VOCAB_TYPE_RWKVLLAMA_VOCAB_TYPE_PLAMO2LLAMA_VOCAB_PRE_TYPE_DEFAULTLLAMA_VOCAB_PRE_TYPE_LLAMA3!LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM#LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODERLLAMA_VOCAB_PRE_TYPE_FALCONLLAMA_VOCAB_PRE_TYPE_MPTLLAMA_VOCAB_PRE_TYPE_STARCODERLLAMA_VOCAB_PRE_TYPE_GPT2LLAMA_VOCAB_PRE_TYPE_REFACTLLAMA_VOCAB_PRE_TYPE_COMMAND_RLLAMA_VOCAB_PRE_TYPE_STABLELM2LLAMA_VOCAB_PRE_TYPE_QWEN2LLAMA_VOCAB_PRE_TYPE_OLMOLLAMA_VOCAB_PRE_TYPE_DBRXLLAMA_VOCAB_PRE_TYPE_SMAUGLLAMA_VOCAB_PRE_TYPE_POROLLAMA_VOCAB_PRE_TYPE_CHATGLM3LLAMA_VOCAB_PRE_TYPE_CHATGLM4LLAMA_VOCAB_PRE_TYPE_VIKINGLLAMA_VOCAB_PRE_TYPE_JAISLLAMA_VOCAB_PRE_TYPE_TEKKENLLAMA_VOCAB_PRE_TYPE_SMOLLMLLAMA_VOCAB_PRE_TYPE_CODESHELLLLAMA_VOCAB_PRE_TYPE_BLOOM!LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISHLLAMA_VOCAB_PRE_TYPE_EXAONELLAMA_VOCAB_PRE_TYPE_CHAMELEONLLAMA_VOCAB_PRE_TYPE_MINERVA"LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLMLLAMA_VOCAB_PRE_TYPE_GPT4OLLAMA_VOCAB_PRE_TYPE_SUPERBPELLAMA_VOCAB_PRE_TYPE_TRILLIONLLAMA_VOCAB_PRE_TYPE_BAILINGMOELLAMA_VOCAB_PRE_TYPE_LLAMA4LLAMA_VOCAB_PRE_TYPE_PIXTRALLLAMA_VOCAB_PRE_TYPE_SEED_CODERLLAMA_ROPE_TYPE_NONELLAMA_ROPE_TYPE_NORMLLAMA_ROPE_TYPE_NEOXGGML_ROPE_TYPE_NEOXLLAMA_ROPE_TYPE_MROPEGGML_ROPE_TYPE_MROPELLAMA_ROPE_TYPE_VISIONGGML_ROPE_TYPE_VISIONLLAMA_TOKEN_TYPE_UNDEFINEDLLAMA_TOKEN_TYPE_NORMALLLAMA_TOKEN_TYPE_UNKNOWNLLAMA_TOKEN_TYPE_CONTROLLLAMA_TOKEN_TYPE_USER_DEFINEDLLAMA_TOKEN_TYPE_UNUSEDLLAMA_TOKEN_TYPE_BYTELLAMA_TOKEN_ATTR_UNDEFINEDLLAMA_TOKEN_ATTR_UNKNOWNLLAMA_TOKEN_ATTR_UNUSEDLLAMA_TOKEN_ATTR_NORMALLLAMA_TOKEN_ATTR_CONTROLLLAMA_TOKEN_ATTR_USER_DEFINEDLLAMA_TOKEN_ATTR_BYTELLAMA_TOKEN_ATTR_NORMALIZEDLLAMA_TOKEN_ATTR_LSTRIPLLAMA_TOKEN_ATTR_RSTRIPLLAMA_TOKEN_ATTR_SINGLE_WORDLLAMA_FTYPE_ALL_F32LLAMA_FTYPE_MOSTLY_F16LLAMA_FTYPE_MOSTLY_Q4_0LLAMA_FTYPE_MOSTLY_Q4_1LLAMA_FTYPE_MOSTLY_Q8_0LLAMA_FTYPE_MOSTLY_Q5_0LLAMA_FTYPE_MOSTLY_Q5_1LLAMA_FTYPE_MOSTLY_Q2_KLLAMA_FTYPE_MOSTLY_Q3_K_SLLAMA_FTYPE_MOSTLY_Q3_K_MLLAMA_FTYPE_MOSTLY_Q3_K_LLLAMA_FTYPE_MOSTLY_Q4_K_SLLAMA_FTYPE_MOSTLY_Q4_K_MLLAMA_FTYPE_MOSTLY_Q5_K_SLLAMA_FTYPE_MOSTLY_Q5_K_MLLAMA_FTYPE_MOSTLY_Q6_KLLAMA_FTYPE_MOSTLY_IQ2_XXSLLAMA_FTYPE_MOSTLY_IQ2_XSLLAMA_FTYPE_MOSTLY_Q2_K_SLLAMA_FTYPE_MOSTLY_IQ3_XSLLAMA_FTYPE_MOSTLY_IQ3_XXSLLAMA_FTYPE_MOSTLY_IQ1_SLLAMA_FTYPE_MOSTLY_IQ4_NLLLAMA_FTYPE_MOSTLY_IQ3_SLLAMA_FTYPE_MOSTLY_IQ3_MLLAMA_FTYPE_MOSTLY_IQ2_SLLAMA_FTYPE_MOSTLY_IQ2_MLLAMA_FTYPE_MOSTLY_IQ4_XSLLAMA_FTYPE_MOSTLY_IQ1_MLLAMA_FTYPE_MOSTLY_BF16LLAMA_FTYPE_MOSTLY_TQ1_0LLAMA_FTYPE_MOSTLY_TQ2_0LLAMA_FTYPE_MOSTLY_MXFP4_MOELLAMA_FTYPE_GUESSED#LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDLLAMA_ROPE_SCALING_TYPE_NONELLAMA_ROPE_SCALING_TYPE_LINEARLLAMA_ROPE_SCALING_TYPE_YARN LLAMA_ROPE_SCALING_TYPE_LONGROPE!LLAMA_ROPE_SCALING_TYPE_MAX_VALUELLAMA_POOLING_TYPE_UNSPECIFIEDLLAMA_POOLING_TYPE_NONELLAMA_POOLING_TYPE_MEANLLAMA_POOLING_TYPE_CLSLLAMA_POOLING_TYPE_LASTLLAMA_POOLING_TYPE_RANK LLAMA_ATTENTION_TYPE_UNSPECIFIEDLLAMA_ATTENTION_TYPE_CAUSALLLAMA_ATTENTION_TYPE_NON_CAUSALLLAMA_SPLIT_MODE_NONELLAMA_SPLIT_MODE_LAYERLLAMA_SPLIT_MODE_ROW	StructurerG   rc   r\   llama_token_data_array_prV   r   rh   LLAMA_KV_OVERRIDE_TYPE_INTLLAMA_KV_OVERRIDE_TYPE_FLOATLLAMA_KV_OVERRIDE_TYPE_BOOLLLAMA_KV_OVERRIDE_TYPE_STRrx   r   r   r   r   r   llama_log_callbackr   r   llama_logit_bias_pr   r   rE  llama_adapter_lora_p_ctypesr   r   r   r   r   GGML_NUMA_STRATEGY_DISABLEDGGML_NUMA_STRATEGY_DISTRIBUTEGGML_NUMA_STRATEGY_ISOLATEGGML_NUMA_STRATEGY_NUMACTLGGML_NUMA_STRATEGY_MIRRORGGML_NUMA_STRATEGY_COUNTr   r   r   r   r   r   r   r   r   r   r  re   r  r
  r  r  r  r  r   r  r  r  r  r  r  r   r"  r$  r&  r(  r+  r-  r/  r1  r3  r5  r7  r9  r;  r=  r?  rA  rC  rF  rJ  rL  rQ  rS  rV  rX  c_uint64r[  r]  ra  rc  re  rg  ri  rk  rm  rr  ru  rx  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  c_uint8r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r  r  r  r!  r#  r%  r'  r)  r,  r/  r1  r3  r5  r7  r9  r;  r=  r?  rA  rC  rE  rG  rI  rK  rM  rO  rQ  rS  rU  rW  rY  r[  r]  r_  ra  rc  re  rg  ri  rk  rr  rx  r}  r  r  r  r  rN  llama_sampler_p_ctypesllama_sampler_i_namellama_sampler_i_acceptllama_sampler_i_applyllama_sampler_i_resetllama_sampler_i_clonellama_sampler_i_freerW   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r   r"  r$  r&  r2  r(  r-  r3  r7  rM   rY   rZ   <module>r!     s;   " 	        jjnn%9: QdQlW\\"''//"''//(*CDEMryr~r~  @S  sT
>:64T:L       $*#3#3
MM6??FMM6??$   &&v}}fooF  #%   !'   **,       #  #  #  ,   .   -  -  +S1  )3/  -s3  //  NN	nn{+~~     @  -  '  #  1  FV  !  $% !&' #  !"   !" !#       "  "        !#  $& !  !# ! %' "  "  " "$   ! "$    -. . */0 0 ,13 3 .      !      !     !  &  $     % ^                                  !   ') #  !"   #$  $D ! "$       $&   "#     v'' ( ^^$45 V-- . ">>*@A  !**
MM6>>6?? >!&"" !T       FLL 
.f.. 
.j2)) 2Lc6++ cX %%&,, J.0&"2"2 0nv'' " ^^$45 !1!1 &))   $nnV__= 
  

 "

 (

 )

 

    !     
 

 \\N

(  __()
1
  __()
1
" "^^FOO$foo7IJ
!$.@
 6??+

 


 

 /0
"6
 "/0
"6
 

 
NN


 $b&//: ;
 /V__E F
 &FMM: ;
 'V]]; <
 -r6==A B
 %r6==9 :
 !7 8&//J K
 #9":FOOL M
 !$:#;V__M N
 "%;$<fooN O
 $';&<fnnM N
 "6!7H I
 #7"8&..I J
 "6!7H I
 #7"8&..I J
 "%;$<>RS T
 #&<%=?TU V %(>'?N O
 

 (+?*@BVW X
 (+?*@&,,O P
 *-A,BFNNS T
 %(<'=v~~N O
 &)=(>O P
 %(<'=v~~N O
 (+?*@&..Q R
 $';&<fnnM N 47K6Lfnn] ^ (+?*@&//R S (+?*QSYSbSbc d #&:%;V\\J K
 '*>)?P Q 	 NN		& 
 	 		 ),@+A6>>R S #	 NN	 
2 	 		 '	 NN	 
2 	 		 6??FOO<
NN
	1 *
 	
 #&:%;V__M N ,/CV__.UW]WfWfg h '*>)?Q R *-A,BFMMR S *-A,BFMMR S %(<'=v~~ +.B-CV]]S T +.B-CV]]S T 23
 OO <
 	$ 6??+
%*#
  !

 8&..I
NN
	#7@E
 89
NN
	#7
 

$ v~~& NN	
, 
 	
   	6 FMM*

  	 MM		$ 	 		 
	4  	
	( ) 		
 	
$ 3\BD  	
	$ 	 		
 !
.  	
	$ 	 		
  
* !6 Ey	!9 !6 Ey	!9 ),A+BFMMR S 56 !7 8&.. 23T  	 MM		$ 	 		 
	,  	
			(	 )	 			
 		
	" 5|Dd"  	
			$	 		 			
 !	
	0  	
			$	 		 			
  	
	( "8,!G	": "8,!G	": '*@)A4H I *-C,DfmmT U '*@)A4H I '*@)A6??S T '*@)A6??S T v~~&
 OO				$	 &	
 			  v~~& OO	: V^^FNN;V__M
OO
		$ &
 	
 V^^FNN;<
OO
	:
 v' MM
	 ) 2	
 ; 

$ v' MM
	 ) 2	
 ; 

  	 MM		 % /	 
	  	 MM		 % /	 
	 \*
OO

 v~~&	 OO			$ & 	 		( v~~&	 OO			$ & 	 		"  OO
	  %	
 / 	
$ v' OO				 	 )		
 2	 ;	 			2  	$	&	 			. H+(
$ *
 $ #k]D9 : "8+!FW X, "8+!FW X 
 	
	
)
 0

 "%;$<fnnM N (+A*BFNNS T '*@&--)PRVW X (+A6==*QSWX Y #&<fmm%LdS T 0&//B
	5 )
 $'=&>E F /0&..2P		" V^^,
NN6>>"
	7 
  34fnnV^^6T V^^,
NN6>>"
	7 
 \*
NN6>>"
	": 
 1;?!8
 2K@&..!8
 1;?!8 /=v}} 3[A6==!8	 "%9$:KH I "%9$:KH I "%9$:KH I "%9$:KH I !$8#9;G H "%9$:KH I #&:%;[I J 
MM


 
MM


 
MM


 


 


 


 


 


 

 ;'
OO
!8

 ;'
NN
!8

 ;'
LL
!8
 ;'
MM
!8	
 ;'
MM
!8	
 

 

 

 

 


 


 


 
MM

 
MM


 

 

 

 

 

 

 

4  NN
 ' %	
 + , . 	P  NN" 
C %	
 % ( 	L {# NN$ ' 	
 + / 0 	X )* NN

)  	
 
  	6 "v' NN	$ 		. !// f&& F$$  #M2O6 ''9OP ))$0FT ((
 ":  ((/EF (()?AWX ''.DE  !"%&#$#$#$!"  ^^O$&=>
*1H
 
OO


 [)


 56

"E
 


 

 

  

 34


 V^^,
9
 
LL

  V^^,
9
 ,b2HI J
 *V__,=?UV W -r3IJ K +fnn-=?UV W ^^V__%

 ^^V__%

  ^^V__%

 *V^^,<>TU V !^^V^^V^^4
&+
 ^^V^^V__fooF
"%-0
 $^^

 !^^V__fnnfnnfnnU
"'.38;
 $__fnnfnn5

 %
  6??FOO<
',<A
 %v'{# 


 
 &	

 
 -
 
 

, .v'{# 


 
 )	

 
 -
 
 

& "^^V^^V^^V^^D
  	 
( v'	   	
    $ #^^V^^%78
 #1N
 

 
OO

 3V^^D

 /69
 __foovfllS
LL
		'	 	 '		
 *	 		
	 __foovfllS
LL
		'	 	 '		
 *	 		
	 *B@ A __foo&
-
2	f.. 	"f..  


 


 

 


 


 

  ))&--&//Z   __foo&
MM

v''  13CD

  	



 "
 !	

 
 $
 #

rY   