
    *iv!                        S SK Jr  S SKrS SKJrJrJrJrJrJ	r	  S SK
Jr  S SKJrJr  S SKJr  S SKrS\R$                  S'    " S	 S
\5      r " S S\5      r " S S\\5      r " S S\5      rg)    )annotationsN)OptionalListLiteralUnionDictcast)Self)Fieldmodel_validator)BaseSettings protected_namespacesc                  v   \ rS rSr% Sr\" SS9rS\S'   \" SSS	9rS
\S'   \" SSSS9r	S\S'   \" \
R                  SS	9rS\S'   \" SSSS9rS\S'   \" SSS	9rS\S'   \" SSS	9rS\S'   \" \
R                   " 5       SS	9rS\S'   \" \
R$                  " 5       SS	9rS\S '   \" SS!S	9rS"\S#'   \" SS$S	9rS
\S%'   \" \
R,                  S&S	9rS\S''   \" S(SS)S9rS\S*'   \" S+S,S-S9rS\S.'   \" S+S,S/S9rS\S0'   \" \" \R:                  " 5       S1-  S,5      S,S2S9rS\S3'   \" \" \R:                  " 5       S,5      SS4S9rS\S5'   \" \
R@                  S69r!S\S7'   \" S8S9S	9r"S:\S;'   \" S8S<S	9r#S:\S='   \" S>S69r$S:\S?'   \" S@S69r%S:\SA'   \" SBS69r&S:\SC'   \" S@S69r'S:\SD'   \" SS69r(S\SE'   \" SFSGS	9r)S\SH'   \" SFSIS	9r*S\SJ'   \" SSKS	9r+S\SL'   \" SFSMS	9r,S\SN'   \" SSOS	9r-S\SP'   \" SQSSRS9r.S\SS'   \" SSTS	9r/S
\SU'   \" SSVS	9r0S
\SW'   \" SSXS	9r1SY\SZ'   \" SS[S	9r2S
\S\'   \" SS]S	9r3S
\S^'   \" SS_S	9r4S\S`'   \" SaSbS	9r5Sc\Sd'   \" SeSfS	9r6S\Sg'   \" SShS	9r7S
\Si'   \" SSjS	9r8S
\Sk'   \" SSlS	9r9S
\Sm'   \" SSnS	9r:S
\So'   \" SpSqS	9r;S\Sr'   \" SSsS	9r<St\Su'   \" SSvS	9r=St\Sw'   \" SFSxS	9r>S\Sy'   \?" SzS{9S~S| j5       r@S}rAg)ModelSettings   z*Model settings used to load a Llama model.z8The path to the model to use for generating completions.)descriptionstrmodelNz9The alias of the model to use for generating completions.defaultr   Optional[str]model_aliasr   z_The number of layers to put on the GPU. The rest will be on the CPU. Set -1 to move all to GPU.)r   ger   intn_gpu_layerszThe split mode to use.
split_modezMain GPU to use.main_gpuz0Split layers across multiple GPUs in proportion.zOptional[List[float]]tensor_splitFz&Whether to only return the vocabulary.bool
vocab_onlyz	Use mmap.use_mmapz
Use mlock.	use_mlockzList of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.zOptional[List[str]]kv_overridesz2comma seperated list of rpc servers for offloadingrpc_serverszRandom seed. -1 for random.seedi   zThe context size.n_ctxi      zThe batch size to use per eval.n_batchz)The physical batch size used by llama.cppn_ubatch   z8The number of threads to use. Use -1 for max cpu threads	n_threadszNThe number of threads to use when batch processing. Use -1 for max cpu threadsn_threads_batch)r   rope_scaling_typeg        zRoPE base frequencyfloatrope_freq_basezRoPE frequency scaling factorrope_freq_scaleg      yarn_ext_factorg      ?yarn_attn_factorg      @@yarn_beta_fastyarn_beta_slowyarn_orig_ctxTz+if true, use experimental mul_mat_q kernels	mul_mat_qzWhether to return logits.
logits_allzWhether to use embeddings.	embeddingz"Whether to offload kqv to the GPU.offload_kqvzWhether to use flash attention.
flash_attn@   z5Last n tokens to keep for repeat penalty calculation.last_n_tokens_sizezoOptional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.	lora_basez*Path to a LoRA file to apply to the model.	lora_pathzEnable NUMA support.zUnion[bool, int]numazChat format to use.chat_formatz<Path to a CLIP model to use for multi-modal chat completion.clip_model_pathz=Use a cache to reduce processing times for evaluated prompts.cacheramz5The type of cache to use. Only used if cache is True.zLiteral['ram', 'disk']
cache_typel        z;The size of the cache in bytes. Only used if cache is True.
cache_sizez5The path to a HuggingFace tokenizer_config.json file.hf_tokenizer_config_pathz~The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained(). hf_pretrained_model_name_or_pathz=The model repo id to use for the HuggingFace tokenizer model.hf_model_repo_idzHMethod to use for speculative decoding. One of (prompt-lookup-decoding).draft_model
   z2Number of tokens to predict using the draft model.draft_model_num_pred_tokensz#Type of the key cache quantization.zOptional[int]type_kz%Type of the value cache quantization.type_vz#Whether to print debug information.verbosebefore)modec                    [         R                  " 5       n[        [        [        [
        4   U 5      nUR                  SS5      S:X  a  XS'   UR                  SS5      S:X  a  XS'   U $ )Nr-   r   r   r.   )multiprocessing	cpu_countr	   r   r   r   get)selfrU   valuess      W/var/www/html/ai-backend/venv/lib/python3.13/site-packages/llama_cpp/server/settings.pyset_dynamic_defaults"ModelSettings.set_dynamic_defaults   sc    
 $--/	d38nd+::k1%+"+;::'+r1(1$%    r   )returnr
   )B__name__
__module____qualname____firstlineno____doc__r   r   __annotations__r   r   	llama_cppLLAMA_SPLIT_MODE_LAYERr   r   r    r"   llama_supports_mmapr#   llama_supports_mlockr$   r%   r&   LLAMA_DEFAULT_SEEDr'   r(   r*   r+   maxrT   rU   r-   r.   #LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDr/   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r>   r?   r@   rA   rB   rC   rD   rF   rG   rH   rI   rJ   rK   rM   rN   rO   rP   r   rZ   __static_attributes__r   r\   rY   r   r      s   4NE3  "'O"K 
 uL# 
 00,J  &Hc 
 +0F+L'  #KJ  --/Hd  ..0 It  ). o)L%  "'H"K 
 ,,:WD#  t7JKE3K'HGS  'RHc  O--/14a8NIs 
 !O--/3dOS 
 #==s  "#;PQNEQ"!@OU  #40OU0#C0e0!$/NE/!#.NE.q)M3)"OIt  T7RSJSE7STItT"FK  #DJ  $K   % F I}   %@ I} 
 #*D
 
 "')"K  &+R&O] 
 SE4  */K*J&  QJ 
 /4K/m  7< U7$m 
 ',S'm 
 "'^"K  (-H( 
 "9FM  ";FM 
 "GGT  r\   r   c                      \ rS rSr% Sr\" SSS9rS\S'   \" SS	S9rS
\S'   \" SSS9r	S\S'   \" SSS9r
S\S'   \" SSS9rS\S'   \" SSS9rS\S'   \" SSS9rS\S'   \" SSS9rS\S'   Srg)ServerSettings   zAServer settings used to configure the FastAPI and Uvicorn server.	localhostzListen addressr   r   hosti@  zListen portr   portNzSSL key file for HTTPSr   ssl_keyfilezSSL certificate file for HTTPSssl_certfilezIAPI key for authentication. If set all requests need to be authenticated.api_keyTz=Whether to interrupt requests when a new request is received.r!   interrupt_requestsFz;Disable EventSource pings (may be needed for some clients).disable_ping_events zIThe root path for the server. Useful when running behind a reverse proxy.	root_pathr   )r^   r_   r`   ra   rb   r   rp   rc   rq   rr   rs   rt   ru   rv   rx   rk   r   r\   rY   rm   rm      s    K k7GHD#Hd>D#>!&":"K  #("B#L-  #_G]   %S   !&Q!  _Is r\   rm   c                      \ rS rSrSrg)Settings   r   N)r^   r_   r`   ra   rk   r   r\   rY   rz   rz      s    r\   rz   c                  2    \ rS rSr% Sr\" / SS9rS\S'   Srg)	ConfigFileSettings   z#Configuration file format settings.zModel configsr   zList[ModelSettings]modelsr   N)	r^   r_   r`   ra   rb   r   r   rc   rk   r   r\   rY   r}   r}      s    -"'"PFPr\   r}   )
__future__r   rT   typingr   r   r   r   r   r	   typing_extensionsr
   pydanticr   r   pydantic_settingsr   rd   model_configr   rm   rz   r}   r   r\   rY   <module>r      sg    "  = = " + *  57  0 1vL vr\ >	~} 	Q Qr\   