Layers#
Activation#
Attention#
- class tensorrt_llm.layers.attention.Attention(*, local_layer_idx, hidden_size, num_attention_heads, num_kv_heads=None, max_position_embeddings=1024, num_layers=1, apply_query_key_layer_scaling=False, attention_head_size=None, qk_layernorm=False, layernorm_type=LayerNormType.LayerNorm, layernorm_share=True, inner_layernorm=False, eps=1e-05, attention_mask_type=AttentionMaskType.padding, bias=True, dtype=None, position_embedding_type=PositionEmbeddingType.learned_absolute, rotary_embedding_base=10000.0, rotary_embedding_base_local=1.0, rotary_embedding_scaling=None, rotary_embedding_percentage=1.0, rope_scaling_short_factors=None, rope_scaling_long_factors=None, rope_scaling_short_mscale=None, rope_scaling_long_mscale=None, original_max_position_embeddings=1024, tp_group=None, tp_size=1, tp_rank=0, quant_mode: ~tensorrt_llm.quantization.mode.QuantMode = <QuantMode: 0>, q_scaling=1.0, cross_attention=False, relative_attention=False, max_distance=0, num_buckets=0, dense_bias=None, clip_qkv=None, alibi_bias_max=8, skip_cross_kv=False, max_attn_value=0.0, block_sparse_params=None, use_implicit_relative_attention=False, reorder=False, enable_qkv=True, cp_group=[0], cp_size=1, cp_rank=0, max_seqlen_for_logn_scaling=8192, use_logn_scaling=False, is_local=False)[source]#
Bases:
Module
- forward(
- hidden_states: Tensor,
- attention_mask=None,
- attention_packed_mask=None,
- use_cache=False,
- spec_decoding_params=None,
- mrope_params=None,
- kv_cache_params=None,
- attention_params=None,
- encoder_output: Tensor | None = None,
- position_embedding=None,
- norm_before_bmm1=False,
- lora_layer_params=None,
- cross_kv_cache_gen: Tensor | None = None,
- cross_kv_reuse: Tensor | None = None,
- all_reduce_params: AllReduceParams | None = None,
- skip_attn=None,
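The forward() call builds graph operations and therefore needs an active TensorRT-LLM network/builder context; constructing the module itself does not. Below is a minimal construction sketch using only parameters from the signature above, with illustrative values.

```python
from tensorrt_llm.layers.attention import Attention

# Illustrative decoder-style configuration; hidden_size must be divisible
# by num_attention_heads, and num_kv_heads < num_attention_heads gives
# grouped-query attention (it defaults to num_attention_heads when None).
attn = Attention(
    local_layer_idx=0,
    hidden_size=4096,
    num_attention_heads=32,
    num_kv_heads=8,
    max_position_embeddings=4096,
    dtype='float16',
)
```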
- class tensorrt_llm.layers.attention.AttentionParams(
- sequence_length: Tensor = None,
- context_lengths: Tensor = None,
- host_context_lengths: Tensor = None,
- max_context_length: int = None,
- host_request_types: Tensor = None,
- encoder_input_lengths: Tensor = None,
- encoder_max_input_length: Tensor = None,
- host_runtime_perf_knobs: Tensor = None,
- host_context_progress: Tensor = None,
Bases:
object
- fill_attention_const_params_for_long_rope(
- embed_positions,
- long_rope_embed_positions,
- rotary_inv_freq,
- long_rope_rotary_inv_freq,
- embed_positions_for_gpt_attention,
- long_rope_embed_positions_for_gpt_attention,
- short_mscale,
- long_mscale,
- class tensorrt_llm.layers.attention.BertAttention(
- hidden_size (hidden size),
- num_attention_heads (number of attention heads),
- max_position_embeddings=1024 (maximum position embeddings),
- num_layers=1 (number of layers),
- attention_head_size=None (attention head size; computed automatically if None),
- num_kv_heads=None (number of KV heads; defaults to num_attention_heads if None),
- q_scaling=1.0 (Q scaling factor),
- apply_query_key_layer_scaling=False (whether to apply query-key layer scaling),
- bias=True (whether to use bias),
- dtype=None (data type),
- tp_group=None (tensor-parallel group),
- tp_size=1 (tensor-parallel size),
- tp_rank=0 (tensor-parallel rank),
- cp_group=None (context-parallel group),
- cp_size=1 (context-parallel size),
- relative_attention=False (whether to use relative attention),
- max_distance=0 (maximum distance),
- num_buckets=0 (number of buckets),
- quant_mode=<QuantMode: 0> (quantization mode),
Bases:
Module
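The same pattern applies to the encoder-style BertAttention; a construction sketch with illustrative BERT-base-like sizes:

```python
from tensorrt_llm.layers.attention import BertAttention

bert_attn = BertAttention(
    hidden_size=768,
    num_attention_heads=12,
    max_position_embeddings=512,
    dtype='float16',
)
```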
- class tensorrt_llm.layers.attention.BlockSparseAttnParams(
- block_size: int = 64 (block size),
- homo_head_pattern: bool = False (whether to use a homogeneous head pattern),
- num_local_blocks: int = 16 (number of local blocks),
- vertical_stride: int = 8 (vertical stride),
Bases:
object
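BlockSparseAttnParams is a plain parameter container; a sketch of building one and handing it to Attention through its block_sparse_params argument (all values are illustrative, and the matching attention_mask_type / plugin configuration is not shown):

```python
from tensorrt_llm.layers.attention import Attention, BlockSparseAttnParams

block_sparse = BlockSparseAttnParams(
    block_size=64,           # size of each attention block
    homo_head_pattern=False, # heads may use different sparsity patterns
    num_local_blocks=16,     # number of local (dense) blocks
    vertical_stride=8,       # stride of the vertical blocks
)

attn = Attention(
    local_layer_idx=0,
    hidden_size=3072,
    num_attention_heads=32,
    block_sparse_params=block_sparse,
    dtype='float16',
)
```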
- class tensorrt_llm.layers.attention.CogVLMAttention(
- *,
- local_layer_idx (local layer index),
- hidden_size (hidden size),
- num_attention_heads (number of attention heads),
- num_kv_heads=None (number of KV heads; defaults to num_attention_heads if None),
- max_position_embeddings=1024 (maximum position embeddings),
- attention_mask_type=AttentionMaskType.causal (attention mask type; causal by default),
- bias=True (whether to use bias),
- dtype=None (data type),
- position_embedding_type=PositionEmbeddingType.learned_absolute (position embedding type; learned absolute by default),
- rotary_embedding_base=10000.0 (rotary embedding base),
- rotary_embedding_scaling=None (rotary embedding scaling),
- tp_group=None (tensor-parallel group),
- tp_size=1 (tensor-parallel size),
- tp_rank=0 (tensor-parallel rank),
- quant_mode: ~tensorrt_llm.quantization.mode.QuantMode = <QuantMode: 0> (quantization mode),
- dense_bias=None (dense bias),
Bases:
Attention
- class tensorrt_llm.layers.attention.DeepseekV2Attention(
- *,
- local_layer_idx (local layer index),
- hidden_size (hidden size),
- num_attention_heads (number of attention heads),
- q_lora_rank (Q LoRA rank),
- kv_lora_rank (KV LoRA rank),
- qk_nope_head_dim=None (QK NoPE head dimension),
- qk_rope_head_dim=None (QK RoPE head dimension),
- v_head_dim=None (V head dimension),
- eps=1e-06 (epsilon for numerical stability),
- attention_mask_type=AttentionMaskType.causal (attention mask type; causal by default),
- dtype=None (data type),
- position_embedding_type=PositionEmbeddingType.learned_absolute (position embedding type; learned absolute by default),
- max_position_embeddings=1024 (maximum position embeddings),
- rotary_embedding_base=10000.0 (rotary embedding base),
- rotary_embedding_scaling=None (rotary embedding scaling),
- rotary_embedding_beta_fast=32 (fast rotary embedding beta),
- rotary_embedding_beta_slow=1 (slow rotary embedding beta),
- rotary_embedding_mscale=1 (rotary embedding mscale),
- rotary_embedding_mscale_all_dim=0 (rotary embedding mscale over all dimensions),
- rotary_embedding_origin_max_position=4096 (original maximum position for rotary embedding),
- rotary_scaling=None (rotary scaling),
- tp_group=None (tensor-parallel group),
- tp_size=1 (tensor-parallel size),
- tp_rank=0 (tensor-parallel rank),
- quant_mode: ~tensorrt_llm.quantization.mode.QuantMode = <QuantMode: 0> (quantization mode),
Bases:
Attention
- class tensorrt_llm.layers.attention.DiffusersAttention(
- *,
- query_dim: int (query dimension),
- cross_attention_dim: int | None = None (cross-attention dimension),
- heads: int = 8 (number of heads),
- kv_heads: int | None = None (number of KV heads),
- dim_head: int = 64 (head dimension),
- dropout: float = 0.0 (dropout probability),
- bias: bool = False (whether to use bias),
- upcast_attention: bool = False (whether to upcast attention to higher precision),
- upcast_softmax: bool = False (whether to upcast softmax to higher precision),
- cross_attention_norm: str | None = None (cross-attention normalization type),
- cross_attention_norm_num_groups: int = 32 (number of groups for cross-attention normalization),
- qk_norm: str | None = None (QK normalization type),
- added_kv_proj_dim: int | None = None (dimension of the added KV projections),
- added_proj_bias: bool | None = True (bias for the added projections),
- norm_num_groups: int | None = None (number of normalization groups),
- spatial_norm_dim: int | None = None (spatial normalization dimension),
- out_bias: bool = True (output bias),
- scale_qk: bool = True (whether to scale QK),
- only_cross_attention: bool = False (whether to perform cross-attention only),
- eps: float = 1e-05 (epsilon for numerical stability),
- rescale_output_factor: float = 1.0 (output rescaling factor),
- residual_connection: bool = False (whether to use a residual connection),
- out_dim: int = None (output dimension),
- out_context_dim: int = None (output context dimension),
- context_pre_only=None (use only the pre-attention projections for the context stream),
- pre_only=False (use only the pre-attention projections),
- elementwise_affine: bool = True (element-wise affine transform),
- is_causal: bool = False (whether attention is causal),
- attn_forward_funcname: str = 'joint_attn_forward' (name of the attention forward function),
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.attention.KeyValueCacheParams(
- past_key_value: List[Tensor] = None,
- host_past_key_value_lengths: Tensor = None,
- host_max_attention_window_sizes: Tensor = None,
- host_sink_token_length: Tensor = None,
- kv_cache_block_offsets: Tensor = None,
- host_kv_cache_block_offsets: Tensor = None,
- host_kv_cache_pool_pointers: Tensor = None,
- host_kv_cache_pool_mapping: Tensor = None,
- cache_indirection: Tensor = None,
- past_key_value_length: Tensor = None,
- cross_kv_cache_block_offsets: Tensor = None,
- host_cross_kv_cache_block_offsets: Tensor = None,
- host_cross_kv_cache_pool_pointers: Tensor = None,
- host_cross_kv_cache_pool_mapping: Tensor = None,
Bases:
object
- class tensorrt_llm.layers.attention.SpecDecodingParams(
- spec_decoding_is_generation_length_variable: bool = False,
- spec_decoding_max_generation_length: int = 1,
- spec_decoding_generation_lengths: Tensor = None,
- spec_decoding_position_offsets: Tensor = None,
- spec_decoding_packed_mask: Tensor = None,
- spec_decoding_use: Tensor = None,
Bases:
object
Cast#
Conv#
- class tensorrt_llm.layers.conv.Conv1d(
- in_channels: int,
- out_channels: int,
- kernel_size: int,
- stride: int = 1,
- padding: int = 0,
- dilation: int = 1,
- groups: int = 1,
- bias: bool = True,
- padding_mode: str = 'zeros',
- dtype=None,
Bases:
Module
- class tensorrt_llm.layers.conv.Conv2d(
- in_channels: int,
- out_channels: int,
- kernel_size: Tuple[int, int],
- stride: Tuple[int, int] = (1, 1),
- padding: Tuple[int, int] = (0, 0),
- dilation: Tuple[int, int] = (1, 1),
- groups: int = 1,
- bias: bool = True,
- padding_mode: str = 'zeros',
- dtype=None,
Bases:
Module
- class tensorrt_llm.layers.conv.Conv3d(
- in_channels: int,
- out_channels: int,
- kernel_size: Tuple[int, int, int],
- stride: Tuple[int, int, int] = (1, 1, 1),
- padding: Tuple[int, int, int] = (0, 0, 0),
- dilation: Tuple[int, int, int] = (1, 1, 1),
- groups: int = 1,
- bias: bool = True,
- padding_mode: str = 'zeros',
- dtype=None,
Bases:
Module
- class tensorrt_llm.layers.conv.ConvTranspose2d(
- in_channels: int,
- out_channels: int,
- kernel_size: Tuple[int, int],
- stride: Tuple[int, int] = (1, 1),
- padding: Tuple[int, int] = (0, 0),
- output_padding: Tuple[int, int] = (0, 0),
- dilation: Tuple[int, int] = (1, 1),
- groups: int = 1,
- bias: bool = True,
- padding_mode: str = 'zeros',
- dtype=None,
Bases:
Module
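A construction sketch for the convolution layers; Conv2d/Conv3d take tuple-valued kernel, stride, and padding as shown in the signatures, and the concrete numbers below are illustrative.

```python
from tensorrt_llm.layers.conv import Conv1d, Conv2d

conv1d = Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1, dtype='float16')
conv2d = Conv2d(
    in_channels=3,
    out_channels=16,
    kernel_size=(3, 3),
    stride=(1, 1),
    padding=(1, 1),
    dtype='float16',
)
```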
Embedding#
- class tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings(
- num_classes,
- embedding_dim,
- class_dropout_prob=0.0,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings(
- embedding_dim,
- pooled_projection_dim,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.embedding.Embedding(
- num_embeddings: int,
- embedding_dim: int,
- dtype: str | None = None,
- tp_size: int = 1,
- tp_group: list | None = None,
- sharding_dim: int = 0,
- tp_rank: int | None = None,
Bases:
Module
The Embedding layer takes input indices (x) and an embedding lookup table (weight) as input, and outputs the embeddings corresponding to the input indices. The shape of weight is [num_embeddings, embedding_dim].
Tensor parallelism involves four parameters (tp_size, tp_group, sharding_dim, tp_rank) and is enabled only when tp_size > 1 and tp_group is not None.
- When sharding_dim == 0, the weight is sharded along the vocabulary dimension; tp_rank must be set in this case.
- When sharding_dim == 1, the weight is sharded along the hidden dimension.
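A sketch of the vocabulary-sharded case described above; the group and rank values are illustrative (in a real model they come from the parallel mapping).

```python
from tensorrt_llm.layers.embedding import Embedding

# sharding_dim == 0: each rank stores num_embeddings / tp_size rows of the
# lookup table, so tp_rank must be provided.
vocab_embedding = Embedding(
    num_embeddings=32000,
    embedding_dim=4096,
    dtype='float16',
    tp_size=2,
    tp_group=[0, 1],
    sharding_dim=0,
    tp_rank=0,
)
```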
- class tensorrt_llm.layers.embedding.LabelEmbedding(
- num_classes: int,
- hidden_size: int,
- dropout_prob: float = 0.0,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.embedding.PixArtAlphaTextProjection(
- in_features,
- hidden_size (hidden size),
- out_features=None,
- act_fn='gelu_tanh',
- mapping=None,
- dtype=None,
Bases:
Module
Projects caption embeddings. Also handles dropout for classifier-free guidance.
- class tensorrt_llm.layers.embedding.PromptTuningEmbedding(
- num_embeddings,
- embedding_dim,
- vocab_size=None,
- dtype=None,
- tp_size=1,
- tp_group=None,
- sharding_dim=0,
- tp_rank=0,
Bases:
Embedding
PromptTuningEmbedding handles prompts fine-tuned with virtual tokens. At runtime, a supplementary embedding dictionary is passed; tokens whose ids are >= vocab_size are embedded with that additional dictionary. The prompt-tuning dictionary holds multiple tasks, and each sequence is assigned a given task. Prompt-tuned tokens from a given sequence use the appropriate task dictionary, as defined by the tasks input.
- forward(
- tokens,
- prompt_embedding_table,
- tasks,
- task_vocab_size,
Passes all tokens through both the regular and the prompt embedding tables. Tokens are masked so that the "regular" embedding only sees "regular" tokens, and likewise for the "prompt" embedding. After both lookups, the results are combined based on whether each token is "regular" or "prompt-tuned".
- Parameters:
tokens – tensor of ids to embed, of shape [batch_size, seq_len]
prompt_embedding_table – additional embedding table for prompt-tuned tokens, of shape [num_tasks * num_tokens_per_task, hidden_size]
tasks – tensor with the task required for each token, of shape [batch_size, seq_len]
task_vocab_size – tensor with the number of tokens used for each task, which should equal num_tokens_per_task of prompt_embedding_table, of shape [1]
- Returns:
The embeddings of the tokens
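A construction sketch for PromptTuningEmbedding; vocab_size marks the boundary between regular vocabulary ids and virtual prompt-tuning ids (values are illustrative). The runtime tensors passed to forward() (tokens, prompt_embedding_table, tasks, task_vocab_size) must be created inside a TensorRT-LLM network and are not shown.

```python
from tensorrt_llm.layers.embedding import PromptTuningEmbedding

prompt_embedding = PromptTuningEmbedding(
    num_embeddings=32000,  # rows of the regular embedding table
    embedding_dim=4096,
    vocab_size=32000,      # ids >= vocab_size are looked up in the prompt table
    dtype='float16',
)
```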
- class tensorrt_llm.layers.embedding.SD3PatchEmbed(
- height: int = 224,
- width: int = 224,
- patch_size: int = 16,
- in_channels: int = 3,
- embed_dim: int = 768,
- layer_norm: bool = False,
- flatten: bool = True,
- bias: bool = True,
- interpolation_scale: int = 1,
- pos_embed_type: str = 'sincos',
- pos_embed_max_size: int | None = None,
- dtype=None,
Bases:
Module
2D image-to-patch embedding with support for SD3 cropping.
- class tensorrt_llm.layers.embedding.TimestepEmbedding(
- in_channels: int,
- time_embed_dim: int,
- act_fn: str = 'silu',
- out_dim: int = None,
- post_act_fn: str | None = None,
- cond_proj_dim=None,
- sample_proj_bias=True,
- mapping=None,
- dtype=None,
Bases:
Module
- class tensorrt_llm.layers.embedding.Timesteps(
- num_channels: int,
- flip_sin_to_cos: bool,
- downscale_freq_shift: float,
- scale: int = 1,
Bases:
Module
- tensorrt_llm.layers.embedding.get_1d_sincos_pos_embed_from_grid(
- embed_dim: int,
- pos: Tensor,
- tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed(
- embed_dim: int,
- grid_size: int | Sequence[int],
- cls_token: bool = False,
- extra_tokens: int = 0,
- interpolation_scale: float = 1.0,
- base_size: int = 16,
- tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed_from_grid(
- embed_dim: int,
- grid: Sequence[Tensor],
- tensorrt_llm.layers.embedding.get_timestep_embedding(
- timesteps: Tensor,
- embedding_dim: int,
- flip_sin_to_cos: bool = False,
- downscale_freq_shift: float = 1,
- scale: float = 1,
- max_period: int = 10000,
This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.
- Args
- timesteps (Tensor)
a 1-D Tensor of N indices, one per batch element. These may be fractional.
- embedding_dim (int)
the dimension of the output.
- flip_sin_to_cos (bool)
Whether the embedding order should be cos, sin (if True) or sin, cos (if False)
- downscale_freq_shift (float)
Controls the delta between frequencies between dimensions
- scale (float)
Scaling factor applied to the embeddings.
- max_period (int)
Controls the maximum frequency of the embeddings
- Returns
Tensor: an [N x dim] Tensor of positional embeddings.
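A NumPy reference sketch of the sinusoidal formula described above, convenient for checking shapes and argument effects; it illustrates the math only and is not the library call itself (which operates on TensorRT-LLM tensors inside a network). The odd embedding_dim zero-padding case is omitted.

```python
import numpy as np

def sinusoidal_timestep_embedding(timesteps, embedding_dim, flip_sin_to_cos=False,
                                  downscale_freq_shift=1.0, scale=1.0, max_period=10000):
    """timesteps: 1-D array of N (possibly fractional) indices -> [N, embedding_dim]."""
    half_dim = embedding_dim // 2
    exponent = -np.log(max_period) * np.arange(half_dim) / (half_dim - downscale_freq_shift)
    freqs = np.exp(exponent)                          # per-dimension frequencies
    args = scale * timesteps[:, None] * freqs[None, :]
    emb = np.concatenate([np.sin(args), np.cos(args)], axis=-1)  # sin, cos order
    if flip_sin_to_cos:                               # cos, sin order instead
        emb = np.concatenate([emb[:, half_dim:], emb[:, :half_dim]], axis=-1)
    return emb
```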
Linear#
- class tensorrt_llm.layers.linear.Linear(
- in_features,
- out_features,
- bias=True,
- dtype=None,
- tp_group=None,
- tp_size=1,
- gather_output=True,
- share_weight=None,
- strict_dtype=False,
- pad_lda=0,
- pad_ldc=0,
- prefer_managed_weight=True,
- is_qkv=False,
Bases:
LinearBase
- class tensorrt_llm.layers.linear.LinearBase(
- local_in_features,
- local_out_features,
- bias=True,
- dtype=None,
- tp_group=None,
- tp_size=1,
- share_weight=None,
- strict_dtype=False,
- pad_lda=0,
- pad_ldc=0,
- prefer_managed_weight=True,
Bases:
Module
- forward(
- x,
- lora_runtime_params: LoraRuntimeParams | None = None,
- lora_hidden_state: Tensor | None = None,
- **kwargs,
- multiply_and_lora(
- x,
- weight,
- gemm_plugin: str | None = None,
- low_latency_gemm_plugin: str | None = None,
- use_fp8: bool = False,
- alpha: ndarray | None = None,
- lora_runtime_params: LoraRuntimeParams | None = None,
- lora_hidden_state: Tensor | None = None,
- class tensorrt_llm.layers.linear.RowLinear(
- in_features,
- out_features,
- bias=True,
- dtype=None,
- tp_group=None,
- tp_size=1,
- strict_dtype: bool = False,
- pad_lda=0,
- prefer_managed_weight=True,
- is_expert=False,
Bases:
LinearBase
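A sketch of the usual tensor-parallel pairing: a column-parallel Linear with gather_output=False keeps its output sharded, and the following RowLinear consumes the shard and all-reduces the partial results across tp_group. Sizes and the group are illustrative.

```python
from tensorrt_llm.layers.linear import Linear, RowLinear

tp_group, tp_size = [0, 1], 2

# Column-parallel: each rank owns out_features / tp_size output columns.
fc = Linear(
    in_features=4096,
    out_features=16384,
    tp_group=tp_group,
    tp_size=tp_size,
    gather_output=False,
    dtype='float16',
)

# Row-parallel: each rank owns in_features / tp_size input rows; the
# per-rank partial sums are reduced across tp_group.
proj = RowLinear(
    in_features=16384,
    out_features=4096,
    tp_group=tp_group,
    tp_size=tp_size,
    dtype='float16',
)
```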
MLP#
- class tensorrt_llm.layers.mlp.FusedGatedMLP(
- hidden_size (hidden size),
- ffn_hidden_size,
- hidden_act,
- bias=True (whether to use bias),
- dtype=None (data type),
- tp_group=None (tensor-parallel group),
- tp_size=1 (tensor-parallel size),
- quant_mode=<QuantMode: 0> (quantization mode),
- inner_layernorm=False,
- eps=1e-05,
- is_expert=False,
Bases:
Module
- forward(
- hidden_states,
- lora_layer_params=None,
- all_reduce_params: AllReduceParams | None = None,
- class tensorrt_llm.layers.mlp.GatedMLP(
- hidden_size (hidden size),
- ffn_hidden_size,
- hidden_act,
- bias=True (whether to use bias),
- dtype=None (data type),
- tp_group=None (tensor-parallel group),
- tp_size=1 (tensor-parallel size),
- quant_mode=<QuantMode: 0> (quantization mode),
- inner_layernorm=False,
- eps=1e-05,
- is_expert=False,
Bases:
MLP
- forward(
- hidden_states,
- lora_layer_params=None,
- all_reduce_params: AllReduceParams | None = None,
- class tensorrt_llm.layers.mlp.LinearActivation(
- dim_in: int,
- dim_out: int,
- bias: bool = True,
- activation: str = 'silu',
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.mlp.LinearApproximateGELU(
- dim_in: int,
- dim_out: int,
- bias: bool = True,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.mlp.LinearGEGLU(
- dim_in: int,
- dim_out: int,
- approximate: str = 'tanh',
- bias: bool = True,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.mlp.LinearGELU(
- dim_in: int,
- dim_out: int,
- approximate: str = 'tanh',
- bias: bool = True,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.mlp.LinearSwiGLU(
- dim_in: int,
- dim_out: int,
- bias: bool = True,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.mlp.MLP(
- hidden_size (hidden size),
- ffn_hidden_size,
- hidden_act,
- bias=True (whether to use bias),
- dtype=None (data type),
- tp_group=None (tensor-parallel group),
- tp_size=1 (tensor-parallel size),
- quant_mode=<QuantMode: 0> (quantization mode),
- inner_layernorm=False,
- eps=1e-05,
- is_expert=False,
Bases:
Module
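A construction sketch for GatedMLP, the gate/up/down feed-forward variant; the sizes and the 'silu' activation are illustrative assumptions.

```python
from tensorrt_llm.layers.mlp import GatedMLP

mlp = GatedMLP(
    hidden_size=4096,
    ffn_hidden_size=11008,
    hidden_act='silu',
    bias=False,
    dtype='float16',
)
```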
Normalization#
- class tensorrt_llm.layers.normalization.AdaLayerNorm(
- embedding_dim: int,
- num_embeddings: int | None = None,
- output_dim: int | None = None,
- norm_elementwise_affine: bool = False,
- norm_eps: float = 1e-05,
- chunk_dim: int = 0,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.normalization.AdaLayerNormContinuous(
- embedding_dim: int,
- conditioning_embedding_dim: int,
- elementwise_affine: bool = True (element-wise affine transform),
- eps: float = 1e-05 (epsilon for numerical stability),
- bias: bool = True,
- norm_type: str = 'layer_norm',
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.normalization.AdaLayerNormZero(
- embedding_dim: int,
- num_embeddings: int | None = None,
- norm_type: str = 'layer_norm',
- bias: bool = True,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle(
- embedding_dim: int,
- norm_type: str = 'layer_norm',
- bias: bool = True,
- mapping=<tensorrt_llm.mapping.Mapping object> (mapping object),
- dtype=None (data type),
Bases:
Module
- class tensorrt_llm.layers.normalization.GroupNorm(
- num_groups,
- num_channels,
- eps=1e-05,
- affine=True,
- dtype=None,
Bases:
Module
- class tensorrt_llm.layers.normalization.LayerNorm(
- normalized_shape,
- eps=1e-05,
- elementwise_affine=True,
- bias=True,
- dtype=None,
- tp_size=1,
- tp_dim=-1,
Bases:
Module
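A construction sketch for the normalization layers above; the shapes are illustrative.

```python
from tensorrt_llm.layers.normalization import GroupNorm, LayerNorm

layer_norm = LayerNorm(normalized_shape=4096, eps=1e-5, dtype='float16')
group_norm = GroupNorm(num_groups=32, num_channels=256, eps=1e-5, affine=True, dtype='float16')
```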