
Activation#

class tensorrt_llm.layers.activation.Mish[source]#

Bases: Module

forward(input)[source]#

Attention#

class tensorrt_llm.layers.attention.Attention(*, local_layer_idx, hidden_size, num_attention_heads, num_kv_heads=None, max_position_embeddings=1024, num_layers=1, apply_query_key_layer_scaling=False, attention_head_size=None, qk_layernorm=False, layernorm_type=LayerNormType.LayerNorm, layernorm_share=True, inner_layernorm=False, eps=1e-05, attention_mask_type=AttentionMaskType.padding, bias=True, dtype=None, position_embedding_type=PositionEmbeddingType.learned_absolute, rotary_embedding_base=10000.0, rotary_embedding_base_local=1.0, rotary_embedding_scaling=None, rotary_embedding_percentage=1.0, rope_scaling_short_factors=None, rope_scaling_long_factors=None, rope_scaling_short_mscale=None, rope_scaling_long_mscale=None, original_max_position_embeddings=1024, tp_group=None, tp_size=1, tp_rank=0, quant_mode: ~tensorrt_llm.quantization.mode.QuantMode = <QuantMode: 0>, q_scaling=1.0, cross_attention=False, relative_attention=False, max_distance=0, num_buckets=0, dense_bias=None, clip_qkv=None, alibi_bias_max=8, skip_cross_kv=False, max_attn_value=0.0, block_sparse_params=None, use_implicit_relative_attention=False, reorder=False, enable_qkv=True, cp_group=[0], cp_size=1, cp_rank=0, max_seqlen_for_logn_scaling=8192, use_logn_scaling=False, is_local=False)[source]#

Bases: Module

static create_attention_const_params(model_cls, config)[source]#
static fill_attention_params(model_cls, attention_params)[source]#
forward(
hidden_states: Tensor,
attention_mask=None,
attention_packed_mask=None,
use_cache=False,
spec_decoding_params=None,
mrope_params=None,
kv_cache_params=None,
attention_params=None,
encoder_output: Tensor | None = None,
position_embedding=None,
norm_before_bmm1=False,
lora_layer_params=None,
cross_kv_cache_gen: Tensor | None = None,
cross_kv_reuse: Tensor | None = None,
all_reduce_params: AllReduceParams | None = None,
skip_attn=None,
)[source]#
postprocess(tllm_key, weights, **kwargs)[source]#
set_rel_attn_table(
max_seq_len,
precomputed_relative_attention,
)[source]#
class tensorrt_llm.layers.attention.AttentionMaskParams(
self_attention_mask: Tensor = None,
self_attention_packed_mask: Tensor = None,
cross_attention_mask: Tensor = None,
cross_attention_packed_mask: Tensor = None,
)[source]#

Bases: object

class tensorrt_llm.layers.attention.AttentionParams(
sequence_length: Tensor = None,
context_lengths: Tensor = None,
host_context_lengths: Tensor = None,
max_context_length: int = None,
host_request_types: Tensor = None,
encoder_input_lengths: Tensor = None,
encoder_max_input_length: Tensor = None,
host_runtime_perf_knobs: Tensor = None,
host_context_progress: Tensor = None,
)[source]#

Bases: object

fill_attention_const_params_for_long_rope(
embed_positions,
long_rope_embed_positions,
rotary_inv_freq,
long_rope_rotary_inv_freq,
embed_positions_for_gpt_attention,
long_rope_embed_positions_for_gpt_attention,
short_mscale,
long_mscale,
)[source]#
fill_attention_const_params_for_rope(
embed_positions: Tensor = None,
rotary_inv_freq: Tensor = None,
embed_positions_for_gpt_attention: Tensor = None,
embed_positions_local: Tensor = None,
rotary_inv_freq_local: Tensor = None,
embed_positions_for_gpt_attention_local: Tensor = None,
)[source]#
is_valid(
gpt_attention_plugin,
remove_input_padding,
use_kv_cache,
)[source]#
is_valid_cross_attn(do_cross_attention)[source]#
class tensorrt_llm.layers.attention.BertAttention(
hidden_size,
num_attention_heads,
max_position_embeddings=1024,
num_layers=1,
attention_head_size=None,
num_kv_heads=None,
q_scaling=1.0,
apply_query_key_layer_scaling=False,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
tp_rank=0,
cp_group=None,
cp_size=1,
relative_attention=False,
max_distance=0,
num_buckets=0,
quant_mode=<QuantMode: 0>,
)[source]#

Bases: Module

forward(
hidden_states: Tensor,
attention_mask=None,
input_lengths=None,
max_input_length=None,
lora_layer_params=None,
)[source]#
class tensorrt_llm.layers.attention.BlockSparseAttnParams(
block_size: int = 64,
homo_head_pattern: bool = False,
num_local_blocks: int = 16,
vertical_stride: int = 8,
)[source]#

Bases: object

class tensorrt_llm.layers.attention.CogVLMAttention(
*,
local_layer_idx,
hidden_size,
num_attention_heads,
num_kv_heads=None,
max_position_embeddings=1024,
attention_mask_type=AttentionMaskType.causal,
bias=True,
dtype=None,
position_embedding_type=PositionEmbeddingType.learned_absolute,
rotary_embedding_base=10000.0,
rotary_embedding_scaling=None,
tp_group=None,
tp_size=1,
tp_rank=0,
quant_mode: ~tensorrt_llm.quantization.mode.QuantMode = <QuantMode: 0>,
dense_bias=None,
)[source]#

Bases: Attention

forward(
hidden_states: Tensor,
use_cache=False,
kv_cache_params=None,
attention_params=None,
vision_token_mask=None,
position_embedding=None,
)[source]#
class tensorrt_llm.layers.attention.DeepseekV2Attention(
*,
local_layer_idx,
hidden_size,
num_attention_heads,
q_lora_rank,
kv_lora_rank,
qk_nope_head_dim=None,
qk_rope_head_dim=None,
v_head_dim=None,
eps=1e-06,
attention_mask_type=AttentionMaskType.causal,
dtype=None,
position_embedding_type=PositionEmbeddingType.learned_absolute,
max_position_embeddings=1024,
rotary_embedding_base=10000.0,
rotary_embedding_scaling=None,
rotary_embedding_beta_fast=32,
rotary_embedding_beta_slow=1,
rotary_embedding_mscale=1,
rotary_embedding_mscale_all_dim=0,
rotary_embedding_origin_max_position=4096,
rotary_scaling=None,
tp_group=None,
tp_size=1,
tp_rank=0,
quant_mode: ~tensorrt_llm.quantization.mode.QuantMode = <QuantMode: 0>,
)[source]#

Bases: Attention

forward(
hidden_states: Tensor,
use_cache=False,
spec_decoding_params=None,
kv_cache_params=None,
attention_params=None,
)[source]#
postprocess(tllm_key, weights, **kwargs)[source]#
weight_loader(
mapping: Mapping,
param: Parameter,
loaded_weight: Tensor,
)[source]#
class tensorrt_llm.layers.attention.DiffusersAttention(
*,
query_dim: int,
cross_attention_dim: int | None = None,
heads: int = 8,
kv_heads: int | None = None,
dim_head: int = 64,
dropout: float = 0.0,
bias: bool = False,
upcast_attention: bool = False,
upcast_softmax: bool = False,
cross_attention_norm: str | None = None,
cross_attention_norm_num_groups: int = 32,
qk_norm: str | None = None,
added_kv_proj_dim: int | None = None,
added_proj_bias: bool | None = True,
norm_num_groups: int | None = None,
spatial_norm_dim: int | None = None,
out_bias: bool = True,
scale_qk: bool = True,
only_cross_attention: bool = False,
eps: float = 1e-05,
rescale_output_factor: float = 1.0,
residual_connection: bool = False,
out_dim: int = None,
out_context_dim: int = None,
context_pre_only=None,
pre_only=False,
elementwise_affine: bool = True,
is_causal: bool = False,
attn_forward_funcname: str = 'joint_attn_forward',
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
hidden_states: Tensor,
encoder_hidden_states: Tensor | None = None,
attention_mask: Tensor | None = None,
max_input_length: Tensor | None = None,
*args,
**kwargs,
)[source]#
joint_attn_forward(
hidden_states: Tensor,
encoder_hidden_states: Tensor | None = None,
attention_mask: Tensor | None = None,
max_input_length: Tensor | None = None,
*args,
**kwargs,
)[source]#
class tensorrt_llm.layers.attention.KeyValueCacheParams(
past_key_value: List[Tensor] = None,
host_past_key_value_lengths: Tensor = None,
host_max_attention_window_sizes: Tensor = None,
host_sink_token_length: Tensor = None,
kv_cache_block_offsets: Tensor = None,
host_kv_cache_block_offsets: Tensor = None,
host_kv_cache_pool_pointers: Tensor = None,
host_kv_cache_pool_mapping: Tensor = None,
cache_indirection: Tensor = None,
past_key_value_length: Tensor = None,
cross_kv_cache_block_offsets: Tensor = None,
host_cross_kv_cache_block_offsets: Tensor = None,
host_cross_kv_cache_pool_pointers: Tensor = None,
host_cross_kv_cache_pool_mapping: Tensor = None,
)[source]#

Bases: object

fill_none_tensor_list(list_size)[source]#
get_first_past_key_value()[source]#
is_valid(gpt_attention_plugin)[source]#
class tensorrt_llm.layers.attention.MropeParams(
mrope_rotary_cos_sin: Tensor = None,
mrope_position_deltas: Tensor = None,
)[source]#

Bases: object

class tensorrt_llm.layers.attention.SpecDecodingParams(
spec_decoding_is_generation_length_variable: bool = False,
spec_decoding_max_generation_length: int = 1,
spec_decoding_generation_lengths: Tensor = None,
spec_decoding_position_offsets: Tensor = None,
spec_decoding_packed_mask: Tensor = None,
spec_decoding_use: Tensor = None,
)[source]#

Bases: object

tensorrt_llm.layers.attention.compute_relative_bias(
query_length,
key_length,
num_buckets,
max_distance,
bidirectional,
rel_attn_table,
tp_size=1,
tp_group=None,
tp_rank=None,
)[source]#
tensorrt_llm.layers.attention.make_causal_mask(bsz, tgt_len, past_key_values_length, dtype)[source]#

Cast#

class tensorrt_llm.layers.cast.Cast(output_dtype: str = 'float32')[source]#

Bases: Module

forward(x)[source]#

Conv#

class tensorrt_llm.layers.conv.Conv1d(
in_channels: int,
out_channels: int,
kernel_size: int,
stride: int = 1,
padding: int = 0,
dilation: int = 1,
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros',
dtype=None,
)[source]#

Bases: Module

forward(input)[source]#
class tensorrt_llm.layers.conv.Conv2d(
in_channels: int,
out_channels: int,
kernel_size: Tuple[int, int],
stride: Tuple[int, int] = (1, 1),
padding: Tuple[int, int] = (0, 0),
dilation: Tuple[int, int] = (1, 1),
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros',
dtype=None,
)[source]#

Bases: Module

forward(input)[source]#
class tensorrt_llm.layers.conv.Conv3d(
in_channels: int,
out_channels: int,
kernel_size: Tuple[int, int, int],
stride: Tuple[int, int, int] = (1, 1, 1),
padding: Tuple[int, int, int] = (0, 0, 0),
dilation: Tuple[int, int, int] = (1, 1, 1),
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros',
dtype=None,
)[source]#

Bases: Module

forward(input)[source]#
class tensorrt_llm.layers.conv.ConvTranspose2d(
in_channels: int,
out_channels: int,
kernel_size: Tuple[int, int],
stride: Tuple[int, int] = (1, 1),
padding: Tuple[int, int] = (0, 0),
output_padding: Tuple[int, int] = (0, 0),
dilation: Tuple[int, int] = (1, 1),
groups: int = 1,
bias: bool = True,
padding_mode: str = 'zeros',
dtype=None,
)[source]#

Bases: Module

forward(input, output_size=None)[source]#

Embedding#

class tensorrt_llm.layers.embedding.CombinedTimestepLabelEmbeddings(
num_classes,
embedding_dim,
class_dropout_prob=0.0,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
timestep: Tensor,
class_labels: Tensor,
hidden_dtype: str | None = 'float32',
)[source]#
class tensorrt_llm.layers.embedding.CombinedTimestepTextProjEmbeddings(
embedding_dim,
pooled_projection_dim,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
timestep: Tensor,
pooled_projection: Tensor,
)[source]#
class tensorrt_llm.layers.embedding.Embedding(
num_embeddings: int,
embedding_dim: int,
dtype: str | None = None,
tp_size: int = 1,
tp_group: list | None = None,
sharding_dim: int = 0,
tp_rank: int | None = None,
)[source]#

Bases: Module

The Embedding layer takes input indices (x) and an embedding lookup table (weight) as inputs, and returns the corresponding embeddings according to the input indices. weight has shape [num_embeddings, embedding_dim].

Tensor parallelism involves four parameters (tp_size, tp_group, sharding_dim, tp_rank). It is enabled only when tp_size > 1 and tp_group is not None.

When sharding_dim == 0, the weight is sharded along the vocabulary dimension.

When sharding_dim == 0, tp_rank must be set.

When sharding_dim == 1, the weight is sharded along the hidden dimension. A sharding sketch follows this class entry.

forward(x)[source]#
postprocess(tllm_key, weights, **kwargs)[source]#
weight_loader(
mapping: Mapping,
param: Parameter,
loaded_weight: Tensor,
)[source]#
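The sharding rules described above can be illustrated with a small, self-contained NumPy sketch. This is not the TensorRT-LLM implementation: the helper name shard_embedding_weight and the toy shapes are hypothetical, and only the splitting rule (vocabulary dimension for sharding_dim == 0, hidden dimension for sharding_dim == 1) follows the description.

    import numpy as np

    def shard_embedding_weight(weight, tp_size, tp_rank, sharding_dim):
        # Hypothetical helper, not part of tensorrt_llm: returns the slice of the
        # [num_embeddings, embedding_dim] table owned by rank `tp_rank`.
        assert weight.shape[sharding_dim] % tp_size == 0
        shard = weight.shape[sharding_dim] // tp_size
        if sharding_dim == 0:  # shard the vocabulary dimension
            return weight[tp_rank * shard:(tp_rank + 1) * shard, :]
        return weight[:, tp_rank * shard:(tp_rank + 1) * shard]  # hidden dimension

    # Toy table: 8 tokens, hidden size 4, split across tp_size = 2 ranks.
    full_weight = np.arange(32, dtype=np.float32).reshape(8, 4)
    local = shard_embedding_weight(full_weight, tp_size=2, tp_rank=1, sharding_dim=0)
    print(local.shape)  # (4, 4): rank 1 owns vocabulary ids 4..7

With sharding_dim == 0 each rank holds only a contiguous slice of the vocabulary, which is why tp_rank must be set: it determines which token ids the local table can resolve.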
class tensorrt_llm.layers.embedding.LabelEmbedding(
num_classes: int,
hidden_size: int,
dropout_prob: float = 0.0,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
labels: Tensor,
force_drop_ids: Tensor | None = None,
)[source]#
token_drop(
labels: Tensor,
force_drop_ids: Tensor,
)[source]#
class tensorrt_llm.layers.embedding.PixArtAlphaTextProjection(
in_features,
hidden_size,
out_features=None,
act_fn='gelu_tanh',
mapping=None,
dtype=None,
)[source]#

Bases: Module

Projects caption embeddings. Also handles dropout for classifier-free guidance.

Adapted from PixArt-alpha/PixArt-alpha

forward(caption)[source]#
class tensorrt_llm.layers.embedding.PromptTuningEmbedding(
num_embeddings,
embedding_dim,
vocab_size=None,
dtype=None,
tp_size=1,
tp_group=None,
sharding_dim=0,
tp_rank=0,
)[source]#

Bases: Embedding

PromptTuningEmbedding handles prompts tuned with virtual tokens. At runtime, a supplementary embedding dictionary is passed in; tokens with ids >= vocab_size are embedded using that additional dictionary. The prompt-tuning dictionary contains multiple tasks, and each sequence is assigned a given task. Prompt-tuned tokens from a given sequence use the appropriate task dictionary, as defined by the tasks input.

forward(
tokens,
prompt_embedding_table,
tasks,
task_vocab_size,
)[source]#

Passes all tokens through both the regular and the prompt embedding tables. Tokens are masked so that the "regular" embedding only sees "regular" tokens; the same logic applies to the "prompt" embedding. After both embeddings, the results are combined based on whether each token is "regular" or "prompt-tuned" (see the sketch below).

Parameters:
  • tokens – tensor of token ids to embed, of shape [batch_size, seq_len]

  • prompt_embedding_table – additional embedding table for prompt-tuned tokens, of shape [num_tasks * num_tokens_per_task, hidden_size]

  • tasks – tensor of the task required by each token, of shape [batch_size, seq_len]

  • task_vocab_size – tensor of the number of tokens used for each task, which should equal the prompt_embedding_table's num_tokens_per_task, of shape [1]

Returns:

The token embeddings
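A minimal NumPy sketch of the masking-and-combining behaviour described above, assuming the row layout implied by the parameter descriptions (one contiguous block of task_vocab_size rows per task). The helper name prompt_tuning_embed and the toy shapes are hypothetical; this is an illustration, not the TensorRT-LLM kernel.

    import numpy as np

    def prompt_tuning_embed(tokens, vocab_embed, prompt_table, tasks, task_vocab_size):
        # Illustrative sketch only.
        # tokens        [batch, seq]   token ids; ids >= vocab_size are prompt-tuned
        # vocab_embed   [vocab_size, hidden]   regular embedding table
        # prompt_table  [num_tasks * task_vocab_size, hidden]   prompt-tuning table
        # tasks         [batch, seq]   task id per token
        vocab_size = vocab_embed.shape[0]
        is_prompt = tokens >= vocab_size

        # Mask each stream so it only sees ids that are valid for its table.
        regular_ids = np.where(is_prompt, 0, tokens)
        prompt_ids = np.where(is_prompt, tokens - vocab_size, 0)
        # Assumed layout: one contiguous block of task_vocab_size rows per task.
        prompt_rows = tasks * task_vocab_size + prompt_ids

        regular = vocab_embed[regular_ids]
        prompt = prompt_table[prompt_rows]
        return np.where(is_prompt[..., None], prompt, regular)

    # Toy example: vocab_size = 10, hidden = 3, 2 tasks with 4 virtual tokens each.
    rng = np.random.default_rng(0)
    out = prompt_tuning_embed(
        tokens=np.array([[1, 12, 3]]),        # id 12 is a prompt-tuned token
        vocab_embed=rng.normal(size=(10, 3)),
        prompt_table=rng.normal(size=(8, 3)),
        tasks=np.array([[0, 1, 0]]),
        task_vocab_size=4,
    )
    print(out.shape)  # (1, 3, 3)

In this toy run, token 12 is >= vocab_size, so it is looked up in the prompt table at the assumed row tasks * task_vocab_size + (12 - vocab_size), while the other tokens come from the regular table.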

class tensorrt_llm.layers.embedding.SD3PatchEmbed(
height: int = 224,
width: int = 224,
patch_size: int = 16,
in_channels: int = 3,
embed_dim: int = 768,
layer_norm: bool = False,
flatten: bool = True,
bias: bool = True,
interpolation_scale: int = 1,
pos_embed_type: str = 'sincos',
pos_embed_max_size: int | None = None,
dtype=None,
)[source]#

Bases: Module

2D image-to-patch embedding with support for SD3 cropping.

cropped_pos_embed(height, width)[source]#

Crops the positional embedding for SD3 compatibility.

forward(latent)[source]#
class tensorrt_llm.layers.embedding.TimestepEmbedding(
in_channels: int,
time_embed_dim: int,
act_fn: str = 'silu',
out_dim: int = None,
post_act_fn: str | None = None,
cond_proj_dim=None,
sample_proj_bias=True,
mapping=None,
dtype=None,
)[source]#

Bases: Module

forward(sample, condition=None)[source]#
class tensorrt_llm.layers.embedding.Timesteps(
num_channels: int,
flip_sin_to_cos: bool,
downscale_freq_shift: float,
scale: int = 1,
)[source]#

Bases: Module

forward(timesteps) Tensor[source]#
tensorrt_llm.layers.embedding.get_1d_sincos_pos_embed_from_grid(
embed_dim: int,
pos: Tensor,
)[source]#
tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed(
embed_dim: int,
grid_size: int | Sequence[int],
cls_token: bool = False,
extra_tokens: int = 0,
interpolation_scale: float = 1.0,
base_size: int = 16,
)[source]#
tensorrt_llm.layers.embedding.get_2d_sincos_pos_embed_from_grid(
embed_dim: int,
grid: Sequence[Tensor],
)[source]#
tensorrt_llm.layers.embedding.get_timestep_embedding(
timesteps: Tensor,
embedding_dim: int,
flip_sin_to_cos: bool = False,
downscale_freq_shift: float = 1,
scale: float = 1,
max_period: int = 10000,
) Tensor[source]#

This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.

Parameters:
  • timesteps (Tensor) – a 1-D Tensor of N indices, one per batch element. These may be fractional.

  • embedding_dim (int) – the dimension of the output.

  • flip_sin_to_cos (bool) – whether the embedding order should be cos, sin (if True) or sin, cos (if False).

  • downscale_freq_shift (float) – controls the delta between frequencies between dimensions.

  • scale (float) – scaling factor applied to the embeddings.

  • max_period (int) – controls the maximum frequency of the embeddings.

Returns:

Tensor: an [N x dim] Tensor of positional embeddings.
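As a reference, the standard sinusoidal formula described above can be restated in NumPy. This sketch assumes the conventional DDPM/diffusers layout (sin half followed by cos half, optionally flipped, zero-padded for odd embedding_dim); it illustrates the math and is not the TensorRT-LLM graph code.

    import numpy as np

    def sinusoidal_timestep_embedding(timesteps, embedding_dim, flip_sin_to_cos=False,
                                      downscale_freq_shift=1.0, scale=1.0, max_period=10000):
        # Reference sketch of the standard sinusoidal timestep embedding.
        half_dim = embedding_dim // 2
        # Frequencies spaced geometrically from 1 down to roughly 1/max_period.
        exponent = -np.log(max_period) * np.arange(half_dim) / (half_dim - downscale_freq_shift)
        freqs = np.exp(exponent)                              # [half_dim]
        args = scale * timesteps[:, None] * freqs[None, :]    # [N, half_dim]
        emb = np.concatenate([np.sin(args), np.cos(args)], axis=-1)  # sin half, then cos half
        if flip_sin_to_cos:
            emb = np.concatenate([emb[:, half_dim:], emb[:, :half_dim]], axis=-1)
        if embedding_dim % 2 == 1:                            # pad odd dims with a zero column
            emb = np.pad(emb, ((0, 0), (0, 1)))
        return emb                                            # [N, embedding_dim]

    print(sinusoidal_timestep_embedding(np.array([0.0, 10.0, 500.0]), 8).shape)  # (3, 8)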

Linear#

tensorrt_llm.layers.linear.ColumnLinear#

alias of Linear

class tensorrt_llm.layers.linear.Linear(
in_features,
out_features,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
gather_output=True,
share_weight=None,
strict_dtype=False,
pad_lda=0,
pad_ldc=0,
prefer_managed_weight=True,
is_qkv=False,
)[source]#

Bases: LinearBase

collect_and_bias(x, **kwargs)[source]#
postprocess(tllm_key, weights, **kwargs)[source]#
classmethod tp_split_dim() int[source]#
class tensorrt_llm.layers.linear.LinearBase(
local_in_features,
local_out_features,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
share_weight=None,
strict_dtype=False,
pad_lda=0,
pad_ldc=0,
prefer_managed_weight=True,
)[source]#

Bases: Module

abstract collect_and_bias(
x: Tensor,
) Tensor[source]#
forward(
x,
lora_runtime_params: LoraRuntimeParams | None = None,
lora_hidden_state: Tensor | None = None,
**kwargs,
) Tensor[source]#
get_weight() Tensor[source]#
multiply_and_lora(
x,
weight,
gemm_plugin: str | None = None,
low_latency_gemm_plugin: str | None = None,
use_fp8: bool = False,
alpha: ndarray | None = None,
lora_runtime_params: LoraRuntimeParams | None = None,
lora_hidden_state: Tensor | None = None,
)[source]#
multiply_collect(
x,
weight,
gemm_plugin: str | None = None,
low_latency_gemm_plugin: str | None = None,
use_fp8: bool = False,
alpha: ndarray | None = None,
lora_runtime_params: LoraRuntimeParams | None = None,
lora_hidden_state: Tensor | None = None,
**kwargs,
)[source]#
abstract classmethod tp_split_dim() int[source]#
weight_loader(
mapping: Mapping,
param: Parameter,
loaded_weight: Tensor,
) None[source]#
class tensorrt_llm.layers.linear.RowLinear(
in_features,
out_features,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
strict_dtype: bool = False,
pad_lda=0,
prefer_managed_weight=True,
is_expert=False,
)[source]#

Bases: LinearBase

collect_and_bias(x, **kwargs)[source]#
multiply_collect(
x,
weight,
gemm_plugin: str | None = None,
low_latency_gemm_plugin: str | None = None,
use_fp8: bool = False,
alpha: ndarray | None = None,
lora_runtime_params: LoraRuntimeParams | None = None,
lora_hidden_state: Tensor | None = None,
**kwargs,
)[source]#
classmethod tp_split_dim() int[source]#

MLP#

class tensorrt_llm.layers.mlp.FusedGatedMLP(
hidden_size,
ffn_hidden_size,
hidden_act,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
quant_mode=<QuantMode: 0>,
inner_layernorm=False,
eps=1e-05,
is_expert=False,
)[source]#

Bases: Module

fc_gate(hidden_states, lora_layer_params=None)[source]#
fc_gate_plugin(hidden_states, lora_layer_params=None)[source]#
forward(
hidden_states,
lora_layer_params=None,
all_reduce_params: AllReduceParams | None = None,
)[source]#
class tensorrt_llm.layers.mlp.GatedMLP(
hidden_size,
ffn_hidden_size,
hidden_act,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
quant_mode=<QuantMode: 0>,
inner_layernorm=False,
eps=1e-05,
is_expert=False,
)[source]#

Bases: MLP

forward(
hidden_states,
lora_layer_params=None,
all_reduce_params: AllReduceParams | None = None,
)[source]#
class tensorrt_llm.layers.mlp.LinearActivation(
dim_in: int,
dim_out: int,
bias: bool = True,
activation: str = 'silu',
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(hidden_states)[source]#
class tensorrt_llm.layers.mlp.LinearApproximateGELU(
dim_in: int,
dim_out: int,
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(x)[source]#
class tensorrt_llm.layers.mlp.LinearGEGLU(
dim_in: int,
dim_out: int,
approximate: str = 'tanh',
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(hidden_states)[source]#
class tensorrt_llm.layers.mlp.LinearGELU(
dim_in: int,
dim_out: int,
approximate: str = 'tanh',
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(hidden_states)[source]#
class tensorrt_llm.layers.mlp.LinearSwiGLU(
dim_in: int,
dim_out: int,
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(hidden_states)[source]#
class tensorrt_llm.layers.mlp.MLP(
hidden_size,
ffn_hidden_size,
hidden_act,
bias=True,
dtype=None,
tp_group=None,
tp_size=1,
quant_mode=<QuantMode: 0>,
inner_layernorm=False,
eps=1e-05,
is_expert=False,
)[source]#

Bases: Module

forward(hidden_states, lora_layer_params=None, gegelu_limit=None)[source]#
tensorrt_llm.layers.mlp.fc_gate_dora(
hidden_states,
dora,
fused_gate_up_dora,
lora_layer_params,
)[source]#
tensorrt_llm.layers.mlp.fc_gate_lora(
hidden_states,
lora,
fused_gate_up_lora,
lora_layer_params,
)[source]#

Normalization#

class tensorrt_llm.layers.normalization.AdaLayerNorm(
embedding_dim: int,
num_embeddings: int | None = None,
output_dim: int | None = None,
norm_elementwise_affine: bool = False,
norm_eps: float = 1e-05,
chunk_dim: int = 0,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
x: Tensor,
timestep: Tensor | None = None,
temb: Tensor | None = None,
)[source]#
class tensorrt_llm.layers.normalization.AdaLayerNormContinuous(
embedding_dim: int,
conditioning_embedding_dim: int,
elementwise_affine: bool = True,
eps: float = 1e-05,
bias: bool = True,
norm_type: str = 'layer_norm',
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
x: Tensor,
conditioning_embedding: Tensor,
)[source]#
class tensorrt_llm.layers.normalization.AdaLayerNormZero(
embedding_dim: int,
num_embeddings: int | None = None,
norm_type: str = 'layer_norm',
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
x: Tensor,
timestep: Tensor | None = None,
class_labels: Tensor | None = None,
hidden_dtype: str = None,
emb: Tensor | None = None,
)[source]#
class tensorrt_llm.layers.normalization.AdaLayerNormZeroSingle(
embedding_dim: int,
norm_type: str = 'layer_norm',
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
x: Tensor,
emb: Tensor | None = None,
)[source]#
class tensorrt_llm.layers.normalization.GroupNorm(
num_groups,
num_channels,
eps=1e-05,
affine=True,
dtype=None,
)[source]#

Bases: Module

forward(x)[source]#
class tensorrt_llm.layers.normalization.LayerNorm(
normalized_shape,
eps=1e-05,
elementwise_affine=True,
bias=True,
dtype=None,
tp_size=1,
tp_dim=-1,
)[source]#

Bases: Module

forward(x, normalized_shape=None)[source]#
class tensorrt_llm.layers.normalization.RmsNorm(
normalized_shape,
num_groups=1,
eps=1e-06,
elementwise_affine=True,
dtype=None,
)[source]#

Bases: Module

forward(x, normalized_shape=None)[source]#
class tensorrt_llm.layers.normalization.SD35AdaLayerNormZeroX(
embedding_dim: int,
norm_type: str = 'layer_norm',
bias: bool = True,
mapping=<tensorrt_llm.mapping.Mapping object>,
dtype=None,
)[source]#

Bases: Module

forward(
hidden_states: Tensor,
emb: Tensor,
)[source]#

Pooling#

class tensorrt_llm.layers.pooling.AvgPool2d(
kernel_size: Tuple[int],
stride: Tuple[int] | None = None,
padding: Tuple[int] | None = (0, 0),
ceil_mode: bool = False,
count_include_pad: bool = True,
)[source]#

Bases: Module

forward(input)[source]#