U
    Mh                     @   s>  d Z ddlZddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	 Zeejejejejejgejf Zejejejejejejd
ddZefejejejeejdddZejejejejejejd
ddZejejejejejejd
ddZejejejejejejd
ddZedddZdS )zIThis module implements the user facing API for flex_attention in PyTorch.    N)Callable)flex_attention)_set_compilation_env)-_temp_remove_pre_dispatch_torch_function_mode)_validate_sdpa_inputc                  G   s   dd }t || S )z*Compose a sequence of score_mod functions.c                    s    fdd}|S )Nc                    s    | ||||||||S N )scorebhmnfgr   T/var/www/html/venv/lib/python3.8/site-packages/torch/nn/attention/_flex_attention.pyinner   s    z)_compose.<locals>.compose2.<locals>.innerr   )r   r   r   r   r   r   compose2   s    z_compose.<locals>.compose2)	functoolsreduce)fsr   r   r   r   _compose   s    r   r	   batchheadtoken_qtoken_kvreturnc                 C   s   | S r   r   r	   r   r   r   r   r   r   r   	_identity    s    r   )querykeyvalue	score_modr   c                 C   s   t j r>| ||fD ]}t j|d qt| |||\}}|S t| || | dd dkrdtdt j	 svt
dt r t jj Z t H t jtddd	| |||\}}|W  5 Q R  W  5 Q R  W  5 Q R  S Q R X W 5 Q R X W 5 Q R X d
S )a 	  This function implements scaled dot product attention with an arbitrary attention score modification function.

    This function computes the scaled dot product attention between query, key, and value tensors with a user-defined
    attention score modification function. The attention score modification function will be applied after the attention
    scores have been calculated between the query and key tensors. The attention scores are calculated as follows:

    The ``score_mod`` function should have the following signature:

    .. code-block:: python

        def score_mod(
            score: torch.Tensor,
            batch: torch.Tensor,
            head: torch.Tensor,
            token_q: torch.Tensor,
            token_kv: torch.Tensor
        ) -> torch.Tensor:

    Where:
        - ``score``: A scalar tensor representing the attention score,
          with the same data type and device as the query, key, and value tensors.
        - ``batch``, ``head``, ``token_q``, ``token_kv``: Scalar tensors indicating
          the batch index, head index, query index, and key/value index, respectively.
          These should have the ``torch.int`` data type and be located on the same device as the score tensor.

    Args:
        query (Tensor): Query tensor; shape :math:`(B, H, L, E)`.
        key (Tensor): Key tensor; shape :math:`(B, H, S, E)`.
        value (Tensor): Value tensor; shape :math:`(B, H, S, Ev)`.
        score_mod (Callable): Function to modify attention scores. By default no score_mod is applied.

    Returns:
        output (Tensor): Attention output; shape :math:`(B, H, L, Ev)`.

    Shape legend:
        - :math:`N: \text{Batch size} ... : \text{Any number of other batch dimensions (optional)}`
        - :math:`S: \text{Source sequence length}`
        - :math:`L: \text{Target sequence length}`
        - :math:`E: \text{Embedding dimension of the query and key}`
        - :math:`Ev: \text{Embedding dimension of the value}`

    .. warning::
        `torch.nn.attention.flex_attention` is a prototype feature in PyTorch. It doesn't support training currently.
        Please look forward to a more stable implementation in a future version of PyTorch.
        Read more about feature classification at: https://pytorch.org/blog/pytorch-feature-classification-changes/#prototype

       r   z&NYI: S and L must be a multiple of 128z'flex_attention requires dynamo support.eagerT)backendZ	fullgraphN)torchcompilerZis_dynamo_compilingZ_dynamoZmark_staticflex_attention_hopr   size
ValueErrorZis_dynamo_supportedRuntimeErrorr   utilsZdisable_cache_limitr   compile)r    r!   r"   r#   xout_r   r   r   _flex_attention*   s0    6

     r4   c                 C   s   t ||k| tdS Nz-infr)   wherefloatr   r   r   r   _causal{   s    r9   c                 C   s   | ||  S r   r   r   r   r   r   	_rel_bias   s    r:   c                 C   s   t ||k| ||  tdS r5   r6   r   r   r   r   _rel_causal   s    r;   	num_headsc                    s,   t jt jt jt jt jt jd fdd}|S )Nr   c                    s(   t |d d    }| || |  S )N   g       @)r)   Zexp2)r	   r   r   r   r   scaler<   r   r   _alibi_bias   s    z)_generate_alibi_bias.<locals>._alibi_bias)r)   Tensor)r=   r@   r   r<   r   _generate_alibi_bias   s    
rB   )__doc__r   typingr   r)   Z&torch._higher_order_ops.flex_attentionr   r+   Ztorch._higher_order_ops.utilsr   Z"torch.fx.experimental.proxy_tensorr   Ztorch.nn.attention._utilsr   r   rA   Z_score_mod_signaturer   r4   r9   r:   r;   intrB   r   r   r   r   <module>   s`   R
