U
    yh                     @   sF  d dl Z d dlZd dlZd dlZd dlmZmZ d dlZd dlZd dl	Zd dl
mZ d dlmZmZmZmZmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ ejjZdd	d
dgZ i Z!g Z"ej#j$j%Z%dd Z&dd Z'e&e%j(j)e%j(j*e%j+j)e%j+j*e%j,j)e%j,j*e%j-j)e%j-j*e%j.j)e%j.j*e%j/j)e%j/j*e%j/j0e%j/j1e%j2j)e%j2j*e%j3j)e%j3j*e%j4j)e%j4j*e%j5j)e%j5j*e%j6j)e%j6j*e%j7j)e%j7j*Z8e&e%j9j)e%j:j)e%j:j*e%j;j)e%j<j)e%j=j>e%j=j?e%j@j)e%j@j*e%jAj)e%jAj*ZBe%jCfZDdd ZEe FdedddZGeeegeHf ef dddZIeIe!jJdd ZKeIeGeIe8dd ZLeIe%j=j?eIe%j=j>dd ZMdd
 ZNeIeNdd  ZOeIe%jPj)d!d" ZPeIe%jQj)d#d$ ZQeId%d& d'd( ZRdld+d,ZSeIe%jTj)dmd-d.ZUeIe%jVj)dnd/d0ZVeIe%jWjXdod1d2ZYeIej$j%jZj)d3d4 Z[eIej$j%j\j)d5d6 Z\eIej$j%j]j)d7d8 Z]eId9d& d:d; Z^d<d= Z_d>d? Z`e&d@dAdBZadCdD ZbdEd ZceIdFd& dGdH ZdeIe%jejXdIdJ ZfeIe%jgj)dKdL ZheIe%jij)eIe%jjj)eIe%jkj)eIe%jlj)dMdN ZmeIe%jjj*eIe%jlj*dOdP ZneIe%joj)eIe%jpj)dQdR ZqeIe%j:j)eIe%j:j*eIe%jrj)eIe%jsj)dSdT ZteIdUdV eBD dWdX ZueIe%jvj)e%jwj)gdYdZ ZxeIe%jyj)d[d\ ZzeIe%j{j)d]d^ Z|eIe%j}j)d_d` Z~eIe%jj)dadb ZeIej$j%jj)dcdd Zi ZeddedfZdgdh Zdidj Ze Fddkd	 ZdS )p    N)CallableUnion)
OpOverload)elementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KINDis_boolean_dtypeis_float_dtypeis_integer_dtype)DataDependentOutputExceptionDynamicOutputShapeException
FakeTensorin_kernel_invocation_managerrun_fallback_kernelUnsupportedOperatorException)normalize_functioncount_labelop_implementations_checksget_fast_op_implsstride_incorrect_ophas_metac                  G   s   t | dS )NT)dictfromkeys)items r   N/var/www/html/venv/lib/python3.8/site-packages/torch/_subclasses/fake_impls.pyordered_set2   s    r   c                 C   s   | j dkrdS dS )NZhpuFT)typedevicer   r   r   is_noncontiguous_supported8   s    
r    c                 C   s,   t jj }| |p*tdd |  D S )Nc                 s   s   | ]}t |V  qd S N)contains_tensor_types).0er   r   r   	<genexpr>p   s    z(contains_tensor_types.<locals>.<genexpr>)torch_C
TensorTypegetZisSubtypeOfanyZcontainedTypes)r   Ztensor_typer   r   r   r"   n   s    r"   funcc                 C   sR   t | tst| j}tdd |jD r,dS t|jdkoP|jd jt	j
j kS )Nc                 s   s   | ]}t |jV  qd S r!   )r"   r   )r#   argr   r   r   r%   y   s     z)_is_tensor_constructor.<locals>.<genexpr>F   r   )
isinstancer   AssertionErrorZ_schemar*   	argumentslenZreturnsr   r&   r'   r(   r)   )r,   Zschemar   r   r   _is_tensor_constructoru   s    $r3   run_impl_checkc                    s    fdd}|S )Nc                    sn   t  tr* tks td  | t < n@t  ttfrP D ]}t||  q<nt s\tt	 | f | S )Nzduplicate registration: )
r/   r   op_implementations_dictr0   listtupleregister_op_implcallabler   append)op_implopr4   r   r   impl_decorator   s    

z(register_op_impl.<locals>.impl_decoratorr   )r5   r>   r   r4   r   r9      s    r9   c                 O   s   t | | |f||S r!   )r6   	fake_moder,   argskwargsr   r   r   #dispatch_to_op_implementations_dict   s    rC   c           	   	   O   s   |t kstt|||dd\}}d|kr0td|tkrP|d j}|df}ntd}d}|dd }|d k	rv|n|}td	|d< t|  |||}W 5 Q R X t	| ||S )
NTrA   rB   normalize_to_only_use_kwargsnamesz+torch.compile doesn't support named tensorsinputcpur   r   meta)
_non_kwarg_device_constructorsr0   r   r   _like_tensor_constructorsr   popr&   r   r   )	r@   r,   rA   rB   _
new_kwargsZdefault_device
out_devicerr   r   r   constructors   s,       



rQ   c           
   	   O   sv   t |||dd\}}|d }|r$|n|d j}td|d< |d}t|  ||f|}	W 5 Q R X | j| |	|S )NT)rE   r   rG   rI   )r   r   r&   rL   r   fake_tensor_converterfrom_meta_and_device)
r@   r,   rA   rB   rM   rN   Zinput_devicerO   ZinprP   r   r   r   non_kwarg_to   s"       


  rT   c                 C   s6   | j dkrdS | tjjkrdS |  }d|kr2dS dS )N)atenprimsFZfftT)	namespacerU   Z_fft_c2cdefaultname)r=   Zop_namer   r   r   r      s    
c                    sb   dd  | j rVt fddt|| D }|sVt||f\}}t| |||d S t|d S )Nc                 S   s.   t | tr| jS t | tjtjtjfr*dS dS )NTF)r/   r   Z_has_symbolic_sizes_stridesr&   ZSymIntZSymFloatZSymBool)xr   r   r   is_symbolic   s
    
z3wordaround_stride_incorrect_op.<locals>.is_symbolicc                 3   s   | ]} |V  qd S r!   r   r#   rZ   r[   r   r   r%      s    z1wordaround_stride_incorrect_op.<locals>.<genexpr>)	Zallow_fallback_kernelsr*   	itertoolschainvaluespytreeZtree_flattenr   r   )r@   r,   rA   rB   Zrequire_dynamicZ	flat_argsZ	args_specr   r]   r   wordaround_stride_incorrect_op   s    rb   c              
   O   s*   t |  |||W  5 Q R  S Q R X d S r!   )r   r?   r   r   r   
resize_as_   s    
rc   c                 O   s   t | |f||S r!   )rQ   r?   r   r   r   (_sparse_coo_tensor_with_dims_and_tensors   s    rd   c                 C   s(   t jj| jko&| tjjtjjtj	jfkS r!   )
r&   TagZdynamic_output_shapetagsrU   indexTensornonzerorX   repeat_interleaver+   r   r   r   <lambda>   s   rk   c                 O   s   t |d S r!   )r   r?   r   r   r   	dyn_shape   s    rl   TFc                 C   s  | j d ks| j jst||d k	s0|j }d krddlm}m}	 |	| s^| dkr^d}nJ| j  }t	j
d }
|d kr| n||}|	|st|}
|||
d |d kr||_|d kr||fg}n,|j|jd | |f|j|d d   g}|d k	o|jtdk}|s |rD||d kr4|jn
|j| f}n
|d}|| |sd|r||d kr||d jn|d j| f}n
|d}|| t|S )Nr   _constrain_range_for_sizehas_free_symbolsr.   maxrH   )	shape_envallow_dynamic_output_shape_opsr   Zunique_memo%torch.fx.experimental.symbolic_shapesrn   ro   numelcreate_unbacked_symintsysmaxsizesizeint	new_emptyshapefake_devicer&   r   r;   r8   )r@   r,   r-   dimsortedreturn_inversereturn_countsnnzrn   ro   maxvalru   retZreturn_if_dim_and_cpuZinversecountsr   r   r   _unique  s<    	

,$

,

r   c                 C   s   t | ||d |||S r!   )r   )r@   r,   r-   r   r   r   r   r   r   unique2D  s    r   c                 C   s,   t | |||dkr|n|t|jd |||S )Nr   r.   )r   rq   ndim)r@   r,   r-   r~   r   r   r   r   r   r   
unique_dimK  s    r   c                 C   sJ   |d kr@| j d ks| j js"t|| j  }ddlm} || ||S )Nr   rn   )rr   rs   r   rv   rt   rn   r{   )r@   r,   ZrepeatsZoutput_sizern   r   r   r   repeat_interleave_tensor[  s    
r   c                 C   s   |j  }d k	r|S | jd ks*| jjs2| js2t|t|jrH| j }n<t|jr^| j }n&t	|jrt| j
 }ntd|j ||_ |S )Nz local_scalar_dense/item NYI for )Z	item_memorr   Zallow_scalar_outputsr
   r   dtypeZcreate_unbacked_symfloatr	   rv   r   Zcreate_unbacked_symboolNotImplementedError)r@   r,   r-   rP   r   r   r   local_scalar_densen  s"    



r   c                 C   s   | j d ks| j jst||j }d krddlm}m} || sV| dkrVd}n8| j  }t	j
d }|| st| }|||d ||_|j|| ftjdS )Nr   rm   r.   rp   )r   )rr   rs   r   Znonzero_memort   rn   ro   ru   rv   rw   rx   rz   r{   r~   r&   Zint64)r@   r,   r-   r   rn   ro   r   r   r   r   ri     s     	

ri   c                 C   sz   | j d ks| j jst|| j  }tjd }ddlm}m} ||	 sb|	 dkrbt
|	 }|||d ||fS )Nr.   r   rm      rp   )rr   rs   r   rv   rw   rx   rt   rn   ro   ru   rz   r{   )r@   r,   selfmaskr   r   rn   ro   r   r   r   masked_select  s    

r   c                 C   s   t jj| jkS r!   )r&   re   Zdata_dependent_outputrf   r+   r   r   r   rk         c                 O   s   t |d S r!   )r
   r?   r   r   r   data_dep  s    r   c                 C   s0   |D ]&}|d k	r|j tjtjfkrt| qd S r!   )r   r&   boolZuint8r   )r,   r   indicesrg   r   r   r   check_no_bool_index_tensors  s    r   c              	   C   sl   t |||dd\}}|d j}t| $ |||}t|sF||j}W 5 Q R X ||d kr`|S t| ||S NTrD   rG   )r   r   r   r    r{   r|   r   )r@   r,   rA   rB   rM   rN   rO   outr   r   r   )run_and_return_new_tensor_of_input_device  s       



r   rU   rV   Zprimc                 C   s
   | j tkS r!   )rW   _is_builtin_namespaces)r=   r   r   r   
is_builtin  s    r   c                 C   s   t j|  dS )NZMeta)r&   r'   Z._dispatch_has_computed_kernel_for_dispatch_keyrY   r+   r   r   r   r     s    c                 C   s   t | od|  kot| S )NZforeach)r   rY   r   r+   r   r   r   rk     r   c              
      s   g }t || D ]4}t|ttfrt|rt|d tjr|	| qz"t
|  |||}W 5 Q R X W n* tk
r } zt W Y S d }~X Y nX |s|S |stg }t|D ]<\ }	t| fdd|D \}
}|	| j| |	|
 q|S )Nr   c                    s   g | ]}|  qS r   r   )r#   tlir   r   
<listcomp>  s     z4foreach_run_and_map_input_device.<locals>.<listcomp>)r^   r_   r`   r/   r7   r8   r2   r&   rh   r;   r   r   NotImplementedr0   	enumerater   Z_find_common_devicerR   rS   )r@   r,   rA   rB   Ztensor_listsr-   Zout_metaZnot_implemented_errorZout_fakeZmeta_tr   rM   r   r   r    foreach_run_and_map_input_device  s8    
  r   c           	   
   O   sZ   ddl m} t|||dd\}}|d j}| " |||}||W  5 Q R  S Q R X d S )Nr   )meta_index_TensorTrD   rG   )torch._meta_registrationsr   r   r   to)	r@   r,   rA   rB   r   rM   rN   rO   r   r   r   r   index_tensor  s       


r   c              
   O   s2   ddl m} |  |||W  5 Q R  S Q R X d S )Nr   )meta_embedding_bag)r   r   )r@   r,   rA   rB   r   r   r   r   embedding_bag+  s    r   c                 O   s   t | |||S r!   )r   r?   r   r   r   multi_device_op_default4  s    r   c              	   O   s:   t |  |||}W 5 Q R X t|||dd\}}|d S r   )r   r   )r@   r,   rA   rB   r   rM   rN   r   r   r   multi_device_op_out=  s    
   
r   c                    s   t  ||dd\}}|d |d jtjkpHjdkoH dk fdd t|  ||} tjj	kr||d S |S d S )	NTrD   r`   rG   r   r.   c                      s   d  d dj  dS )NzMismatching z device between self (z) and values ()r   r   r,   Zself_devicer`   r   r   rk   U  r   z index_put_impl.<locals>.<lambda>)
r   r}   r&   _checkr   ru   r   rU   
index_put_rX   )r@   r,   rA   rB   rM   rN   r   r   r   r   index_put_implJ  s        

r   c                 O   s   t dd S )Nz3torch.compile does not support strided NestedTensor)r   r?   r   r   r   nested_tensors_unsupported_  s    r   c                 C   s0   g | ](}|t jjt jjt jjt jjfkr|qS r   )rU   r   r   prim_Device_nested_tensor_from_tensor_listrX   r   r\   r   r   r   r   j  s   r   c                 O   s   |t kstd| d S )NzNYI: )_device_not_kwarg_opsr0   r?   r   r   r   nyii  s    r   c                    s  t |||dd\}}|d j  |d j}|d jd }ddlm} ||sVd }n|dkrx|d jsx|d jsxd }nx|tj	j
krtjjf |}	nDtjj|d |d d |d |d	 |d
 |d |d |d |d d
}	tj|d |d |	}W 5 Q R X  fdd}
th |f |}|tj	j
kr@|
||W  5 Q R  S |
|d ||
|d ||
|d d fW  5 Q R  S W 5 Q R X d S )NTrD   rG   weightr   )has_hint   stridepaddingdilation
transposedoutput_paddinggroups
bias_sizes)Zbiasr   r   r   r   r   r   r   c                    s,   | d kr| S |d k	r | j |d} t|  S )Nmemory_format)r   r   )tmem_fmtr   r@   r   r   convert  s
    zconv.<locals>.convertr.   r   )r   r}   r   r|   rt   r   Z	is_mkldnnZis_xpurU   convolutionrX   r&   r'   Z_select_conv_backendZ%_conv_determine_backend_memory_formatr   )r@   r,   rA   rB   rM   kbatchr   r   Zconv_backendr   r   r   r   r   conv{  sV       


  

r   c                    sj  t |||dd\}}|d }|d }|d } fdd}|d}	|d	}
|d
}|d}|d
}|d	d
}t|d	d
}|tj|	|
|ftjdd|jd}|r|dkrdnd}t	|| }|dkrd}n|dkrd}|tj|	|
||f|j
dd|jd}n|tjd|j
dd|j}||d d |||tjdtjdd|j|tjdtjdd|j|f	S )NTrD   querykeyreturn_debug_maskc                    s   t  | |S r!   r   r   r   r@   r   r   convert_tensor  s    z6meta__scaled_dot_product_flash.<locals>.convert_tensorr   r.   r   r   rI   r   r   r   @         r   )r   ry   	transposer&   
empty_likeemptyfloatr   mathceilr   long)r@   r,   rA   rB   rM   r   r   r   r   
batch_size	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_kZquery_t	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskr   r   r   meta__scaled_dot_product_flash  sl       





	
	r   c              	      sF  t |||dd\}}|d }|d }|d }|d } fdd}	|d	d
}|d	d
}|d	d
}|d}
|d	}|d	}|d}|d}|d}|	tj|
||||jdd|j}|rt|d d nd}|	tj|
||ftj	dd|j}|d	d
}|	tjdtj
dd|j}|	tjdtj
dd|j}||||fS )NTrD   r   r   valuecompute_log_sumexpc                    s   t  | |S r!   r   r   r   r   r   r     s    z:meta__scaled_dot_product_efficient.<locals>.convert_tensorr.   r   r   rI   r       r   )r   r   ry   r&   r   r   r   r   r   r   r   )r@   r,   rA   rB   rM   r   r   r   r   r   BMNr   KKvreslogsumexp_dim
logsum_expseedoffsetr   r   r   "meta__scaled_dot_product_efficient  sV       






	  r   c              	      s  t |||dd\}}|d }|d }|d }|d }|d }	|d }
|d	 } fd
d}|d krj|dn
| d }|d kr|dn|	}|d kr|dn|
}|d}|d}t|}|tj|||ftjdd|jd}|rH|dkrdnd}t	|| }|dkrd}n|dkr$d}|tj||||f|j
dd|j}n|tjd|j
dd|j}|||tjdtjdd|j|tjdtjdd|j|fS )NTrD   r   r   	cum_seq_q	cum_seq_kmax_qmax_kr   c                    s   t  | |S r!   r   r   r   r   r   r   E  s    z5meta__flash_attention_forward.<locals>.convert_tensorr   r.   r   r   rI   r   r   r   r   r   r   )r   ry   ru   r&   r   r   r   r   r   r   r   r   )r@   r,   rA   rB   rM   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   meta__flash_attention_forward5  sj       



	


	r   c              	      sz  t |||dd\}}|d }|d }|d }|d }|d }	|d }
|d	 } fd
d}|d}|d}|d}|d}|d}|d}|tj|||||jdd|j}|d k	r|dd n|}|}|d k	r|	d k	st|	}|
d k	r|
n|}|rt|d d nd}|tj|||ftj	dd|j}|tjdtj
dd|j}|tjdtj
dd|j}||||||fS )NTrD   r   r   r   cu_seqlens_qmax_seqlen_qr   r   c                    s   t  | |S r!   r   r   r   r   r   r     s    z9meta__efficient_attention_forward.<locals>.convert_tensorr   r.   r   r   rI   r   r   r   )r   ry   r&   r   r   r   r0   r   r   r   r   )r@   r,   rA   rB   rM   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zlogsumexp_batch_dimZactual_max_seqlen_qZactual_max_seqlen_kr   r   r   r   r   r   r   !meta__efficient_attention_forward}  sb       







  r   c           
      C   st   | j d ks| j jst|| j  }ddlm} || |sH|dd}|jdd  }||}||f}	||	fS )Nr   r   r.   )	rr   rs   r   rv   rt   rn   r   r|   r{   )
r@   r,   inputslengthsZbatch_firstZnew_batch_sizern   Zres_sizeZpacked_datar   r   r   r   _pack_padded_sequence  s    

r  c                    s    fdd}|S )Nc                    s   | t  < | S r!   )FAST_OP_IMPLEMENTATIONS)r<   r+   r   r   r>     s    z-register_fast_op_impl.<locals>.impl_decoratorr   )r,   r>   r   r+   r   register_fast_op_impl  s    r  c           
         s   ddl m} t| }t|}t||}dg| }t|d ddD ] |d   }|d | }|d | }	|dkrx| | nd|	dkr||	 ndt|dkp|dkpk fdd |dkrԈn| < q@t|S )Nr   )guard_size_obliviousr.   r   c                      s   d d d  dS )NzThe size of tensor a (z#) must match the size of tensor b (z) at non-singleton dimension r   r   r   r   ZsizeAZsizeBr   r   rk     r   zinfer_size.<locals>.<lambda>)rt   r  r2   rq   ranger&   r   r8   )
abr  ZdimsAZdimsBr   ZexpandedSizesr   ZdimAZdimBr   r  r   
infer_size  s(    


r	  c                    s    fdd}|S )Nc                    s   fdd}t d  }d}d}d }|D ]F}t|tjrD|jnd}	t|	dkrZd}nd}|d krj|	}t||	}q.|d k	st|D ]0}t|tjrt|jt|kr|j|kr qq|dS td	}
|
}d }d }d}|D ]T}t|tjsd}q||
kr|jj	d	ks|j}|d kr$|j
}q||j
krd}q|rNt|d
tji\}}d}d}|D ]t}t|tjspqZ||
kr| dkr|j|
kr||kr|d  S |d7 }n|j|krZ|d  S qZd}d}t|r(|D ]@}t|tjsq|o|jtjd}|o"|jtjd}q|rTt d ttj||dtjd|dS |rt d ttj||dtjd|dS |dS )Nc              
      s4   t d|     W  5 Q R  S Q R X d S )Nzslow r   )msg)rA   rB   modeslow_refr   r   slow  s    z=make_fast_binary_impl.<locals>.fast_binary_impl.<locals>.slowzattempt fastFr   r   Tz#both tensors nontrivially broadcastrH   Ztype_promotion_kindr.   errorr   zfast is_contiguousrI   )r   r   r   r   zfast channels_lastzno contiguity match)r   r/   r&   rh   r|   r2   r	  r0   r   r   r   r   r   DEFAULTr~   r    is_contiguousZcontiguous_formatZchannels_lastr   r   )r  rA   rB   r  ZoperandsZhas_scalarsZhas_tensorsZfinal_shaper=   r|   rH   Zcommon_deviceZcommon_dtypeZoutput_dtypeZhas_different_input_dtypesrM   Zcurrent_cpu_scalars_on_non_cpuZmax_cpu_scalars_on_non_cpur  Zis_channels_lastr  )rA   rB   r  r   fast_binary_impl  s    





$






z/make_fast_binary_impl.<locals>.fast_binary_implr   )r  r  r   r  r   make_fast_binary_impl  s     r  c                  C   s|   dd l } t| jjjjt| jj t| jjjjt| jj t| jjj	jt| jj	 t| jjj
jt| jj
 tS )Nr   )Ztorch._refsr  opsrU   addrh   r  Z_refssubmuldivr  )r&   r   r   r   r     s    


)TFF)TFF)TFF)N)	functoolsr^   r   rw   typingr   r   r&   Ztorch._custom_opZtorch._loggingZ
torch._opsr   Ztorch._prims_commonr   r   r   r   r	   Ztorch._subclasses.fake_tensorr
   r   r   r   r   r   Ztorch.fx.operator_schemasr   Ztorch.utils._statsr   utilsZ_pytreera   __all__r6   r   Z_opsr  rU   r   r    r   rX   r   Z	full_likeZ	ones_likeZ	rand_likeZ
randn_likeZrandint_likeZ	low_dtypeZlow_dtype_outZ
zeros_liker{   Znew_empty_stridedZnew_fullZ	new_zerosZnew_onesrK   Z_resize_output_r   Z
pin_memory	is_pinnedr   r   r   Z_pin_memoryZ_resize_outputr   Z_list_to_tensorrJ   r"   	lru_cacher3   r   r9   __contains__rC   rQ   rT   r   rb   rc   rd   rl   r   Z_unique2r   r   rj   rh   r   Z_local_scalar_denser   ri   r   r   r   r   r   r   r   r   rg   r   Z_embedding_bagr   Z_unsafe_index_putcopyZcopy_Zslice_scatterr   r   Z	index_putr   r   Z_nested_view_from_bufferZ_nested_view_from_buffer_copyr   r   r   Zconvolution_backwardr   Z#_scaled_dot_product_flash_attentionr   Z'_scaled_dot_product_efficient_attentionr   Z_flash_attention_forwardr   Z_efficient_attention_forwardr   r  r  r  r	  r  r   r   r   r   r   <module>   sL   
 








     
?
     
     


(



$

















<

F

5

G

:
	% 
