U
    yhB                     @   s4  d dl mZmZmZmZmZ d dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZ ejjZeejjejjejjgee	ddd	Zeejjejjejjgee	dd
dZeejj ejj ejj ej!j gee	dddZ"eej#j ej$j gedddZ%eej&j'gedddZ(eej)jgedddZ*eej+jee	dddZ,eej-jee	dddZ.eej/jee	dddZ0eej1j2ee	dddZ3eej4jee	dddZ5eej6jee	ddd Z7eee2 ee d!d"d#Z8eej9jee	dd$d%Z:dS )&    )castListOptionalSequenceTupleN)OpSchemaOutputSharding)pointwise_rule)register_prop_rule)_PartialDTensorSpec	Placement	ReplicateShard
TensorMeta)	op_schemareturnc                 C   s4   | j d }t|tr&tdd |D s*tt|dS )Nr   c                 s   s   | ]}t |tV  qd S N
isinstancer   .0s r   Z/var/www/html/venv/lib/python3.8/site-packages/torch/distributed/_spmd/experimental_ops.py	<genexpr>   s     z'_prop__foreach_unaop.<locals>.<genexpr>output_specargs_schemar   listallAssertionErrorr   )r   selfr   r   r   _prop__foreach_unaop   s    
 r$   c                 C   s   | j d d \}}t| j dk r$d n| j d }t|trJtdd |D sXtd| t|trttdd |D std| t|t|kstdt| dt| td	d t||D rtd t	| j
|r|||fn||f| jd
dS t|dS d S )N      c                 s   s   | ]}t |tV  qd S r   r   r   r   r   r   r   /   s    z,_prop__foreach_binop_list.<locals>.<genexpr>z#Expect a List[DTensorSpec] but got c                 s   s   | ]}t |tV  qd S r   r   )r   or   r   r   r   2   s    z/Two tensor lists must match in length, but got z and c                 s   s   | ]\}}||kV  qd S r   r   )r   r   r'   r   r   r   r   :   s     opr   kwargs_schemar   redistribute_schemar   r   lenr   r    r!   r"   anyzipr   r   r)   r*   )r   r#   otherscalarr   r   r   _prop__foreach_binop_list%   s2    

	r3   c                 C   sB   | j \}}t|tr&tdd |D s*tt|tr8tt|dS )Nc                 s   s   | ]}t |tV  qd S r   r   r   r   r   r   r   T   s     z._prop__foreach_binop_scalar.<locals>.<genexpr>r   r   )r   r#   r2   r   r   r   _prop__foreach_binop_scalarJ   s    	
 r4   r   c                 C   s   | j d d \}}}t| j dk r&d n| j d }t|trLtdd |D sPtt|trltdd |D sptt|trtdd |D sttdd t|||D rtd t	| j
|r||||fn|||f| jdd	S t|d
S d S )Nr&      c                 s   s   | ]}t |tV  qd S r   r   r   r   r   r   r   b   s     z/_prop__foreach_addcop_scalar.<locals>.<genexpr>c                 s   s   | ]}t |tV  qd S r   r   r   r   r   r   r   c   s     c                 s   s   | ]}t |tV  qd S r   r   r   r   r   r   r   d   s     c                 s   s$   | ]\}}}||kp||kV  qd S r   r   )r   r   t1t2r   r   r   r   e   s     r(   r+   r   r-   )r   r#   Ztensor1Ztensor2r2   r   r   r   _prop__foreach_addcop_scalarY   s"       r9   c                 C   s4   | j \}}t|tr&tdd |D s*tt|dS )Nc                 s   s   | ]}t |tV  qd S r   r   r   r   r   r   r   z   s    z7_prop__foreach_pow_scalar_and_tensor.<locals>.<genexpr>r   r   )r   scalaexponentr   r   r   $_prop__foreach_pow_scalar_and_tensorw   s
    
r<   c                    s   d} j d | }tdd |D s(ttdd |D s>tdd |D tfddD s|tdd	d D  d
tdd t D rt fdd|D }td t j| j |d    j	ddS t j d f| dS d S )N   c                 s   s   | ]}t |tV  qd S r   )r   r    r   schemar   r   r   r      s     z$_prop__fused_adam.<locals>.<genexpr>c                 s   s"   | ]}|D ]}t |tV  q
qd S r   r   )r   r?   r   r   r   r   r      s      c                 S   s   g | ]}t |r|qS r   r.   r>   r   r   r   
<listcomp>   s     z%_prop__fused_adam.<locals>.<listcomp>c                 3   s"   | ]}t |t  d  kV  qdS r   Nr@   r   )tensor_schemasr   r   r      s     z8expect the same number of gradients and states, but got c                 S   s   g | ]}t |qS r   r@   r   r   r   r   rA      s     .c                 3   s$   | ] t  fd d D V  qdS )c                 3   s   | ]}| d  kV  qdS rB   r   r   ttsr   r   r      s     z._prop__fused_adam.<locals>.<genexpr>.<genexpr>N)r/   )r   r   rG   r   r      s     c                 3   s$   | ]}t |r jd  n|V  qdS rB   )r.   r   r   r5   r   r   r      s    r(   r+   r   r   )
r   r!   r"   r/   r0   tupler   r   r)   r*   )r   ZNTZtesnor_list_argsZnew_schemasr   )r   rC   r   _prop__fused_adam   s2    	rJ   c                 C   s   | j d d \}}t|ts tt|ts.t|j|jkrzt|j|j|jd}td t| j	||f| j dd   | j
ddS tt|jt fdt|jt fdfdS d S )Nr%   mesh
placementstensor_metar(   r+   rL   rM   r   )r   r   r   r"   rM   rL   rN   r   r   r)   r*   r   r   )r   r#   targetZnew_selfr   r   r   _prop_nll_loss_forward   s,    	rQ   c                 C   s8   | j d d \}}t|ts tt|ts.tt|dS )Nr%   r   )r   r   r   r"   r   )r   Zgrad_outputr#   r   r   r   _prop_nll_loss_backward   s    rR   c                    s   | j d t| j dkrdntt| j d  ttrBtdksJtdtdd D sjtd tfddD std d	tfd
dD std d	t fddd jD stdt	t
d jd jddS )Nr      z#expect at least one tensor to stackc                 s   s   | ]}t |tV  qd S r   r   rE   r   r   r   r      s    z_prop_stack.<locals>.<genexpr>z'expect a list of DTensorSpecs, but got c                 3   s   | ]}|j  d  j kV  qdS rB   )shaperE   tensorsr   r   r      s    z3expect all tensors to have the same shape, but got rD   c                 3   s   | ]}|j  d  j kV  qdS rB   )rM   rE   rU   r   r   r      s    z8expect all tensors to have the same placements, but got c                 3   s   | ]}|   V  qd S r   Zis_shardr   pdimr   r   r      s    z4DTensor does not support stack on sharded dimension.rO   r   )r   r.   r   intr   r    r"   r!   rM   r   r   rL   r5   r   )r[   rV   r   _prop_stack   s>    
"






r]   c                    s   | j d d \} t|ts tt ts.t|j}t fdd|D sRtdg }|D ]8}t|tr|j kr|	t|jd  qZ|	| qZt
t|jt|ddS )Nr%   c                 3   s   | ]}|   V  qd S r   rW   rX   rZ   r   r   r      s    z_prop_select.<locals>.<genexpr>z5DTensor does not support select on sharded dimension.rS   rO   r   )r   r   r   r"   r\   rM   r!   r   r[   appendr   rL   rI   )r   ZtensorrM   Znew_placementsrY   r   rZ   r   _prop_select   s"    
r_   c                    s   | j \}}}}}t|tstt|ttfs0t|d k	r^t|tsFttdd |jD s^t|d k	rt|tstttdd |jD stt|j	t|  t fdd|jD stt|j
|jd}t|||fdS )Nc                 s   s   | ]}t |tV  qd S r   r   r   rX   r   r   r   r   
  s     z*_prop_native_layer_norm.<locals>.<genexpr>c                 s   s   | ]}t |tV  qd S r   r`   rX   r   r   r   r     s     c                 3   s.   | ]&}t |tp$t |to"|j k fV  qd S r   )r   r   r   r[   rX   Z
batch_ndimr   r   r     s   rO   r   )r   r   r   r"   rI   r    r!   rM   r.   rT   rL   r   )r   inputnormalized_shapeweightbiasepsZ
stats_specr   ra   r   _prop_native_layer_norm  s$    rg   c                 C   s<  | j \}}}}}}}}t|ts$tt|ttfs6t|d k	rdt|tsLttdd |jD sdt|d k	rt|tszttdd |jD sttdd |jD std|j |rt|j	tt
 g|j	j dnd }	|rt|j	tt
 g|j	j dnd }
t|d r|nd |d r"|	nd |d	 r2|
nd fd
S )Nc                 s   s   | ]}t |tV  qd S r   r`   r   r   r   r   r   +  s     z3_prop_native_layer_norm_backward.<locals>.<genexpr>c                 s   s   | ]}t |tV  qd S r   r`   r   r   r   r   r   .  s     c                 s   s"   | ]}t |to|jd kV  qdS rB   )r   r   r[   r   r   r   r   r   1  s    zGot rO   r   rS   r%   r   )r   r   r   r"   r    rI   r!   rM   r/   rL   r   ndimr   )r   Zgradrb   rc   Zresult1Zresult2rd   re   Zgrad_input_maskZweight_gradZ	bias_gradr   r   r    _prop_native_layer_norm_backward  sV    

ri   )r   
active_dimr   c                 C   s   g }| j dd D ]z}t|tr*|jdk	s.t|t|j|jt|dk	rpt	
|jd| d |j|d d  n|j|jj|jjdd qt| j|i d} t| d	d
}|jrt|jtst|jjS |jdk	st|jj d }t|tstt|jS dS )zvConsiders 2 first inputs of op_schema as having same shape, and returns suggested placement for a pointwise operation.Nr%   r   )rS   rS   )rT   stridedtyperK   r(   F)Z	linearity)r   r   r   rN   r"   r^   rL   rM   r   torchSizerT   rk   rl   r   r)   r	   r   r,   rI   )r   rj   r   r   Zoutput_shardingZ
out_schemar   r   r   _refine_shardingO  s>    "ro   c              
   C   sd  d}| j |t| j d   \}}}}}}t|ts6tt|tsDtt|tsRt|dk rd||j7 }|j| |j| kr|dkst||j| kstd }tt	| |}t
|D ]&\}	}
t|
tr|
j|krt ||	< qt|}|t|jkr|jt|jkrtt|j|jddS td t| jt|j||jdt|j||jdf| j dd   | jddS d S )	N)NNr   NNrS   r   rO   r   rK   r%   r(   r+   )r   r.   r   r   r"   r\   rh   rT   r    ro   	enumerater   r[   r   rI   rM   r   rL   r   r)   rN   r*   )r   defaultsrb   srcr[   startendstepZinput_suggestionirY   r   r   r   prop_slice_scatterz  s\    

rw   );typingr   r   r   r   r   rm   Z$torch.distributed._tensor._op_schemar   r   Z*torch.distributed._tensor.ops.common_rulesr	   Z#torch.distributed._tensor.ops.utilsr
   Z)torch.distributed._tensor.placement_typesr   r   r   r   r   r   ZopsZatenZ_foreach_negdefaultZ_foreach_reciprocalZ_foreach_sqrtr$   Z_foreach_addZ_foreach_divZ_foreach_mulr3   ZScalarZ_foreach_subr4   Z_foreach_addcdivZ_foreach_addcmulr9   Z_foreach_powZScalarAndTensorr<   Z_fused_adamrJ   Znll_loss_forwardrQ   Znll_loss_backwardrR   stackr]   selectr\   r_   Znative_layer_normrg   Znative_layer_norm_backwardri   ro   Zslice_scatterrw   r   r   r   r   <module>   sp    	"
#




4 +
