U
    yh&                     @   s  U d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ dZe eZe jed< eejj eejj eejj ejjdd	d
ZejjedddZejjeeejj eegef ddddZG dd dejjZejjeeef dddZ eejj edddZ!dd dd dd fejjeejjgef eeejj gef eegef ejjdddZ"dS )    N)CallableListOptionalSetTuple)make_fx)compile_fx_inner)select_decomp_table
   logger)inputsbodyoutputsreturnc           	         s   t j }i  t| D ]*\}}|jd| d}|j|_| |< q|D ]"}|j| fddd}| |< qF|jt fdd|D d |	  |
  t jji |d	S )
NZarg_)namec                    s    |  S N xZnode_to_subgraph_noder   W/var/www/html/venv/lib/python3.8/site-packages/torch/distributed/_spmd/partial_lower.py<lambda>!       z)_create_subgraph_module.<locals>.<lambda>)Zarg_transformc                 3   s   | ]} | V  qd S r   r   ).0r   r   r   r   	<genexpr>%   s     z*_create_subgraph_module.<locals>.<genexpr>)result)rootgraph)torchfxZGraph	enumerateplaceholdermetaZ	node_copyoutputtupleZeliminate_dead_codelintGraphModule)	r   r   r   subgraphidxZinpZsubgraph_inpnodeZsubgraph_noder   r   r   _create_subgraph_module   s     

 

r*   r)   r   c                 C   sX   t dd | jD rTtdd | jD sPtdj|  ddd | jD ddS d	S )
Nc                 s   s   | ]}|j tjkV  qd S r   targetoperatorgetitemr   userr   r   r   r   ,   s     z%_is_container_node.<locals>.<genexpr>c                 s   s   | ]}|j tjkV  qd S r   r,   r0   r   r   r   r   -   s     zmMalformed graph: a container node is used as input for non-getitem nodes.
Node: {fmt_node}
Users: {fmt_users}
c                 s   s   | ]}|  V  qd S r   )format_node)r   ur   r   r   r   1   s     )Zfmt_nodeZ	fmt_usersTF)anyusersallAssertionErrorformatr3   joinr)   r   r   r   _is_container_node+   s    r<   )gmsubgraph_namesubgraph_nodesdumperr   c              	      s  g g g  t  tjjd dfdd}D ]\}|jdkrX| | q4|jD ]}|kr^|| q^|kr4 | | q4g tjjd d fdd} D ]$}tfdd	|j	D s|| qt
t
t kstt
t
t kstt }|t|j}	t| |t| d
 j}
D ]}|
| qF| j|
` | jjd|td|	id}tD ]4\}}| jjdtj||fd}|j|_|| qW 5 Q R X t  D ]"}t
|j	dkr| j| qd S )N)argr   c                    sl   | g}t |dkrh| }t|rJfdd|jD  ||j q|kr | | qd S )Nr   c                 3   s   | ]}| kr|V  qd S r   r   r0   )r?   r   r   r   L   s      z;_lower_subgraph_nodes.<locals>.add_input.<locals>.<genexpr>)lenpopr<   extendr6   appendadd)rA   stackr)   )r   prologuer?   visibler   r   	add_inputF   s    
z(_lower_subgraph_nodes.<locals>.add_inputZget_attr)r#   r   c                    sl   | g}t |dkrh| }t|r< |j ||j qtfdd|jD s|kr| qd S )Nr   c                 3   s   | ]}| kV  qd S r   r   r0   rI   r   r   r   o   s     z<_lower_subgraph_nodes.<locals>.add_output.<locals>.<genexpr>)rB   rC   r<   rD   r6   r7   rE   )r#   rG   r)   )r   r   rI   r   r   
add_outputh   s    z)_lower_subgraph_nodes.<locals>.add_outputc                 3   s   | ]}| kV  qd S r   r   r0   rK   r   r   r   t   s     z(_lower_subgraph_nodes.<locals>.<genexpr>Zcall_moduletag)opr-   argskwargsZcall_function)rO   r-   rP   r   )setr   r   NoderO   rE   rF   Zall_input_nodesr7   r6   rB   r8   r*   strr   setattr_InductorModulenextprependZinserting_beforeZcreate_noder$   r    r.   r/   r"   Zreplace_all_uses_withreversedZ
erase_node)r=   r>   r?   r@   rJ   r)   rA   rL   Zsubgraph_moduleZreadable_tagZinsertion_pointZsubgraph_callr(   r#   Z
new_outputr   )r   r   r   rH   r?   rI   r   _lower_subgraph_nodes8   s^    







rZ   c                       sB   e Zd Zejjdd fddZejee	ej dddZ
  ZS )rV   N)r=   r   c                    s   t    || _d | _d S r   )super__init__r=   compiled)selfr=   	__class__r   r   r\      s    
z_InductorModule.__init__)rP   rN   r   c             
   G   s   | j d krNt }t| j|d| }td| t|t|dd| _ td| tj	
|* | j d k	sjt|  t|W  5 Q R  S Q R X d S )N)Zdecomposition_tablez%Lowering subgraph (%s) to Inductor...F)Z
cudagraphsz,Completed lowering subgraph (%s) to Inductor)r]   r	   r   r=   r   infor   listr   ZprofilerZrecord_functionr8   )r^   rN   rP   Zinductor_decompositionsZ	decomp_gmr   r   r   forward   s    
z_InductorModule.forward)__name__
__module____qualname__r   r   r&   r\   ZTensorrT   r   rc   __classcell__r   r   r_   r   rV      s   rV   c                 C   s   | j tjjjjtjjjjtjjjjtjjj	jfkr6dS | j tjjj
jkrLdS t| j tjjr~| j tjjjs~d| j  dfS dS )N)Fzfused adam is not supported yetT Fz& doesn't have a meta kernel registered)r-   r   ZopsZatenZ_fused_adam_defaultZ_fused_adamZ_foreach_add_ZScalarZ_foreach_addflattenZ
using_ints
isinstanceZ_opsZ
OpOverloadZhas_kernel_for_dispatch_keyZ_CZDispatchKeyZMetar;   r   r   r   _is_inductor_compatible   s    



rm   )nodesr   c                 C   s   t dd | D }|tkS )Nc                 S   s    g | ]}t |jd r|qS )zaten.)rT   r-   
startswith)r   nr   r   r   
<listcomp>   s      z'_subgraph_predicate.<locals>.<listcomp>)rB   MIN_ATEN_OPS_TO_LOWER)rn   Znum_aten_opsr   r   r   _subgraph_predicate   s    rs   c                 C   s   dS NTr   r   r   r   r   r      r   r   c                 C   s   dS rt   r   r   r   r   r   r      r   c                 C   s   dS )Nr'   r   r   r   r   r   r      r   )r=   node_predicatesubgraph_predicater@   r   c                    s   g g}t t| jj}tjjttt	f d fdd}|j
dkr|j
dkrR|j }q6||\}}|rr|d | n,t|d dkrtdt	|| |g  |j }q6fd	d
|D }t|D ] \}	}
d|	 }t| ||
| q| j  |   | S )a  
    Lower Inductor compatible portions of the graph module to Inductor.

    Args:
        node_predicate: user predicate for determining whether to consider a node for
            lowering.
        subgraph_predicate: user predicate for determining whether to consider a list of
            candidate nodes for lowering.
        dumper: a callback for dumping subgraphs for human digestion. For exmaple, it
            can be a function that writes to disk/blob storage and returns the
            path/handle. The returned path/handle for each subgraph will be made
            available in the subgraph call node in the parent graph, as well as the
            label of the profiler block for the subgraph.
    r+   c                    s(   t | \}}|s||fS  | s$dS dS )N)Fzuser predicaterh   )rm   )r)   should_lowerreason)ru   r   r   _node_predicate   s    z&partial_lower.<locals>._node_predicater#   r!   rM   r   z,partial_lower: graph break at %s. Reason: %sc                    s    g | ]} |rt |r|qS r   )rs   )r   rn   )rv   r   r   rq      s    z!partial_lower.<locals>.<listcomp>Z	subgraph_)rW   iterr   rn   r   r   rS   r   boolrT   rO   rE   rB   r   warningr    rZ   r%   Z	recompile)r=   ru   rv   r@   Znodes_per_subgraphZptrry   rw   rx   r(   r?   r>   r   )ru   rv   r   partial_lower   s6     

  



r}   )#loggingr.   typingr   r   r   r   r   r   Z	functorchr   Ztorch._inductor.compile_fxr   Ztorch._inductor.decompositionr	   rr   	getLoggerrd   r   Logger__annotations__r   rS   r&   r*   r{   r<   rT   rZ   nnModulerV   rm   rs   r}   r   r   r   r   <module>   sD    
 
 

b