U
    zh                     @   s  d dl Z d dlmZmZmZmZmZmZ d dlZd dl	m
Z
 ejedZe jddG dd dZe jddG d	d
 d
Zejjeejj dddZejjee dddZeedddZeejj ee edddZeeej eejgef ee dddZee dddZeejejjf ee dddZ eejejjf d d!d"Z!d#d$ Z"e jG d%d& d&Z#ejj$ee ee d'd(d)Z%ejjee d*d+d,Z&dS )-    N)AnyCallableDictListOptionalTuple)countersZ
perf_hintsT)frozenc                   @   s   e Zd ZU dZeed< dS )
FunctionIDz9Unique counter of a function wrapped in cudagraphify_implidN)__name__
__module____qualname____doc__int__annotations__ r   r   Q/var/www/html/venv/lib/python3.8/site-packages/torch/_inductor/cudagraph_utils.pyr
      s   
r
   c                   @   sd   e Zd ZU dZedef ed< ee ed< e	ed< e
ejdf ed< eejj ed< ee ed< d	S )
WrappedFunctionz
    Represents a function that you want to record for CUDA graph replay,
    with a little more metadata so we can identify if we have an applicable
    CUDA graph in our CUDA graph tree for it.
    .modelstatic_input_idxsr   	constantsplaceholdersmutated_input_idxsN)r   r   r   r   r   r   r   r   r   r
   r   torchTensorfxNoder   r   r   r   r      s   
r   )graphreturnc                 C   s   dd | j D S )Nc                 S   s   g | ]}|j d kr|qS placeholderop.0noder   r   r   
<listcomp>"   s     
 z$get_placeholders.<locals>.<listcomp>)nodes)r   r   r   r   get_placeholders!   s    r)   )placeholder_noder   c                 C   sb   t | jdkr&tt| jjdd S | jD ]0}|jtjj	j
jkr,|jdd  }r,|  S q,d S )N   stack_trace)lenusersnextitermetagettargetr   ZopsZatenZcopy_default)r*   user,   r   r   r   get_mutating_use_stack_trace%   s    

r6   )reasonr   c                 C   s
   d|  S )Nzskipping cudagraphs due to r   )r7   r   r   r   format_default_skip_message2   s    r8   )r   mutation_indicesr   c                 C   sP   d}|D ]}| | }t | }r q&qtdt| d}|rL| d| S |S )N zmutated inputs (z instances). Found from : 
 )r6   r8   r-   )r   r9   r,   idxr!   msgr   r   r   get_mutation_stack_trace6   s    r>   )funcinputsis_cuda_graph_recorded_tensorr   c                    s@   t jjjjr& fdd jD }n j}|r<t j|S d S )Nc                    s&   g | ]}| j ks| s|qS r   )r   r%   r<   r?   r@   rA   r   r   r'   Q   s   
z&check_for_mutation.<locals>.<listcomp>)r   	_inductorconfigtritoncudagraph_treesr   r>   r   )r?   r@   rA   r9   r   rC   r   check_for_mutationI   s    	rH   )r   c                 C   s*   | j D ]}|jdd  }r|  S qd S )Nr,   )r.   r1   r2   )r&   r5   r,   r   r   r   get_use_stack_tracec   s    

rI   )device_node_mappingr   c                 C   s   |  td }rHd|j d}t| }r@t| d| S t|S t| dkrntt| 	 j
dkrnd S dd | 	 D }td	d
| S )Ncpuzcpu device ()r;   r+   cudac                 s   s   | ]}t |V  qd S N)repr)r%   keyr   r   r   	<genexpr>z   s     z:check_multiple_devices_or_any_cpu_nodes.<locals>.<genexpr>zmultiple devices: z, )r2   r   devicenamerI   r8   r-   r/   r0   keystypejoin)rJ   Zcpu_noder=   r,   Z	keys_reprr   r   r   'check_multiple_devices_or_any_cpu_nodesj   s    
rW   rJ   c                 C   s   t | S rN   )rW   rX   r   r   r    check_lowering_disable_cudagraph~   s    rY   c                 C   s"   t |  td d  d7  < d S )NZinductorZcudagraph_skipsr+   )perf_hint_logwarningr   )r=   r   r   r   #log_cudagraph_skip_and_bump_counter   s    
r\   c                   @   s,   e Zd ZU ee ed< ee dddZdS )BoxedDeviceIndexvalue)
device_idxc                 C   s    |d kst |tst|| _d S rN   )
isinstancer   AssertionErrorr^   )selfr_   r   r   r   set   s    zBoxedDeviceIndex.setN)r   r   r   r   r   r   rc   r   r   r   r   r]      s   
r]   )gmr   r   c                    s~   t d}tjjjjr`t|  fdd|jD }t|dk}|sDd S dd | j	j
D }t||S t|jdk}|svd S |S d S )Nzmutated inputsc                    s   g | ]}| kr|qS r   r   rB   Zunique_idxsr   r   r'      s     zGcheck_for_mutation_ignore_cuda_graph_managed_tensor.<locals>.<listcomp>r   c                 S   s   g | ]}|j d kr|qS r    r"   r$   r   r   r   r'      s     
 )r8   r   rD   rE   rF   rG   rc   r   r-   r   r(   r>   Zmutated_inputs)rd   Zcompiled_graphr   Zdefault_msgr9   Zhas_mutationr   r   re   r   3check_for_mutation_ignore_cuda_graph_managed_tensor   s    

rf   )r!   r   c                 C   s,   | j r| j S | jD ]}|j r|j   S qdS )zM
    Gets the first non-empty stack trace of a placeholder or its users.
    N)r,   r.   )r!   userr   r   r   get_placeholder_stack_trace   s    
rh   )'dataclassestypingr   r   r   r   r   r   r   Ztorch._dynamo.utilsr   Z_loggingZgetArtifactLoggerr   rZ   	dataclassr
   r   r   ZGraphr   r)   strr6   r8   r   r>   r   boolrH   rI   rR   rW   rY   r\   r]   ZGraphModulerf   rh   r   r   r   r   <module>   sD    


 	 