U
    zh                      @   sP  d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d
dlmZ dd Z ej!j"dddZ#ej!j"ee$ dddZ%ej!j"ee$ dddZ&e'dddZ(eee$  dddZ)dd Z*G dd dZ+ede+ d d#d!d"Z,dS )$    N)defaultdict)DictListOptional)config)aot_autograd)	boxed_nop)BoxedDeviceIndex'check_multiple_devices_or_any_cpu_nodesformat_default_skip_messageget_mutation_stack_traceget_placeholders#log_cudagraph_skip_and_bump_counter)	BoxedBoolcount_tangents%get_first_incompatible_cudagraph_nodenum_fw_fixed_argumentsoutput_node)StorageWeakRef   )register_backendc                 C   s  dd }t t}d}t }| jD ]}|jdkrft||jtjr\|t||j	  
| |d7 }q |jdkr |jtjkr~q |jj}t|jD ]p\}}|t|jk r|j| }	n|j|jkrq|j|j }	d}
|jr|jjrd}
|
r||t||	j	  O }qq |S )	Nc                 S   s   d| kr| d S | d S )NvalZfake_result )metar   r   S/var/www/html/venv/lib/python3.8/site-packages/torch/_dynamo/backends/cudagraphs.pymeta_fk!   s    z%find_input_mutations.<locals>.meta_fkr   placeholderr   Zcall_functionFT)r   setnodesop
isinstancer   torchTensorr   Z_typed_storageaddtargetoperatorgetitemZ_schema	enumerate	argumentslenargsnamekwargsZ
alias_infoZis_write)gr   inputsZ	input_idxZmutated_inputsnZschemaiargargumentZmut_argr   r   r   find_input_mutations    s8    




r3   )gmc                 C   sD   i }| j jD ]2}|jdd }t|tjr|j|kr|||j< q|S )Nr   )graphr   r   getr    r!   r"   device)r4   Zdevice_node_mappingr/   tr   r   r   get_device_node_mappingF   s    r9   )	aot_modelreturnc                 C   s:   t | jtt| }|sd S dd | jjD }t||S )Nc                 S   s   g | ]}|j d kr|qS )r   )r   ).0noder   r   r   
<listcomp>V   s     
 zGcheck_for_mutation_ignore_cuda_graph_managed_tensor.<locals>.<listcomp>)r3   r5   r   ranger   r   )r:   	num_fixedZmutation_indicesplaceholdersr   r   r   3check_for_mutation_ignore_cuda_graph_managed_tensorO   s
    rB   c                 C   sN   t jst| | }r|S tt|  }r,|S t|  }rJtd|j dS d S )Nzincompatible op ())r   Z(cudagraph_backend_support_input_mutationrB   r
   r9   r   r   r+   )r:   r@   Zmut_skipskipr=   r   r   r   check_for_skipZ   s     rE   )r;   c                 C   s$   t tt| }|jdkst|jS )Ncuda)nextiterr9   typeAssertionErrorindex)r4   r7   r   r   r   get_device_indexl   s    rL   c                 C   s.   t | }t|jdkstdd |jd D S )Nr   c                 S   s&   g | ]}t |tjjjr|jnd qS N)r    r!   fxr=   NodeZstack_trace)r<   r1   r   r   r   r>   u   s   z$get_stack_traces.<locals>.<listcomp>r   )r   r)   r*   rJ   )r4   outputr   r   r   get_stack_tracesr   s
    rQ   c                    sj   ddl m tdtd  d fdd	} fdd}t||tj|dd	tjj	j
d
}|| S )Nr   )cudagraphify_implTFc                    s   t | |}ttt|}t| | }rFt td|  |S  t|  ||t	| j
ddt| t| jt| jd	}d|_|S )Nzskipping cudagraphs due to FZdevice_indexZis_backwardis_inferenceZstack_tracesrA   Zmutated_input_idxsT)r   r   r)   rE   r   disabler   r   rL   r?   valuerQ   r   r5   r3   _boxed_call)r:   
aot_inputsrT   interpfixedskip_msgoutboxed_device_indexrR   do_cudagraphsdynamo_inputsr   r   forward_cudagraphs   s,    

z&cudagraphs.<locals>.forward_cudagraphsc                    s   t  |}s S t }t | }rjtd| tjjjjddd k	sRt	 fdd}d|_
|S ||t|t ddt t jt jd	}d|_
|S )Nzskipping cudagraphs due to %sF)Zcreate_if_none_existsc                    s       | S rM   )Zset_to_running_backward)r.   r:   managerr   r   fn   s    z3cudagraphs.<locals>.backward_cudagraphs.<locals>.fnTrS   )r   r   rE   r   r!   Z	_inductorZcudagraph_treesZget_managerrV   rJ   rW   r?   rL   rQ   r   r5   r3   )r:   rX   rY   rZ   r[   rd   r\   )r^   rR   r_   rb   r   backward_cudagraphs   s<    
  z'cudagraphs.<locals>.backward_cudagraphs)rT   )Zfw_compilerZbw_compilerZinference_compilerZkeep_inference_input_mutations)F)torch._inductor.cudagraph_treesrR   r   r	   r   	functoolspartialr!   Z_dynamor   Z%cudagraph_backend_keep_input_mutation)Zdynamo_modelr`   ra   re   Zaot_cudagraphsr   r]   r   
cudagraphs{   s    &ri   c                   @   s(   e Zd ZdZedd Zedd ZdS )CudagraphsBackendri   c                  C   s   ddl m}  |   d S )Nr   reset_cudagraph_trees)rf   rl   rk   r   r   r   reset   s    zCudagraphsBackend.resetc                 C   s
   t | |S rM   )ri   )modelr.   r   r   r   __call__   s    zCudagraphsBackend.__call__N)__name__
__module____qualname__Zcompiler_namestaticmethodrm   ro   r   r   r   r   rj      s
   
rj   )r+   Zcompiler_fnTc              	      s   t |ttfst r&dd |D nt|tj  tj }|tj	  tj
| | |  W 5 Q R X |  tj	 | tj  tj tjj|d |  W 5 Q R X t ttfsֈf fdd}|S )zBThis isn't registered as a backend, but is used in some benchmarksc                 S   s   g | ]}t |qS r   )r!   Z
zeros_liker<   xr   r   r   r>      s     z$cudagraphs_inner.<locals>.<listcomp>)streamc                     sX   t t | kst r6t| D ]\}}|| q"  rPdd D S S d S )Nc                 S   s   g | ]}|  qS r   )clonert   r   r   r   r>      s     z1cudagraphs_inner.<locals>.run.<locals>.<listcomp>)r)   rJ   zipZcopy_Zreplay)Z
new_inputsdstsrccopy_inputscopy_outputsr5   Zstatic_inputsZstatic_outputsr   r   run   s    zcudagraphs_inner.<locals>.run)r    listtuplerJ   r!   rF   ZsynchronizeZStreamZwait_streamZcurrent_streamrv   Z	CUDAGraphr5   )rn   r.   r}   r|   rv   r~   r   r{   r   cudagraphs_inner   s&    



r   )TT)-rg   r%   collectionsr   typingr   r   r   r!   Ztorch._dynamor   Ztorch._dynamo.backends.commonr   Z torch._dynamo.backends.debuggingr   Ztorch._inductor.cudagraph_utilsr	   r
   r   r   r   r   Ztorch._inductor.utilsr   r   r   r   r   Z torch.multiprocessing.reductionsr   registryr   r3   rN   ZGraphModuler9   strrB   rE   intrL   rQ   ri   rj   r   r   r   r   r   <module>   s.    &
	N