U
    yhca                     @   sn  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlZd dlZd dlZedZddddgZd]d
dZedd Zd^ddZdd Zdd Zd_ddZdd ZefddZefddZefddZdd ZG dd dZdd Zd d! Zd"Zd#ee d$Z d%d& Z!d'd( Z"d)Z#d*d+ Z$d`d,d-Z%d.d/ Z&dad0d1Z'dbe	d2d3d4Z(e)d5krjd dl*Zej+,ej+-e.Z/e/ej+krej+0e/ d dl1Z1d6Z2d7e2 Z3e1j4d8e2 d9Z5e5j6d:d;Z7d<d= Z8d>Z9e7j:d?e9d9Z;e;j<d@e3dA dBZ9e7j:d(e9d9Z=e=j<d@e3dA dCZ9e7j:de9d9Z>e>j<d@e3dA e8e> dDZ9e7j:de9d9Z?e?j<d@e3dA e8e? dEZ9e7j:de9d9Z@e@j<dFe3dA e@j<dGe3dA e8e@ dHZAeAD ]t\ZBZ9e7j:eBe9d9ZCeCj<d@e3dA dIZDeCj<dJdKeEdeDdL dMZDeCj<dNdOdPeDdQ eBd-krdRZDeCj<dSdTdUeDdV qe5F ZGdWdX ZHdYdZ ZIeGjJdkrheHeGjKZLeIeGjMeeL neGjJdkreHeGjKZLeIeGjMeeL neGjJd?kreHeGjKZLeNe!eL neGjJd(kreHeGjKZLeNe"eL neGjJdkreHeGjOZOeHeGjPZPeIeGjMeeOeP n^eGjJd-kr>eHeGjKZLeIeGjMe%eLeGjQeGjd[ n,eGjJd4krjeHeGjKZLeIeGjMe(eLeGjQd\ dS )c    N)	lru_cache)Any)groupbyformat_flamegraphsegmentsmemorycompareFc                 C   s>   | d }| d }|s"| dd }| d }| d| d| S )Nlinefilename/name:)split)ffull_filenameifnamefunc r   H/var/www/html/venv/lib/python3.8/site-packages/torch/cuda/_memory_viz.py
_frame_fmt   s    r   c              
   C   s`   ddddddddg}d	d
ddddddddg
}|D ]}|| kr0 dS q0|D ]}||krH dS qHdS )Nzunwind::unwindzCapturedTraceback::gatherZgather_with_cpp_startZ__libc_start_mainZPyEval_Z	PyObject_ZPyFunction_zcore/boxingz	/Registerz/Redispatchzpythonrun.czModules/main.czObjects/call.czObjects/methodobject.czpycore_ceval.hzceval.czcpython/abstract.hFTr   )r   r
   Zomit_functionsZomit_filenamesZofr   r   r   _frame_filter   s6    r   c                    s   |rt | }  fdd| D S )Nc                    s(   g | ] }t |d  |d rt| qS )r   r
   )r   r   ).0r   r   r   r   
<listcomp>>   s      z_frames_fmt.<locals>.<listcomp>)reversed)framesr   reverser   r   r   _frames_fmt;   s    r    c                 C   sJ   d| kr.| d d  dg }| d d d }n|  d| d }g }||fS )Nhistoryr   r   	real_sizerequested_sizesize)get)br   r"   r   r   r   _block_extra_legacy@   s    r'   c                 C   s    d| krt | S | d | d fS )Nr   r#   )r'   )r&   r   r   r   _block_extraI   s    r(   c                 C   s   |d krdt   d}t j|sXdd l}td|  |jd| t	dd|g |dd	g}tj
|tjtjd
d}|jd k	st|jd k	st|j|  |j  |j }|j  |  | dkst|S )Nz/tmp/z_flamegraph.plr   zDownloading flamegraph.pl to: zNhttps://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.plchmodz+xz--countnamebytesutf-8)stdinstdoutencoding)osgetuidpathexistsurllib.requestprintrequesturlretrieve
subprocess
check_callPopenPIPEr,   AssertionErrorr-   writeclosereadwait)Zflamegraph_linesZflamegraph_scripturllibargspresultr   r   r   r   O   s*     



c           
      C   s  dd }|D ]}d|krPt |\}}| | d|d  d|| d| d n~d}|d D ]p}|d	 }||7 }d
|kr|d
 }| | d|d  d|| d| d q\| | d|d  d| d q\|d | }	|	r| | d|d  d|	 d qd S )Nc                 S   s   | sdS d t| ddS )Nz<non-python>;T)r   )joinr    )r   r   r   r   frames_fragmente   s    z&_write_blocks.<locals>.frames_fragmentr!   rD   state 
r   r"   r   z;<no-context> r$   z;<gaps> )r(   r<   )
r   prefixblocksrF   r&   r   Zaccounted_for_sizehszZgapsr   r   r   _write_blocksd   s     ,,"rN   c                 C   sJ   t  }| d D ],}d|d  d|d  }t|||d  q|| S )Nr   stream_stream;seg_addressrK   ioStringIOrN   getvaluesnapshotr   r   segrJ   r   r   r   r   {   s
    c                 C   s@   t  }| d D ]"}d|d  }t|||d  q|| S )Nr   rO   rP   rK   rS   rW   r   r   r   r      s
    c                    s   dd  dd }t  } fdd| D } fdd|D }tdd	d
 || D   tddd
 || D   | D ]*} ||krxt|d|| |d  qx|D ]*} ||krt|d|| |d  q|| S )Nc                 S   s   | d | d fS )NrR   
total_sizer   rY   r   r   r   _seg_key   s    zcompare.<locals>._seg_keyc                 S   s   d| d  d| d  S )NrO   rP   rQ   rR   r   r[   r   r   r   	_seg_info   s    zcompare.<locals>._seg_infoc                    s   h | ]} |qS r   r   r   rY   r\   r   r   	<setcomp>   s     zcompare.<locals>.<setcomp>c                    s   h | ]} |qS r   r   r^   r_   r   r   r`      s     zonly_before = c                 S   s   g | ]\}}|qS r   r   r   a_r   r   r   r      s     zcompare.<locals>.<listcomp>zonly_after = c                 S   s   g | ]\}}|qS r   r   ra   r   r   r   r      s     zonly_before;rK   zonly_after;)rT   rU   r4   rN   rV   )beforeafterr   r]   r   Zbefore_segsZ
after_segsrY   r   r_   r   r      s    c                 C   s>   dD ],}t | dk r(| d| d  S | d } q| ddS )N) ZKiZMiZGiZTiPiZEiZZig      @z3.1fB.1fYiB)abs)numunitr   r   r   _format_size   s
    
rn   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )Bytesc                 C   s
   || _ d S N)value)selfrq   r   r   r   __init__   s    zBytes.__init__c                 C   s   t | j| S rp   )ro   rq   )rr   rhsr   r   r   __add__   s    zBytes.__add__c                 C   s
   t | jS rp   )rn   rq   )rr   r   r   r   __repr__   s    zBytes.__repr__N)__name__
__module____qualname__rs   ru   rv   r   r   r   r   ro      s   ro   c                 C   s   t dd | d D S )Nc                 s   s"   | ]}|d  dkr|d V  qdS )rG   active_allocatedr$   Nr   )r   r&   r   r   r   	<genexpr>   s      zcalc_active.<locals>.<genexpr>rK   )sumr[   r   r   r   calc_active   s    r}   c                 C   s>   | | }d}|dkr.|| d }d|dd}t | | S )Nrf   r   d    (ri   z% internal))ro   )free_externalfree_internaltotalsuffixZpctr   r   r   _report_free   s    r   i  @zC
Legend:
    [a     ] - a segment in the allocator
     ^-- a page aU   of memory in the segment
    a-z: pages filled with a single block's content
    ' ': page is completely free
    *: page if completely full with multiple blocks
    0-9: page is partially full with tensors of multiple blocks (9 == 90% full)
    (X% internal) - of the free memory, X% is free because we rounded the size of the allocation.
c           "      C   sT  g }t  }|dtt d d}d}d}d}t| d dd dD ]`}||d 7 }d}d}	d}
g }d}|d	 D ]b}|d
 dk}|rt|\}}|||df |
|7 }
|	|d | 7 }	n||d 7 }||d 7 }qt||
7 }||7 }||	7 }|d d t d }dd t|D }dd t|D }d}t	|D ]\}\}}}||7 }|| }|t }|d t d }t
t|rxdnd|d  }t||D ]n}t||t }t||d t }||  || t 7  < || dkrdt|| d  ||< n|||< qq4|d dkrdnd|d  }d|} ||	 |
 |d ksJt|d dkrfd|d  nd}|d tkrF|d|  dt|d  dt||	 d| d 	 qF|d!t| d  d  |d"t| d  |d#t| d  |rd$t| d%t| d&nd}!|d't|| d  |t || | |ksLt| S )(a  Visually reports how the allocator has filled its segments.

    This printout can help debug fragmentation issues since free fragments
    will appear as gaps in this printout.  The amount of free space is reported
    for each segment.
    We distinguish between internal free memory which occurs because the
    allocator rounds the allocation size, and external free memory, which are
    the gaps between allocations in a segment.
    Args:
        data: snapshot dictionary created from _snapshot()
    zSummary of segments >= z	 in size
r   r   c                 S   s   | d t | fS )NrZ   )r}   )xr   r   r   <lambda>       zsegsum.<locals>.<lambda>keyrZ   rK   rG   rz   Tr$      c                 S   s   g | ]}d qS )rH   r   r   rc   r   r   r   r      s     zsegsum.<locals>.<listcomp>c                 S   s   g | ]}d qS )g        r   r   r   r   r   r      s     rb   A   rH   z0123456789*
   rP   rf   z	, stream_z stream_[z] z allocated, z freerI   z
segments: ztotal_reserved: ztotal_allocated: r   z internal + z
 external)ztotal_free: )rT   rU   r<   ro   	PAGE_SIZEsortedr(   appendrange	enumeratechrordmaxminintrE   r;   r   lenlegendrV   )"datar   outtotal_reservedZtotal_allocatedr   r   rY   Zseg_free_externalZseg_free_internalZseg_allocatedZ
all_rangesZboffsetr&   Zactiverc   allocated_sizeZnsegZoccupiedfracZactive_sizer   Zstart_r$   Zfinish_startfinishmjserP   bodyZinternal_externalr   r   r   segsum   sr     
 4$
r   c                    sR   t   fdd}t d D ]&\}}|r"d| d || q" S )Nc                    s  g i }i }g  d fdd}fdd}d} t|  d d}d D ]}||d 7 }qTt| D ]t\}}|d	 d
kr|d |d  }	}
| }||	\}}|d krd}|	}n|	| } | d| d| dt|
 d ||
|f||	< ||
7 }qn|d	 dkrX|d |d  }	}
||	|	d d f\}}} d| dt|
 d qn|d	 dkr|d |d  }	}
||
8 }||	|	d d f\}}} d| dt|
 d ||kr | ||= qn|d	 dkr0|d |d  }	}
| } | d|	 dt|
 d ||	|
f |||	< qn|d	 dkr|d |d  }	}
||	|	} d| dt|
 d ||kr | ||= qn|d	 d kr|d }
|d! } d"t|
 d#t| d$ qn | d qn d%t|  d S )&Nr   c                     sJ    r   S d d  } }d7 ttd|  | dkrBdn|  S )Nr   r   rb   r   rf   )popr   r   )rr   )
free_names	next_namer   r   _name$  s
    z$trace.<locals>.format.<locals>._namec                    s   D ]*\}}}| |kr| || k r||f  S qt  d D ]>\}}|d }|d }| |kr<| || k r<d| |f  S q<dS )Nr   rR   r   Zseg_)NN)r   )addrr   Zsaddrr$   r   rY   )r   segment_intervalsr   r   find_segment,  s    z+trace.<locals>.format.<locals>.find_segmentz	 entries
r   rZ   actionallocr   r$   ZMEMz = r   r   z]
free_requestedzdel z # rI   free_completedz# free completed for rH   Zsegment_allocz = cudaMalloc(z, z)
Zsegment_freez	cudaFree(z) # ZoomZdevice_freezraise OutOfMemoryError # z requested, z free in CUDA
zTOTAL MEM: )r<   r   r   ro   r%   r   )entriesZsegment_addr_to_nameZallocation_addr_to_namer   r   countr   rY   r   r   r$   nZseg_nameZseg_addroffsetr   rc   freer   r   )r   r   r   r   format  sn    
&


 


"ztrace.<locals>.formatdevice_traceszDevice z ----------------
)rT   rU   r   r<   rV   )r   r   r   dr   r   r   trace  s    O
r   a  
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<script type="module">
import {add_local_files} from "https://cdn.jsdelivr.net/gh/pytorch/pytorch@main/torch/utils/viz/MemoryViz.js"
const local_files = $SNAPSHOT
add_local_files(local_files, $VIZ_KIND)
</script>
</body>
c                 C   st   |d k	rt jdtdd t| }|ddt|d   7 }t|d}t	d|dg}t
dt|d	|S )
Nz;device argument is deprecated, plots now contain all device   )
stacklevel    r+   zsnapshot.pickle)r   base64z	$VIZ_KINDz	$SNAPSHOT)warningswarnFutureWarningpickledumpsr   r   	b64encodedecodejson_memory_viz_templatereplacerepr)r   Zviz_kinddevicebufferZencoded_bufferZjson_formatr   r   r   _format_viz  s    
 r   c                 C   s   t | |sdnd|S )a  Generate a visualization over time of the memory usage recorded by the trace as an html file.

    Args:
        data: Memory snapshot as generated from torch.cuda.memory._snapshot()
        device (torch.device, optional): Generate the trace for this device, needed if multiple devices have allocations.
        plot_segments (bool, optional): Plots memory returned from cudaMalloc, rather than individual allocations.
                                        Defaults to False.

    Returns:
        str: HTML of visualization
    Active Memory TimelinezActive Cached Memory Timeliner   )r   r   plot_segmentsr   r   r   
trace_plot  s    r   c              	      s  dd l }ddlm}m} ddlm} |  i  jjD ]f}|j	|j
kr8|j}g }|rz|j	|j|jfkrr|| |j}qR||j}|r8|jjdkr8| |< q8|j dd td D dd td D dfd	d
d fdd	}	fdd}
i }jD ]\}}\}}}t||s.q||jkrP|	||||||f< n||jkrx|
|||f|j nh||jkr|
|||f|j |	|||d |||d f< n$||jkr|	|||dd|||f< qfdd| D }tt|t !ddD ]\}}d | }|d }|D ]P\}}}}||k r^|d || dd |d |d||d || }q0||d k r|d |d | dd qdd d D d< d D ]<}|d  |d 8  < |d s|d |d dd qΈS )Nr   )Action	TensorKey)
_EventTypec                 S   s   g | ]}g qS r   r   r   r   r   r   r     s     z(_profile_to_snapshot.<locals>.<listcomp>r   c                 S   s   g | ]}|d ddg dqS )Nr   )r   rR   rZ   rP   rK   r   )r   r   r   r   r   r     s   )r   r   c                    s   | j dkr| jS  S d S )Ncuda)typeindexr   )device_countr   r   	to_device  s    
z'_profile_to_snapshot.<locals>.to_deviceTc           
         s   |j }|jj}d | }|d d ks6|d |kr>||d< t|d ||  |d< j||}|d k	rt|j nd} |d}dd |D }d|| d	||d
}	|rd | |	 |	S )Nr   rR   rZ   unknownr   c                 S   s   g | ]}d d|j dqS )noner   )r
   r	   r   )r   )r   rB   r   r   r   r     s     z:_profile_to_snapshot.<locals>.allocate.<locals>.<listcomp>r   r   )r   r   r$   rP   r   categoryr   )	r   ZstorageZptrr   Z_categoriesr%   r   lowerr   )
r$   
tensor_keyversionduring_tracer   r   rY   r   stackr   )allocation_stacksmemory_profilerX   r   r   r   allocate  s    
z&_profile_to_snapshot.<locals>.allocatec              	      s8   dD ].} d |  || d | d d| d d qd S )N)r   r   r   r   r$   r   r   )r   r   r$   rP   r   )r   )r   r   r   )rX   r   r   r     s    z"_profile_to_snapshot.<locals>.freeF)r   c                    s2   g | ]*\\}}} |j |d  |d |d fqS )r   r$   r   r   )r   r   r   event)r   r   r   r     s   
r   r   rR   rK   Zinactive)r$   rG   rz   )r$   rG   r#   r   rZ   c                 S   s   g | ]}|d  r|qS )rK   r   r^   r   r   r   r      s      )T)"torchZtorch.profiler._memory_profilerr   r   Ztorch._C._profilerr   Z_memory_profileZ_op_treeZsorted_nodestagZ
AllocationparentZPyCallZPyCCallr   Zfrom_allocationZextra_fieldsZ
alloc_sizer   r   r   Ztimeline
isinstanceZCREATEZDESTROYr   r   ZINCREMENT_VERSIONZPREEXISTINGitemsr   r   operator
itemgetter)profiler   r   r   r   r   r   Zpython_parentsr   r   r   Z
kv_to_elemtimer   r   r   r$   Zblocks_at_endr   rK   rY   Z	last_addrrc   r   r   r   )r   r   r   rX   r   r   _profile_to_snapshot  sr    



	

 
r   c                 C   s   t | }t|d|S )a}  Generate a visualization over time of the memory usage recorded by kineto memory profiling as an html file.

    Args:
        profile: profile as generated by `torch.profiler.profile(profile_memory=True)`
        device (torch.device, optional): Generate the trace for this device, needed if multiple devices have allocations.

    Returns:
        str: HTML of visualization
    r   )r   r   )r   r   rX   r   r   r   profile_plot  s    
r   )r   c                 C   s   t | d|S )NzAllocator State Historyr   )r   r   r   r   r   segment_plot  s    r   __main__ztorch.cuda.memory._snapshot()zpickled memory statistics from z#Visualize memory dumps produced by )descriptionr   )destc                 C   s   | j ddddd d S )N-o--outputz
output.svgz$flamegraph svg (default: output.svg)defaulthelp)add_argument)rB   r   r   r   _output'  s    r   zmPrints overall allocation statistics and a visualization of how the allocators segments are currently filled.statsinput)r   z`Prints buffer of the most recent allocation events embedded in the snapshot in a Pythonic style.zaGenerate a flamegraph that visualizes what memory is stored in each allocator segment (aka block)zNGenerate a flamegraph the program locations contributing to CUDA memory usage.zGenerate a flamegraph that shows segments (aka blocks) that have been added or removed between two different memorys snapshots.rd   re   ))r   z]Generate a visualization over time of the memory usage recorded by the trace as an html file.)r   zfVisualize how allocations are packed into allocator segments at each point in a trace as an html file.z]visualize trace from this device (default: chooses the only device with trace info or errors)z-dz--device)r   r   r   z4path to save the visualization(default: output.html)r   r   zoutput.htmlr   z?visualize change to segments rather than individual allocationsz-sz
--segments
store_true)r   r   c                 C   s>   | dkrt jj}n
t| d}t|}t|tr:|g d}|S )N-rb)r   Ztraces)sysr,   r   openr   loadr   list)r   r   r   r   r   r   _readU  s    




r	  c              	   C   s$   t | d}|| W 5 Q R X d S )Nw)r  r<   )r   r   r   r   r   r   _write_  s    r  )r   r   r   )F)FF)N)NF)N)N)Rr   r  r/   rT   r7   r   	functoolsr   typingr   	itertoolsr   r   r   r   cache__all__r   r   r    r'   r(   r   rN   r   r   r   rn   ro   r}   r   r   r   r   r   r   r   r   r   r   r   rw   os.pathr1   realpathdirname__file__Zthedirremoveargparsefn_nameZpickledArgumentParserparseradd_subparsersZ
subparsersr   r   
add_parserZstats_ar   Ztrace_aZ
segments_aZmemory_aZ	compare_aZplotscmdZtrace_plot_ar   r   
parse_argsrA   r	  r  r   r  r   outputr4   rd   re   r   r   r   r   r   <module>   s   


	

MY
i












