U
    yhm0                     @   s  d dl mZ d dlmZmZmZmZ d dlmZm	Z	 G dd dZ
G dd deZG dd	 d	eZG d
d deZG dd deZG dd deZeee edddZe
eeef edddZee
 eeef ee
ef dddZe
e
edddZee
 ee
ef edddZdS )     )Enum)
NamedTupleDictListSet)Nodemap_argc                   @   s@   e Zd ZdZeddddZdd Zdd	 Zd
d Zdd Z	dS )	PartitionzPartition class contains all the information about an individual partition.
    It also provides necessary methods for manipulation the partition.
    N)partition_idreturnc                 C   s4   t  | _|| _t  | _t  | _d| _d| _g | _d S )Nr   )setnodesr
   parentschildrenZ	bfs_levelused_mem_byteslogical_device_ids)selfr
    r   Y/var/www/html/venv/lib/python3.8/site-packages/torch/fx/experimental/partitioner_utils.py__init__   s    zPartition.__init__c                 C   s
   t | jS N)strr
   r   r   r   r   __str__   s    zPartition.__str__c                 C   s,   d| _ | jD ]}|  j t|| j7  _ qd S )Nr   )r   r   get_extra_size_of)r   noder   r   r   recalculate_mem_size   s    
zPartition.recalculate_mem_sizec                 C   sX   i }t |j|j t |j|j |D ]}|jdkr$| j| q$| j| |   d S )N>   get_attrplaceholder)r   args
setdefaultkwargsopr   addr   )r   r   input_nodesnr   r   r   add_node   s    
zPartition.add_nodec                    sz   | j krv j | i }t|j|j t|j|j |D ]2}t fdd|jD r:|jdkr: j | q: 	  d S )Nc                 3   s   | ]}| j kV  qd S r   )r   .0r&   r   r   r   	<genexpr>5   s    z(Partition.remove_node.<locals>.<genexpr>>   r   r   )
r   remover   r    r!   r"   allusersr#   r   )r   r   r%   Z
input_noder   r   r   remove_node)   s    
zPartition.remove_node)
__name__
__module____qualname____doc__intr   r   r   r'   r.   r   r   r   r   r	      s   	r	   c                   @   s&   e Zd ZU eed< eed< eed< dS )DevicenameZavailable_mem_bytesZ
logical_idN)r/   r0   r1   r   __annotations__r3   r   r   r   r   r4   <   s   
r4   c                   @   s   e Zd ZU eed< eed< dS )NodeLatencymem_latency_seccomputer_latency_secNr/   r0   r1   floatr6   r   r   r   r   r7   B   s   
r7   c                   @   s&   e Zd ZU eed< eed< eed< dS )PartitionLatencyr8   r9   overall_latency_secNr:   r   r   r   r   r<   I   s   
r<   c                   @   s    e Zd ZdZdZdZdZdZdS )PartitionModer               N)r/   r0   r1   
size_basedZ	sparse_nnZ
cost_awareZkl_basedZ	aot_basedr   r   r   r   r>   R   s
   r>   c                   @   s   e Zd ZU ee ed< ejZeed< dZ	e
ed< i Zeeef ed< i Zeeef ed< i Zeeee f ed< dZeed	< d
S )PartitionerConfigZdevicesmode        transfer_rate_bytes_per_secnode_to_latency_mappingnode_to_partition_mapping#partition_to_logical_device_mappingFsaturate_hostN)r/   r0   r1   r   r4   r6   r>   rC   rE   rG   r;   rH   r   r   r7   rI   r3   rJ   rK   boolr   r   r   r   rD   Z   s   
rD   )r   r   r   c                 C   s   i }t | j|j t | j|j d}|D ]0}||kr(t|dd}|rP||j7 }q(tdq(t| dd}|rv||j7 }ntd|S )zGiven a node and a set of nodes,
    this function return the extra size that needed
    if this node is included in this set.
    r   
size_bytesNznode has no size_bytes attr)r   r    r!   r"   getattroutput_sizeRuntimeErrorZ
total_size)r   r   r%   Ztotal_size_of_input_nodesr&   rM   r   r   r   r   e   s    
r   )	partitionrH   r   c                    st   t tt ddd}ttd fdd |}tdddd}|D ](} |tdddd}|j|jkrF|}qF|S )	zVGiven a partition and its nodes' latency, return a PartitionLatency for this partition)rQ   r   c                    s`   g } j D ]P}|jdkrq
i }t|j|j t|j|j t fdd|D s
|| q
|S )z>Given a partition, return a list of nodes on the top bfs level>   r   r   c                 3   s"   | ]}| j ko|jd kV  qdS )>   r   r   N)r   r#   r(   rQ   r   r   r*      s   zFget_latency_of_one_partition.<locals>.get_top_nodes.<locals>.<genexpr>)r   r#   r   r    r!   r"   anyappend)rQ   	top_nodesr   r%   r   rR   r   get_top_nodes   s    

z3get_latency_of_one_partition.<locals>.get_top_nodes)r   r   c           
         s   |  }|j t|j|j }|j|j }|j|j }t| jj}|rtdddd}|D ]&} |t|||}	|	j |j kr\|	}q\|S t|||S )zyGiven a top node of a partition, this function returns
        the latency of the critical path in the partition
        rF   r8   r9   r=   )	r=   maxr9   r8   r   r-   intersectionr   r<   )
r   partition_latencyZnode_latencyr=   r8   r9   r-   Zmax_latencyr&   Znew_partition_latency
dfs_helperrH   rQ   r   r   r\      sH     

      z0get_latency_of_one_partition.<locals>.dfs_helperrF   rW   )r	   r   r   r<   r=   )rQ   rH   rV   rU   Zcritical_path_latencyr   rZ   r   r[   r   get_latency_of_one_partition   s.    ,    r]   )
partitionsrH   r   c                 C   s$   i }| D ]}t ||}|||< q|S )zGiven all the partitions and node_to_latency_mapping dictionary,
    return a mapping dictionary of each partition to its overall latency
    )r]   )r^   rH   partition_to_latency_mappingrQ   rZ   r   r   r    get_partition_to_latency_mapping   s     
r`   )parent_partitionchild_partitionrG   c           	      C   s   | j g kr$|j g kr$| j |j kr$dS d}t }|jD ]h}i }t|j|j t|j|j |D ]>}|| jkr\||kr\t|dd}|dk	r||j7 }|	| q\q4|| S )zfGiven two partitions (parent and child),
    calculate the communication latency between the two.
    rF   r   rM   N)
r   r   r   r   r    r!   r"   rN   rO   r$   )	ra   rb   rG   Z	comm_sizeZvisited_nodesr   r%   r&   rM   r   r   r   get_comm_latency_between   s(    



rc   )r^   r_   rG   c                    sb   t ttd fdd tt  tt  ddd}|| }d}|D ]} |d}||krB|}qB|S )zGiven all partitions in a graph, find the critical path among all partitions
    and return its latency as the latency of the whole graph
    )rQ   latency_so_far_secr   c                    sX   ||  j 7 }| j}| jrTd}| jD ]*}t| |} ||| }||kr$|}q$|S |S )zJThis function helps to recursively get the latency of a path of partitionsrF   )r=   r   rc   )rQ   rd   r   Zmax_latency_secchildZcomm_latency_secZnew_latency_secr\   r_   rG   r   r   r\     s(    
   z4get_latency_of_partitioned_graph.<locals>.dfs_helper)r^   r   c                 S   s*   g }| D ]}t |jdkr|| q|S )zvThis function is to return all the partitions without parents
        as the starting points of all the paths
        r   )lenr   rT   )r^   top_partitionsrQ   r   r   r   get_top_partitions-  s
    z<get_latency_of_partitioned_graph.<locals>.get_top_partitionsrF   )r	   r;   r   )r^   r_   rG   ri   rh   Zcritical_path_latency_secrQ   Zlatency_secr   rf   r    get_latency_of_partitioned_graph  s    	
rj   N)enumr   typingr   r   r   r   Ztorch.fx.noder   r   r	   r4   r7   r<   r>   rD   r3   r   r]   r`   r;   rc   rj   r   r   r   r   <module>   s2   4	 
Z 

%
