@ -130,6 +130,7 @@ class Api:
self . add_api_route ( " /sdapi/v1/preprocess " , self . preprocess , methods = [ " POST " ] , response_model = PreprocessResponse )
self . add_api_route ( " /sdapi/v1/train/embedding " , self . train_embedding , methods = [ " POST " ] , response_model = TrainResponse )
self . add_api_route ( " /sdapi/v1/train/hypernetwork " , self . train_hypernetwork , methods = [ " POST " ] , response_model = TrainResponse )
self . add_api_route ( " /sdapi/v1/memory " , self . get_memory , methods = [ " GET " ] , response_model = MemoryResponse )
def add_api_route ( self , path : str , endpoint , * * kwargs ) :
if shared . cmd_opts . api_auth :
@ -465,6 +466,42 @@ class Api:
shared . state . end ( )
return TrainResponse ( info = " train embedding error: {error} " . format ( error = error ) )
def get_memory(self):
    """Report process/host RAM figures and CUDA memory statistics.

    The RAM and CUDA sections are gathered independently and are both
    best-effort: if collecting one of them raises, that section is replaced
    by ``{'error': <exception text>}`` rather than failing the whole call.
    Byte counts are reported in GiB, rounded to two decimals.

    Returns:
        MemoryResponse built from the ``ram`` and ``cuda`` dictionaries.
    """
    bytes_per_gib = 1024 ** 3

    def to_gib(num_bytes: float):
        # Scale a raw byte count to GiB with two decimals.
        return round(num_bytes / bytes_per_gib, 2)

    def current_and_peak(stats, prefix):
        # Pick the ``<prefix>.all.current`` / ``<prefix>.all.peak`` pair out of the stats mapping.
        return {
            'current': to_gib(stats[f'{prefix}.all.current']),
            'peak': to_gib(stats[f'{prefix}.all.peak']),
        }

    # --- host RAM ---------------------------------------------------------
    try:
        import os
        import psutil
        proc = psutil.Process(os.getpid())
        mem = proc.memory_info()
        # Total is back-computed from this process's RSS and the share of
        # memory psutil attributes to it.
        total_bytes = 100 * mem.rss / proc.memory_percent()
        ram = {
            # 'free' here is the derived total minus this process's RSS.
            'free': to_gib(total_bytes - mem.rss),
            'used': to_gib(mem.rss),
            'total': to_gib(total_bytes),
        }
    except Exception as ram_err:
        ram = {'error': f'{ram_err}'}

    # --- CUDA -------------------------------------------------------------
    try:
        import torch
        if not torch.cuda.is_available():
            cuda = {'error': 'unavailable'}
        else:
            # mem_get_info() is called without a device argument, exactly as before.
            mem_info = torch.cuda.mem_get_info()
            free_bytes = mem_info[0]
            device_total = mem_info[1]
            system = {
                'free': to_gib(free_bytes),
                'used': to_gib(device_total - free_bytes),
                'total': to_gib(device_total),
            }
            stats = dict(torch.cuda.memory_stats(shared.device))
            # Lookup order is kept (allocated, reserved, active, inactive, events)
            # so a missing key surfaces the same error text as before.
            allocated = current_and_peak(stats, 'allocated_bytes')
            reserved = current_and_peak(stats, 'reserved_bytes')
            active = current_and_peak(stats, 'active_bytes')
            inactive = current_and_peak(stats, 'inactive_split_bytes')
            events = {'retries': stats['num_alloc_retries'], 'oom': stats['num_ooms']}
            cuda = {
                'system': system,
                'active': active,
                'allocated': allocated,
                'reserved': reserved,
                'inactive': inactive,
                'events': events,
            }
    except Exception as cuda_err:
        cuda = {'error': f'{cuda_err}'}

    return MemoryResponse(ram=ram, cuda=cuda)
def launch(self, server_name, port):
    """Attach this object's router to its app and serve the app with uvicorn.

    Args:
        server_name: Passed to ``uvicorn.run`` as ``host``.
        port: Passed to ``uvicorn.run`` as ``port``.
    """
    application = self.app
    # Routes collected on self.router only become reachable once included in the app.
    application.include_router(self.router)
    uvicorn.run(application, host=server_name, port=port)