@ -135,6 +135,7 @@ class Api:
self . add_api_route ( " /sdapi/v1/preprocess " , self . preprocess , methods = [ " POST " ] , response_model = PreprocessResponse )
self . add_api_route ( " /sdapi/v1/train/embedding " , self . train_embedding , methods = [ " POST " ] , response_model = TrainResponse )
self . add_api_route ( " /sdapi/v1/train/hypernetwork " , self . train_hypernetwork , methods = [ " POST " ] , response_model = TrainResponse )
self . add_api_route ( " /sdapi/v1/memory " , self . get_memory , methods = [ " GET " ] , response_model = MemoryResponse )
def add_api_route ( self , path : str , endpoint , * * kwargs ) :
if shared . cmd_opts . api_auth :
@ -501,6 +502,40 @@ class Api:
shared . state . end ( )
return TrainResponse ( info = " train embedding error: {error} " . format ( error = error ) )
def get_memory ( self ) :
try :
import os , psutil
process = psutil . Process ( os . getpid ( ) )
res = process . memory_info ( ) # only rss is cross-platform guaranteed so we dont rely on other values
ram_total = 100 * res . rss / process . memory_percent ( ) # and total memory is calculated as actual value is not cross-platform safe
ram = { ' free ' : ram_total - res . rss , ' used ' : res . rss , ' total ' : ram_total }
except Exception as err :
ram = { ' error ' : f ' { err } ' }
try :
import torch
if torch . cuda . is_available ( ) :
s = torch . cuda . mem_get_info ( )
system = { ' free ' : s [ 0 ] , ' used ' : s [ 1 ] - s [ 0 ] , ' total ' : s [ 1 ] }
s = dict ( torch . cuda . memory_stats ( shared . device ) )
allocated = { ' current ' : s [ ' allocated_bytes.all.current ' ] , ' peak ' : s [ ' allocated_bytes.all.peak ' ] }
reserved = { ' current ' : s [ ' reserved_bytes.all.current ' ] , ' peak ' : s [ ' reserved_bytes.all.peak ' ] }
active = { ' current ' : s [ ' active_bytes.all.current ' ] , ' peak ' : s [ ' active_bytes.all.peak ' ] }
inactive = { ' current ' : s [ ' inactive_split_bytes.all.current ' ] , ' peak ' : s [ ' inactive_split_bytes.all.peak ' ] }
warnings = { ' retries ' : s [ ' num_alloc_retries ' ] , ' oom ' : s [ ' num_ooms ' ] }
cuda = {
' system ' : system ,
' active ' : active ,
' allocated ' : allocated ,
' reserved ' : reserved ,
' inactive ' : inactive ,
' events ' : warnings ,
}
else :
cuda = { ' error ' : ' unavailable ' }
except Exception as err :
cuda = { ' error ' : f ' { err } ' }
return MemoryResponse ( ram = ram , cuda = cuda )
def launch ( self , server_name , port ) :
self . app . include_router ( self . router )
uvicorn . run ( self . app , host = server_name , port = port )