Merge branch 'master' into master
commit
f42e0aae6d
@ -0,0 +1,28 @@
|
||||
# Please read the [contributing wiki page](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Contributing) before submitting a pull request!
|
||||
|
||||
If you have a large change, pay special attention to this paragraph:
|
||||
|
||||
> Before making changes, if you think that your feature will result in more than 100 lines changing, find me and talk to me about the feature you are proposing. It pains me to reject the hard work someone else did, but I won't add everything to the repo, and it's better if the rejection happens before you have to waste time working on the feature.
|
||||
|
||||
Otherwise, after making sure you're following the rules described in the wiki page, remove this section and continue on.
|
||||
|
||||
**Describe what this pull request is trying to achieve.**
|
||||
|
||||
A clear and concise description of what you're trying to accomplish with this, so your intent doesn't have to be extracted from your code.
|
||||
|
||||
**Additional notes and description of your changes**
|
||||
|
||||
More technical discussion about your changes goes here, plus anything that a maintainer might have to specifically take a look at, or be wary of.
|
||||
|
||||
**Environment this was tested in**
|
||||
|
||||
List the environment you have developed / tested this on. As per the contributing page, changes should be able to work on Windows out of the box.
|
||||
- OS: [e.g. Windows, Linux]
|
||||
- Browser: [e.g. Chrome, Safari]
|
||||
- Graphics card: [e.g. NVIDIA RTX 2080 8GB, AMD RX 6600 8GB]
|
||||
|
||||
**Screenshots or videos of your changes**
|
||||
|
||||
If applicable, screenshots or a video showing off your changes. If it edits an existing UI, it should ideally contain a comparison of what used to be there, before your changes were made.
|
||||
|
||||
This is **required** for anything that touches the user interface.
|
||||
@ -0,0 +1 @@
|
||||
* @AUTOMATIC1111
|
||||
|
@ -0,0 +1,177 @@
|
||||
|
||||
/**
 * Creates the custom right-click context menu machinery.
 *
 * All state (the registered entries and the "listeners installed" flag) lives
 * in this closure.
 *
 * @returns {Array<Function>} `[appendContextMenuOption, removeContextMenuOption,
 *     addContextMenuEventListener]`, in that order.
 */
function contextMenuInit(){
    let eventListenerApplied = false;
    // Maps a CSS selector to the array of menu entries registered for it.
    const menuSpecs = new Map();

    // Builds an id suffix from the current time plus a random part.
    const uid = function(){
        return Date.now().toString(36) + Math.random().toString(36).slice(2);
    };

    // Removes the context menu that is currently shown, if there is one.
    function removeExistingMenu(){
        const oldMenu = gradioApp().querySelector('#context-menu');
        if(oldMenu){
            oldMenu.remove();
        }
    }

    /**
     * Builds a menu for `menuEntries` and shows it at the pointer position.
     *
     * @param {MouseEvent} event - the triggering event; supplies clientX/clientY.
     * @param {Element} element - the clicked element (accepted but not used here).
     * @param {Array<Object>} menuEntries - entries with a `name` and a `func`.
     */
    function showContextMenu(event, element, menuEntries){
        const posx = event.clientX + document.body.scrollLeft + document.documentElement.scrollLeft;
        const posy = event.clientY + document.body.scrollTop + document.documentElement.scrollTop;

        removeExistingMenu();

        // Copy colours and font from the current tab so the menu matches the UI.
        const baseStyle = window.getComputedStyle(uiCurrentTab);

        const contextMenu = document.createElement('nav');
        contextMenu.id = "context-menu";
        contextMenu.style.background = baseStyle.background;
        contextMenu.style.color = baseStyle.color;
        contextMenu.style.fontFamily = baseStyle.fontFamily;
        contextMenu.style.top = posy + 'px';
        contextMenu.style.left = posx + 'px';

        const contextMenuList = document.createElement('ul');
        contextMenuList.className = 'context-menu-items';
        contextMenu.append(contextMenuList);

        menuEntries.forEach(function(entry){
            const contextMenuEntry = document.createElement('a');
            contextMenuEntry.innerHTML = entry['name'];
            contextMenuEntry.addEventListener("click", function(){
                entry['func']();
            });
            contextMenuList.append(contextMenuEntry);
        });

        gradioApp().getRootNode().appendChild(contextMenu);

        // The size is only known once the menu is attached; 4px is a safety margin.
        const menuWidth = contextMenu.offsetWidth + 4;
        const menuHeight = contextMenu.offsetHeight + 4;

        const windowWidth = window.innerWidth;
        const windowHeight = window.innerHeight;

        // Pull the menu back inside the window when it would overflow an edge.
        if ( (windowWidth - posx) < menuWidth ) {
            contextMenu.style.left = windowWidth - menuWidth + "px";
        }

        if ( (windowHeight - posy) < menuHeight ) {
            contextMenu.style.top = windowHeight - menuHeight + "px";
        }
    }

    /**
     * Registers a menu entry for every element matching `targetEmementSelector`.
     *
     * @param {string} targetEmementSelector - CSS selector the entry applies to.
     * @param {string} entryName - label shown in the menu (assigned as innerHTML).
     * @param {Function} entryFunction - called with no arguments when clicked.
     * @returns {string} id usable with removeContextMenuOption.
     */
    function appendContextMenuOption(targetEmementSelector,entryName,entryFunction){
        // Declared locally: the original leaked this as an implicit global.
        let currentItems = menuSpecs.get(targetEmementSelector);

        if(!currentItems){
            currentItems = [];
            menuSpecs.set(targetEmementSelector,currentItems);
        }

        // `isNew` is stored on the entry but not read anywhere in this closure.
        const newItem = {'id':targetEmementSelector+'_'+uid(),
                         'name':entryName,
                         'func':entryFunction,
                         'isNew':true};

        currentItems.push(newItem);
        return newItem['id'];
    }

    /**
     * Removes the entry with the given id from whichever selector holds it.
     * Unknown ids are ignored.
     *
     * @param {string} uid - id returned by appendContextMenuOption.
     */
    function removeContextMenuOption(uid){
        menuSpecs.forEach(function(entries) {
            const index = entries.findIndex(function(entry){ return entry['id'] === uid; });
            if(index >= 0){
                entries.splice(index, 1);
            }
        });
    }

    /**
     * Installs the click and contextmenu listeners on the Gradio app root.
     * Later calls are no-ops, so it can be called on every UI update.
     */
    function addContextMenuEventListener(){
        if(eventListenerApplied){
            return;
        }

        // A click closes the menu, except clicks on elements whose id contains
        // 'check_progress'.
        gradioApp().addEventListener("click", function(e) {
            const source = e.composedPath()[0];
            if(source.id && source.id.includes('check_progress')){
                return;
            }
            removeExistingMenu();
        });

        gradioApp().addEventListener("contextmenu", function(e) {
            removeExistingMenu();
            const target = e.composedPath()[0];
            // Every matching selector is processed (the original `return` inside
            // forEach never stopped the iteration); showContextMenu replaces any
            // menu it finds, so the last matching selector wins.
            menuSpecs.forEach(function(entries, selector) {
                if(target.matches(selector)){
                    showContextMenu(e, target, entries);
                    e.preventDefault();
                }
            });
        });

        eventListenerApplied = true;
    }

    return [appendContextMenuOption, removeContextMenuOption, addContextMenuEventListener];
}
|
||||
|
||||
// Build the context-menu closure once and publish its three entry points
// under their global names.
initResponse = contextMenuInit();
[appendContextMenuOption, removeContextMenuOption, addContextMenuEventListener] = initResponse;
|
||||
|
||||
(function(){
    //Start example Context Menu Items

    // Clicks the generate button now (if idle) and then re-checks every 500 ms.
    // "Idle" means the interrupt button has no offsetParent.
    let generateOnRepeat = function(genbuttonid,interruptbuttonid){
        const generateButton = gradioApp().querySelector(genbuttonid);
        const interruptButton = gradioApp().querySelector(interruptbuttonid);
        const generateIfIdle = function(){
            if(!interruptButton.offsetParent){
                generateButton.click();
            }
        };

        generateIfIdle();
        // Only one repeat timer is kept; it is stored on window.
        clearInterval(window.generateOnRepeatInterval);
        window.generateOnRepeatInterval = setInterval(generateIfIdle, 500);
    };

    appendContextMenuOption('#txt2img_generate','Generate forever',function(){
        generateOnRepeat('#txt2img_generate','#txt2img_interrupt');
    });
    appendContextMenuOption('#img2img_generate','Generate forever',function(){
        generateOnRepeat('#img2img_generate','#img2img_interrupt');
    });

    // Stops the repeat timer started by "Generate forever".
    let cancelGenerateForever = function(){
        clearInterval(window.generateOnRepeatInterval);
    };

    [
        '#txt2img_interrupt',
        '#txt2img_generate',
        '#img2img_interrupt',
        '#img2img_generate',
    ].forEach(function(selector){
        appendContextMenuOption(selector,'Cancel generate forever',cancelGenerateForever);
    });

    // Clicks the current tab's #roll button three times, 100 ms apart.
    appendContextMenuOption('#roll','Roll three',function(){
        const rollButton = get_uiCurrentTabContent().querySelector('#roll');
        [100, 200, 300].forEach(function(delay){
            setTimeout(function(){ rollButton.click(); }, delay);
        });
    });
})();
//End example Context Menu Items
|
||||
|
||||
// Try to install the context-menu listeners on every UI update; the call is a
// no-op once they have been installed.
onUiUpdate(() => {
    addContextMenuEventListener();
});
|
||||
@ -0,0 +1,45 @@
|
||||
// Adjusts the attention weight of the selected text in a prompt box with
// ArrowUp / ArrowDown. A prompt box is any element whose placeholder contains
// "prompt" (case-insensitive).
//  - Selection not preceded by "(": wrap it as "(selection:1.0)".
//  - Selection already preceded by "(": read the number following the
//    selection and change it by 0.1.
addEventListener('keydown', (event) => {
    const target = event.originalTarget || event.composedPath()[0];
    if (!target.hasAttribute("placeholder")) return;
    if (!target.placeholder.toLowerCase().includes("prompt")) return;

    const plus = "ArrowUp";
    const minus = "ArrowDown";
    if (event.key !== plus && event.key !== minus) return;

    // Declared locally: the original leaked these as implicit globals.
    const selectionStart = target.selectionStart;
    const selectionEnd = target.selectionEnd;
    if (selectionStart === selectionEnd) return;

    event.preventDefault();

    if (selectionStart === 0 || target.value[selectionStart - 1] !== "(") {
        target.value = target.value.slice(0, selectionStart) +
            "(" + target.value.slice(selectionStart, selectionEnd) + ":1.0)" +
            target.value.slice(selectionEnd);

        // Keep the same text selected; it moved right by one for the new "(".
        target.focus();
        target.selectionStart = selectionStart + 1;
        target.selectionEnd = selectionEnd + 1;
    } else {
        // Skip the character right after the selection (expected to be ":"),
        // then measure up to and including the next ")".
        const end = target.value.slice(selectionEnd + 1).indexOf(")") + 1;
        let weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end));
        if (Number.isNaN(weight)) return;
        if (event.key === minus) weight -= 0.1;
        if (event.key === plus) weight += 0.1;

        // Drop floating-point noise such as 1.2000000000000002.
        weight = parseFloat(weight.toPrecision(12));

        target.value = target.value.slice(0, selectionEnd + 1) +
            weight +
            target.value.slice(selectionEnd + 1 + end - 1);

        target.focus();
        target.selectionStart = selectionStart;
        target.selectionEnd = selectionEnd;
    }

    // Since we've modified a Gradio Textbox component manually, we need to simulate an `input` DOM event to ensure its
    // internal Svelte data binding remains in sync.
    target.dispatchEvent(new Event("input", { bubbles: true }));
});
|
||||
@ -0,0 +1,19 @@
|
||||
// When a file is dropped onto a field whose placeholder is exactly "Prompt",
// hand the dropped files to the hidden file input of the current tab's
// prompt-image component.
window.onload = function(){
    window.addEventListener('drop', function(dropEvent){
        const dropTarget = dropEvent.composedPath()[0];
        // The result is not used below; the call is kept so behaviour is unchanged.
        const idx = selected_gallery_index();
        if (dropTarget.placeholder != "Prompt") {
            return;
        }

        // Tab index 1 selects the img2img component, anything else txt2img.
        let prompt_target;
        if (get_tab_index('tabs') == 1) {
            prompt_target = "img2img_prompt_image";
        } else {
            prompt_target = "txt2img_prompt_image";
        }

        dropEvent.stopPropagation();
        dropEvent.preventDefault();

        const imgParent = gradioApp().getElementById(prompt_target);
        const droppedFiles = dropEvent.dataTransfer.files;
        const fileInput = imgParent.querySelector('input[type="file"]');
        if (!fileInput) {
            return;
        }
        fileInput.files = droppedFiles;
        fileInput.dispatchEvent(new Event('change'));
    });
};
|
||||
@ -0,0 +1,206 @@
|
||||
// Click handler for a gallery item (`this` is the clicked button).
// Unless the button has the "transform" class, it records which gallery items
// are currently hidden and schedules images_history_hide_buttons 10 ms later.
// It then always updates the selected image info.
var images_history_click_image = function(){
    if (!this.classList.contains("transform")){
        var gallery = images_history_get_parent_by_class(this, "images_history_cantainor");
        var hidden_list = [];
        gallery.querySelectorAll(".gallery-item").forEach(function(item, position){
            if (item.style.display == "none"){
                hidden_list.push(position);
            }
        });
        if (hidden_list.length > 0){
            setTimeout(images_history_hide_buttons, 10, hidden_list, gallery);
        }
    }
    images_history_set_image_info(this);
}
|
||||
|
||||
// Click handler for a history tab button (`this` carries a "tabname" attribute).
// The first time a tab is clicked, its "renew page" button is clicked and the
// tab name is added as a class on the tab box so it is not refreshed again.
var images_history_click_tab = function(){
    var tabs_box = gradioApp().getElementById("images_history_tab");
    var already_loaded = tabs_box.classList.contains(this.getAttribute("tabname"));
    if (already_loaded) {
        return;
    }
    gradioApp().getElementById(this.getAttribute("tabname") + "_images_history_renew_page").click();
    tabs_box.classList.add(this.getAttribute("tabname"));
}
|
||||
|
||||
// Sets the `disabled` attribute on every ".images_history_del_button" element.
function images_history_disabled_del(){
    var del_buttons = gradioApp().querySelectorAll(".images_history_del_button");
    for (var del_button of del_buttons){
        del_button.setAttribute('disabled','disabled');
    }
}
|
||||
|
||||
// Walks up from `item` and returns the nearest ancestor whose classList
// contains `class_name`. `item` itself is never considered. When no ancestor
// matches, the walk runs off the top of the tree and throws.
function images_history_get_parent_by_class(item, class_name){
    var ancestor = item;
    do {
        ancestor = ancestor.parentElement;
    } while (!ancestor.classList.contains(class_name));
    return ancestor;
}
|
||||
|
||||
// Walks up from `item` and returns the nearest ancestor whose tagName equals
// `tagname` (compared in upper case). `item` itself is never considered. When
// no ancestor matches, the walk runs off the top of the tree and throws.
function images_history_get_parent_by_tagname(item, tagname){
    var parent = item.parentElement;
    tagname = tagname.toUpperCase();
    // The leftover debug console.log that ran on every step was removed.
    while(parent.tagName != tagname){
        parent = parent.parentElement;
    }
    return parent;
}
|
||||
|
||||
// Hides the gallery items at the positions listed in `hidden_list`.
//
// While the number of currently hidden items equals hidden_list.length, the
// function reschedules itself every 10 ms.
// NOTE(review): presumably this keeps re-applying the hidden state until the
// gallery is re-rendered with a different number of hidden items - confirm.
function images_history_hide_buttons(hidden_list, gallery){
    var buttons = gallery.querySelectorAll(".gallery-item");
    var num = 0;
    buttons.forEach(function(e){
        if (e.style.display == "none"){
            num += 1;
        }
    });
    if (num == hidden_list.length){
        setTimeout(images_history_hide_buttons, 10, hidden_list, gallery);
    }
    // Iterate the stored positions directly; the original `for (i in ...)`
    // leaked `i` as an implicit global.
    for (var position of hidden_list){
        buttons[position].style.display = "none";
    }
}
|
||||
|
||||
// Works out the position of `button` among the visible gallery items of its
// nearest DIV ancestor and stores it in the "img_index" attribute of the
// container's ".images_history_set_index" button. When the stored index
// changes, the delete buttons are disabled. The set-index button is then
// clicked in every case.
function images_history_set_image_info(button){
    var siblings = images_history_get_parent_by_tagname(button, "DIV").querySelectorAll(".gallery-item");
    var index = -1;
    var visible_seen = 0;
    for (var candidate of siblings){
        if (candidate == button){
            index = visible_seen;
        }
        // Hidden items do not count towards the position.
        if (candidate.style.display != "none"){
            visible_seen += 1;
        }
    }

    var gallery = images_history_get_parent_by_class(button, "images_history_cantainor");
    var set_btn = gallery.querySelector(".images_history_set_index");
    var curr_idx = set_btn.getAttribute("img_index");
    // Loose comparison on purpose: the attribute is a string, index is a number.
    if (curr_idx != index) {
        set_btn.setAttribute("img_index", index);
        images_history_disabled_del();
    }
    set_btn.click();
}
|
||||
|
||||
// Returns [img_index, image_path, files], where img_index is the "img_index"
// attribute of the tab's "<tabname>_images_history_set_index" element and the
// other two values are passed through unchanged.
function images_history_get_current_img(tabname, image_path, files){
    var set_btn = gradioApp().getElementById(tabname + '_images_history_set_index');
    var img_index = set_btn.getAttribute("img_index");
    return [img_index, image_path, files];
}
|
||||
|
||||
// Hides the gallery items of the images being deleted and moves the selection
// to a neighbouring image.
//
// The visible ".gallery-item" elements of the tab are collected and half their
// count is used as the number of images.
// NOTE(review): presumably each image appears twice in the gallery - confirm.
// If at least that many images are deleted, the tab's "renew page" button is
// clicked after 30 ms instead. Delete buttons are disabled in every case.
//
// Returns all arguments as an array, with image_index parsed to an integer.
function images_history_delete(del_num, tabname, img_path, img_file_name, page_index, filenames, image_index){
    image_index = parseInt(image_index, 10);
    const tab = gradioApp().getElementById(tabname + '_images_history');
    const buttons = [];
    tab.querySelectorAll(".gallery-item").forEach(function(e){
        if (e.style.display != 'none'){
            buttons.push(e);
        }
    });
    const img_num = buttons.length / 2;
    if (img_num <= del_num){
        setTimeout(function(tabname){
            gradioApp().getElementById(tabname + '_images_history_renew_page').click();
        }, 30, tabname);
    } else {
        let next_img;
        for (let i = 0; i < del_num; i++){
            if (image_index + i < image_index + img_num){
                buttons[image_index + i].style.display = 'none';
                // NOTE(review): this index does not depend on `i`, so the same
                // second-half item is hidden on every pass - confirm intent.
                buttons[image_index + img_num + 1].style.display = 'none';
                next_img = image_index + i + 1;
            }
        }
        // Declared here: the original declared `bnt` (typo), which turned
        // `btn` into an implicit global.
        let btn;
        if (next_img >= img_num){
            btn = buttons[image_index - del_num];
        } else {
            btn = buttons[next_img];
        }
        setTimeout(function(btn){btn.click()}, 30, btn);
    }
    images_history_disabled_del();
    return [del_num, tabname, img_path, img_file_name, page_index, filenames, image_index];
}
|
||||
|
||||
// Makes every gallery item of the tab visible again (display: block) and
// returns the four arguments unchanged as an array.
function images_history_turnpage(img_path, page_index, image_index, tabname){
    var history_tab = gradioApp().getElementById(tabname + '_images_history');
    for (var item of history_tab.querySelectorAll(".gallery-item")){
        item.style.display = 'block';
    }
    return [img_path, page_index, image_index, tabname];
}
|
||||
|
||||
// Removes the `disabled` attribute from every ".images_history_del_button" element.
function images_history_enable_del_buttons(){
    var del_buttons = gradioApp().querySelectorAll(".images_history_del_button");
    for (var del_button of del_buttons){
        del_button.removeAttribute('disabled');
    }
}
|
||||
|
||||
// One-time setup of the image-history UI.
//
// Until the txt2img "renew page" button exists, the function retries every
// 500 ms. After that it tags the per-tab elements with shared class names,
// gives the tab box the id "images_history_tab", stores each tab's name in a
// "tabname" attribute on its button and marks the first tab as loaded.
function images_history_init(){
    var load_txt2img_button = gradioApp().getElementById('txt2img_images_history_renew_page');
    if (!load_txt2img_button){
        setTimeout(images_history_init, 500);
        return;
    }

    // forEach keeps the loop variable local; the original leaked `tab` as an
    // implicit global and used for-in on an array.
    images_history_tab_list.forEach(function(tab){
        gradioApp().getElementById(tab + '_images_history').classList.add("images_history_cantainor");
        gradioApp().getElementById(tab + '_images_history_set_index').classList.add("images_history_set_index");
        gradioApp().getElementById(tab + '_images_history_del_button').classList.add("images_history_del_button");
        gradioApp().getElementById(tab + '_images_history_gallery').classList.add("images_history_gallery");
    });

    var tabs_box = gradioApp().getElementById("tab_images_history").querySelector("div").querySelector("div").querySelector("div");
    tabs_box.setAttribute("id", "images_history_tab");
    var tab_btns = tabs_box.querySelectorAll("button");

    images_history_tab_list.forEach(function(tabname, index){
        tab_btns[index].setAttribute("tabname", tabname);

        // this refreshes history upon tab switch
        // until the history is known to work well, which is not the case now, we do not do this at startup
        //tab_btns[index].addEventListener('click', images_history_click_tab);
    });
    tabs_box.classList.add(images_history_tab_list[0]);

    // same as above, at page load
    //load_txt2img_button.click();
}
|
||||
|
||||
// Names of the tabs that have an image-history gallery.
var images_history_tab_list = ["txt2img", "img2img", "extras"];

// First initialisation attempt, 500 ms after this script runs.
setTimeout(images_history_init, 500);

// Whenever the app's DOM changes, (re-)attach the click handler to every
// gallery item of every history tab.
document.addEventListener("DOMContentLoaded", function() {
    var mutationObserver = new MutationObserver(function(m){
        images_history_tab_list.forEach(function(tabname){
            var item_selector = '#' + tabname + '_images_history .gallery-item';
            gradioApp().querySelectorAll(item_selector).forEach(function(bnt){
                bnt.addEventListener('click', images_history_click_image, true);
            });

            // same as load_txt2img_button.click() above
            /*
            var cls_btn = gradioApp().getElementById(tabname + '_images_history_gallery').querySelector("svg");
            if (cls_btn){
                cls_btn.addEventListener('click', function(){
                    gradioApp().getElementById(tabname + '_images_history_renew_page').click();
                }, false);
            }*/
        });
    });
    mutationObserver.observe( gradioApp(), { childList:true, subtree:true });
});
|
||||
|
||||
|
||||
@ -0,0 +1,8 @@
|
||||
|
||||
|
||||
// Starts progress polling for the 'ti' job, clears the #ti_error element and
// returns the call's arguments converted by args_to_array.
function start_training_textual_inversion(){
    requestProgress('ti');

    var error_box = gradioApp().querySelector('#ti_error');
    error_box.innerHTML = '';

    return args_to_array(arguments);
}
|
||||
@ -0,0 +1,173 @@
|
||||
import os.path
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
import multiprocessing
|
||||
import time
|
||||
import re
|
||||
|
||||
re_special = re.compile(r'([\\()])')
|
||||
|
||||
def get_deepbooru_tags(pil_image):
    """
    Tag a single image: start the deepbooru process, ask it for the tags of
    `pil_image`, and always stop the process again afterwards.

    Meant for simple one-image use; used by the img2img interrogate.
    """
    from modules import shared  # prevents circular reference

    try:
        score_threshold = shared.opts.interrogate_deepbooru_score_threshold
        create_deepbooru_process(score_threshold, create_deepbooru_opts())
        tags = get_tags_from_process(pil_image)
        return tags
    finally:
        # Runs on success and on failure so the process never stays alive.
        release_process()
|
||||
|
||||
|
||||
# Key under which the "return ranks" setting is stored in the options dict.
OPT_INCLUDE_RANKS = "include_ranks"


def create_deepbooru_opts():
    """
    Snapshot the deepbooru-related settings from `shared.opts` into a plain
    dict with the keys "use_spaces", "use_escape", "alpha_sort" and
    OPT_INCLUDE_RANKS.
    """
    from modules import shared

    settings = shared.opts
    deepbooru_opts = {}
    deepbooru_opts["use_spaces"] = settings.deepbooru_use_spaces
    deepbooru_opts["use_escape"] = settings.deepbooru_escape
    deepbooru_opts["alpha_sort"] = settings.deepbooru_sort_alpha
    deepbooru_opts[OPT_INCLUDE_RANKS] = settings.interrogate_return_ranks
    return deepbooru_opts
|
||||
|
||||
|
||||
def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
    """
    Worker loop of the deepbooru process.

    Loads the model once, then takes items from `queue` until the string
    "QUIT" arrives. For every other item the tags are computed and stored
    under deepbooru_process_return["value"].
    """
    model, tags = get_deepbooru_tags_model()
    while True:  # while process is running, keep monitoring queue for new image
        item = queue.get()
        if item == "QUIT":
            return
        deepbooru_process_return["value"] = get_deepbooru_tags_from_model(
            model, tags, item, threshold, deepbooru_opts
        )
|
||||
|
||||
|
||||
def create_deepbooru_process(threshold, deepbooru_opts):
    """
    Start the deepbooru worker process and publish its handles on `shared`.

    Images are sent to the worker through a managed queue, so several images
    can be processed in a row without reloading the model or creating a new
    process. Results come back through a managed dict under the key "value".
    That key is set to -1 to mean "no result yet", and the code that queues an
    image waits until it changes.
    """
    from modules import shared  # prevents circular reference

    manager = shared.deepbooru_process_manager = multiprocessing.Manager()
    image_queue = shared.deepbooru_process_queue = manager.Queue()
    result = shared.deepbooru_process_return = manager.dict()
    result["value"] = -1

    worker = shared.deepbooru_process = multiprocessing.Process(
        target=deepbooru_process,
        args=(image_queue, result, threshold, deepbooru_opts),
    )
    worker.start()
|
||||
|
||||
|
||||
def get_tags_from_process(image):
    """
    Send `image` to the running deepbooru process and wait for its tags.

    The shared return slot is reset to -1, the image is queued, and the slot
    is polled every 0.2 s until it holds something other than -1. The slot is
    reset to -1 again before the caption is returned.
    """
    from modules import shared

    shared.deepbooru_process_return["value"] = -1
    shared.deepbooru_process_queue.put(image)

    while True:
        if shared.deepbooru_process_return["value"] != -1:
            break
        time.sleep(0.2)

    caption = shared.deepbooru_process_return["value"]
    shared.deepbooru_process_return["value"] = -1
    return caption
|
||||
|
||||
|
||||
def release_process():
    """
    Stop the deepbooru process to return used memory.

    Sends "QUIT" through the queue, waits for the process to finish and then
    clears every deepbooru handle stored on `shared`.
    """
    from modules import shared  # prevents circular reference

    shared.deepbooru_process_queue.put("QUIT")
    shared.deepbooru_process.join()

    for handle in (
        "deepbooru_process_queue",
        "deepbooru_process",
        "deepbooru_process_return",
        "deepbooru_process_manager",
    ):
        setattr(shared, handle, None)
|
||||
|
||||
def get_deepbooru_tags_model():
    """
    Load the DeepDanbooru model and its tag list from `../models/deepbooru`
    (relative to this file), downloading and unpacking the release archive
    first when `project.json` is missing.

    Returns a `(model, tags)` tuple.
    """
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np

    model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'models', 'deepbooru'))
    project_file = os.path.join(model_path, 'project.json')

    if not os.path.exists(project_file):
        # there is no point importing these every time
        import zipfile
        from basicsr.utils.download_util import load_file_from_url

        load_file_from_url(
            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
            model_path)
        archive_path = os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip")
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            zip_ref.extractall(model_path)
        # The archive is deleted once it has been unpacked.
        os.remove(archive_path)

    tags = dd.project.load_tags_from_project(model_path)
    model = dd.project.load_model_from_project(model_path, compile_model=False)
    return model, tags
|
||||
|
||||
|
||||
def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
    """
    Run `model` on `pil_image` and return the matching tags as one
    comma-separated string.

    A tag is kept when its score is at least `threshold` and it does not start
    with "rating:". `deepbooru_opts` controls the output:
      - 'alpha_sort': sort by tag name instead of by descending score
      - 'use_spaces': replace underscores with spaces
      - 'use_escape': backslash-escape the characters matched by `re_special`
      - 'include_ranks': format each tag as "(tag:score)" with three decimals

    The kept "score tag" pairs are also printed, one per line.
    """
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np

    alpha_sort = deepbooru_opts['alpha_sort']
    use_spaces = deepbooru_opts['use_spaces']
    use_escape = deepbooru_opts['use_escape']
    include_ranks = deepbooru_opts['include_ranks']

    # Resize and pad the image to the model's input size, then scale to 0..1.
    width = model.input_shape[2]
    height = model.input_shape[1]
    pixels = tf.image.resize(
        np.array(pil_image),
        size=(height, width),
        method=tf.image.ResizeMethod.AREA,
        preserve_aspect_ratio=True,
    ).numpy()  # EagerTensor to np.array
    pixels = dd.image.transform_and_pad_image(pixels, width, height)
    pixels = pixels / 255.0
    pixels_shape = pixels.shape
    batch = pixels.reshape((1, pixels_shape[0], pixels_shape[1], pixels_shape[2]))

    scores = model.predict(batch)[0]
    score_by_tag = {tag: scores[position] for position, tag in enumerate(tags)}

    kept = [
        (score_by_tag[tag], tag)
        for tag in tags
        if score_by_tag[tag] >= threshold and not tag.startswith("rating:")
    ]
    console_lines = [f'{score} {tag}' for score, tag in kept]

    # sort by reverse by likelihood and normal for alpha
    if alpha_sort:
        kept.sort(key=lambda pair: pair[1], reverse=False)
    else:
        kept.sort(key=lambda pair: pair[0], reverse=True)

    # format tag text as requested
    formatted_tags = []
    for weight, tag in kept:
        # note: the text will still have a colon if include_ranks is True
        text = tag.replace(':', ' ')
        if use_spaces:
            text = text.replace('_', ' ')
        if use_escape:
            text = re.sub(re_special, r'\\\1', text)
        if include_ranks:
            text = f"({text}:{weight:.3f})"
        formatted_tags.append(text)

    print('\n'.join(sorted(console_lines, reverse=True)))

    return ', '.join(formatted_tags)
|
||||
@ -0,0 +1,347 @@
|
||||
import datetime
|
||||
import glob
|
||||
import html
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
import tqdm
|
||||
import csv
|
||||
|
||||
import torch
|
||||
|
||||
from ldm.util import default
|
||||
from modules import devices, shared, processing, sd_models
|
||||
import torch
|
||||
from torch import einsum
|
||||
from einops import rearrange, repeat
|
||||
import modules.textual_inversion.dataset
|
||||
from modules.textual_inversion import textual_inversion
|
||||
from modules.textual_inversion.learn_schedule import LearnRateScheduler
|
||||
|
||||
|
||||
class HypernetworkModule(torch.nn.Module):
    """
    Two linear layers (dim -> 2*dim -> dim) whose output is added back to the
    input, scaled by the class-wide `multiplier`.
    """

    # Strength applied by every instance; changed through apply_strength().
    multiplier = 1.0

    def __init__(self, dim, state_dict=None):
        """
        Build the two layers for inputs of width `dim`. Weights are loaded
        from `state_dict` when given; otherwise weights are drawn from a
        normal distribution (std 0.01) and biases are zeroed. The module is
        then moved to `devices.device`.
        """
        super().__init__()

        self.linear1 = torch.nn.Linear(dim, dim * 2)
        self.linear2 = torch.nn.Linear(dim * 2, dim)

        if state_dict is None:
            for linear in (self.linear1, self.linear2):
                linear.weight.data.normal_(mean=0.0, std=0.01)
                linear.bias.data.zero_()
        else:
            self.load_state_dict(state_dict, strict=True)

        self.to(devices.device)

    def forward(self, x):
        """Return `x` plus the two-layer projection of `x` times `multiplier`."""
        hidden = self.linear1(x)
        delta = self.linear2(hidden)
        return x + delta * self.multiplier
|
||||
|
||||
|
||||
def apply_strength(value=None):
    """
    Set the multiplier shared by all HypernetworkModule instances. When
    `value` is None, the `sd_hypernetwork_strength` option is used.
    """
    if value is None:
        value = shared.opts.sd_hypernetwork_strength
    HypernetworkModule.multiplier = value
|
||||
|
||||
|
||||
class Hypernetwork:
|
||||
filename = None
|
||||
name = None
|
||||
|
||||
def __init__(self, name=None, enable_sizes=None):
|
||||
self.filename = None
|
||||
self.name = name
|
||||
self.layers = {}
|
||||
self.step = 0
|
||||
self.sd_checkpoint = None
|
||||
self.sd_checkpoint_name = None
|
||||
|
||||
for size in enable_sizes or []:
|
||||
self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size))
|
||||
|
||||
def weights(self):
|
||||
res = []
|
||||
|
||||
for k, layers in self.layers.items():
|
||||
for layer in layers:
|
||||
layer.train()
|
||||
res += [layer.linear1.weight, layer.linear1.bias, layer.linear2.weight, layer.linear2.bias]
|
||||
|
||||
return res
|
||||
|
||||
def save(self, filename):
|
||||
state_dict = {}
|
||||
|
||||
for k, v in self.layers.items():
|
||||
state_dict[k] = (v[0].state_dict(), v[1].state_dict())
|
||||
|
||||
state_dict['step'] = self.step
|
||||
state_dict['name'] = self.name
|
||||
state_dict['sd_checkpoint'] = self.sd_checkpoint
|
||||
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
|
||||
|
||||
torch.save(state_dict, filename)
|
||||
|
||||
def load(self, filename):
|
||||
self.filename = filename
|
||||
if self.name is None:
|
||||
self.name = os.path.splitext(os.path.basename(filename))[0]
|
||||
|
||||
state_dict = torch.load(filename, map_location='cpu')
|
||||
|
||||
for size, sd in state_dict.items():
|
||||
if type(size) == int:
|
||||
self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1]))
|
||||
|
||||
self.name = state_dict.get('name', self.name)
|
||||
self.step = state_dict.get('step', 0)
|
||||
self.sd_checkpoint = state_dict.get('sd_checkpoint', None)
|
||||
self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None)
|
||||
|
||||
|
||||
def list_hypernetworks(path):
    """
    Map hypernetwork names to file paths for every `.pt` file found under
    `path`, searched recursively. The name is the file name without its
    extension; for duplicate names the file found last wins.
    """
    pattern = os.path.join(path, '**/*.pt')
    return {
        os.path.splitext(os.path.basename(found))[0]: found
        for found in glob.iglob(pattern, recursive=True)
    }
|
||||
|
||||
|
||||
def load_hypernetwork(filename):
    """
    Make the hypernetwork registered under `filename` the loaded one.

    If the name is not in `shared.hypernetworks`, any loaded hypernetwork is
    unloaded. Errors raised while loading are printed to stderr and not
    re-raised.
    """
    path = shared.hypernetworks.get(filename, None)

    if path is None:
        if shared.loaded_hypernetwork is not None:
            print("Unloading hypernetwork")
        shared.loaded_hypernetwork = None
        return

    print(f"Loading hypernetwork {filename}")
    try:
        shared.loaded_hypernetwork = Hypernetwork()
        shared.loaded_hypernetwork.load(path)
    except Exception:
        print(f"Error loading hypernetwork {path}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
|
||||
|
||||
|
||||
def find_closest_hypernetwork_name(search: str):
    """
    Return the shortest known hypernetwork name that contains `search`
    (case-insensitive), or None when `search` is empty or nothing matches.
    Among equally short names the one listed first wins.
    """
    if not search:
        return None

    needle = search.lower()
    candidates = [name for name in shared.hypernetworks if needle in name.lower()]
    return min(candidates, key=len) if candidates else None
|
||||
|
||||
|
||||
def apply_hypernetwork(hypernetwork, context, layer=None):
    """
    Run `context` through the module pair that `hypernetwork` holds for the
    size `context.shape[2]`.

    Returns `(context_k, context_v)`. When there is no hypernetwork, or it has
    no modules for that size, `context` is returned unchanged for both. If
    `layer` is given, the two modules are also stored on it as `hyper_k` and
    `hyper_v`.
    """
    available_layers = {} if hypernetwork is None else hypernetwork.layers
    module_pair = available_layers.get(context.shape[2], None)

    if module_pair is None:
        return context, context

    key_module = module_pair[0]
    value_module = module_pair[1]

    if layer is not None:
        layer.hyper_k = key_module
        layer.hyper_v = value_module

    return key_module(context), value_module(context)
|
||||
|
||||
|
||||
def attention_CrossAttention_forward(self, x, context=None, mask=None):
    """
    Cross-attention forward pass in which the key and value inputs are first
    passed through the currently loaded hypernetwork via apply_hypernetwork.
    `context` defaults to `x` when it is not given.
    """
    heads = self.heads

    query = self.to_q(x)
    context = default(context, x)

    context_k, context_v = apply_hypernetwork(shared.loaded_hypernetwork, context, self)
    key = self.to_k(context_k)
    value = self.to_v(context_v)

    # Split the head dimension out of the channel axis for all three tensors.
    query, key, value = (
        rearrange(tensor, 'b n (h d) -> (b h) n d', h=heads)
        for tensor in (query, key, value)
    )

    scores = einsum('b i d, b j d -> b i j', query, key) * self.scale

    if mask is not None:
        mask = rearrange(mask, 'b ... -> b (...)')
        max_neg_value = -torch.finfo(scores.dtype).max
        mask = repeat(mask, 'b j -> (b h) () j', h=heads)
        # Positions where the mask is False get the most negative value.
        scores.masked_fill_(~mask, max_neg_value)

    # attention, what we cannot get enough of
    attention = scores.softmax(dim=-1)

    merged = einsum('b i j, b j d -> b i d', attention, value)
    merged = rearrange(merged, '(b h) n d -> b n (h d)', h=heads)
    return self.to_out(merged)
|
||||
|
||||
|
||||
def stack_conds(conds):
    """
    Stack a list of conditioning tensors into one batch tensor.

    Tensors shorter than the longest one are padded by repeating their last
    row. Padded tensors replace the originals inside `conds`, so the list is
    modified in place.
    """
    if len(conds) == 1:
        return torch.stack(conds)

    # same as in reconstruct_multicond_batch
    token_count = max(cond.shape[0] for cond in conds)
    for position, cond in enumerate(conds):
        if cond.shape[0] == token_count:
            continue
        missing_rows = token_count - cond.shape[0]
        padding = cond[-1:].repeat([missing_rows, 1])
        conds[position] = torch.vstack([cond, padding])

    return torch.stack(conds)
|
||||
|
||||
def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
    """Train the named hypernetwork on the dataset found in `data_root`.

    The hypernetwork is loaded into `shared.loaded_hypernetwork`, trained with
    AdamW until `steps` is reached (or the scheduler finishes / the user
    interrupts), and finally saved to `<hypernetwork_dir>/<name>.pt`.

    Args:
        hypernetwork_name: key into `shared.hypernetworks`; must be non-empty.
        learn_rate: learning rate specification handed to LearnRateScheduler.
        batch_size: batch size passed to the dataset.
        data_root: directory with the training images.
        log_directory: base directory for logs; a `<date>/<name>` sub-directory is used.
        steps: total number of training steps.
        create_image_every: write a preview image every N steps (0 disables).
        save_hypernetwork_every: write an intermediate .pt every N steps (0 disables).
        template_file: prompt template file passed to the dataset.
        preview_from_txt2img: when true, the preview_* arguments configure the
            preview image; otherwise the first prompt of the batch and 20 steps are used.

    Returns:
        `(hypernetwork, filename)`: the trained hypernetwork and the path it was saved to.
        If the hypernetwork was already trained past `steps`, it is returned without saving.
    """
    assert hypernetwork_name, 'hypernetwork not selected'

    path = shared.hypernetworks.get(hypernetwork_name, None)
    shared.loaded_hypernetwork = Hypernetwork()
    shared.loaded_hypernetwork.load(path)

    shared.state.textinfo = "Initializing hypernetwork training..."
    shared.state.job_count = steps

    filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')

    log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name)
    unload = shared.opts.unload_models_when_training

    if save_hypernetwork_every > 0:
        hypernetwork_dir = os.path.join(log_directory, "hypernetworks")
        os.makedirs(hypernetwork_dir, exist_ok=True)
    else:
        hypernetwork_dir = None

    if create_image_every > 0:
        images_dir = os.path.join(log_directory, "images")
        os.makedirs(images_dir, exist_ok=True)
    else:
        images_dir = None

    shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
    with torch.autocast("cuda"):
        ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)

    if unload:
        # move the sub-models that are not used by the training step to the CPU
        shared.sd_model.cond_stage_model.to(devices.cpu)
        shared.sd_model.first_stage_model.to(devices.cpu)

    hypernetwork = shared.loaded_hypernetwork
    weights = hypernetwork.weights()
    for weight in weights:
        weight.requires_grad = True

    # ring buffer of recent losses; the reported loss is its mean
    losses = torch.zeros((32,))

    last_saved_file = "<none>"
    last_saved_image = "<none>"

    initial_step = hypernetwork.step or 0
    if initial_step > steps:
        return hypernetwork, filename

    scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
    optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)

    pbar = tqdm.tqdm(enumerate(ds), total=steps - initial_step)
    for i, entries in pbar:
        hypernetwork.step = i + initial_step

        scheduler.apply(optimizer, hypernetwork.step)
        if scheduler.finished:
            break

        if shared.state.interrupted:
            break

        with torch.autocast("cuda"):
            c = stack_conds([entry.cond for entry in entries]).to(devices.device)
            x = torch.stack([entry.latent for entry in entries]).to(devices.device)
            loss = shared.sd_model(x, c)[0]
            del x
            del c

            losses[hypernetwork.step % losses.shape[0]] = loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        pbar.set_description(f"loss: {losses.mean():.7f}")

        if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
            last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt')
            hypernetwork.save(last_saved_file)

        textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
            "loss": f"{losses.mean():.7f}",
            "learn_rate": scheduler.learn_rate
        })

        if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0:
            last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png')

            optimizer.zero_grad()
            # the preview needs the full model on the compute device
            shared.sd_model.cond_stage_model.to(devices.device)
            shared.sd_model.first_stage_model.to(devices.device)

            p = processing.StableDiffusionProcessingTxt2Img(
                sd_model=shared.sd_model,
                do_not_save_grid=True,
                do_not_save_samples=True,
            )

            if preview_from_txt2img:
                p.prompt = preview_prompt
                p.negative_prompt = preview_negative_prompt
                p.steps = preview_steps
                p.sampler_index = preview_sampler_index
                p.cfg_scale = preview_cfg_scale
                p.seed = preview_seed
                p.width = preview_width
                p.height = preview_height
            else:
                p.prompt = entries[0].cond_text
                p.steps = 20

            preview_text = p.prompt

            processed = processing.process_images(p)
            image = processed.images[0] if len(processed.images) > 0 else None

            if unload:
                shared.sd_model.cond_stage_model.to(devices.cpu)
                shared.sd_model.first_stage_model.to(devices.cpu)

            if image is not None:
                shared.state.current_image = image
                image.save(last_saved_image)
                last_saved_image += f", prompt: {preview_text}"

        shared.state.job_no = hypernetwork.step

        shared.state.textinfo = f"""
<p>
Loss: {losses.mean():.7f}<br/>
Step: {hypernetwork.step}<br/>
Last prompt: {html.escape(entries[0].cond_text)}<br/>
Last saved hypernetwork: {html.escape(last_saved_file)}<br/>
Last saved image: {html.escape(last_saved_image)}<br/>
</p>
"""

    # record which checkpoint the hypernetwork was trained against
    checkpoint = sd_models.select_checkpoint()

    hypernetwork.sd_checkpoint = checkpoint.hash
    hypernetwork.sd_checkpoint_name = checkpoint.model_name
    hypernetwork.save(filename)

    return hypernetwork, filename
|
||||
|
||||
|
||||
@ -0,0 +1,47 @@
|
||||
import html
|
||||
import os
|
||||
|
||||
import gradio as gr
|
||||
|
||||
import modules.textual_inversion.textual_inversion
|
||||
import modules.textual_inversion.preprocess
|
||||
from modules import sd_hijack, shared, devices
|
||||
from modules.hypernetworks import hypernetwork
|
||||
|
||||
|
||||
def create_hypernetwork(name, enable_sizes):
    """Create an empty hypernetwork file called `<name>.pt` in the hypernetwork directory.

    Fails with an AssertionError if the file already exists. After saving, the
    list of known hypernetworks is reloaded.

    Returns a dropdown update listing all hypernetwork names (sorted), a
    "Created: ..." message, and an empty string.
    """
    target_path = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
    assert not os.path.exists(target_path), f"file {target_path} already exists"

    sizes = [int(size) for size in enable_sizes]
    new_hypernetwork = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=sizes)
    new_hypernetwork.save(target_path)

    shared.reload_hypernetworks()

    available_names = sorted(shared.hypernetworks.keys())
    return gr.Dropdown.update(choices=available_names), f"Created: {target_path}", ""
|
||||
|
||||
|
||||
def train_hypernetwork(*args):
    """UI entry point for hypernetwork training.

    Forwards `*args` to modules.hypernetworks.hypernetwork.train_hypernetwork
    with attention optimizations undone. Whatever happens, the previously
    loaded hypernetwork is restored, both sub-models are moved back to the
    compute device and the optimizations are re-applied.

    Returns a `(message, "")` tuple; exceptions from training propagate.
    """
    previously_loaded = shared.loaded_hypernetwork

    assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible'

    try:
        sd_hijack.undo_optimizations()

        trained, saved_path = modules.hypernetworks.hypernetwork.train_hypernetwork(*args)

        outcome = 'interrupted' if shared.state.interrupted else 'finished'
        message = f"""
Training {outcome} at {trained.step} steps.
Hypernetwork saved to {html.escape(saved_path)}
"""
        return message, ""
    finally:
        shared.loaded_hypernetwork = previously_loaded
        shared.sd_model.cond_stage_model.to(devices.device)
        shared.sd_model.first_stage_model.to(devices.device)
        sd_hijack.apply_optimizations()
|
||||
|
||||
@ -0,0 +1,181 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
|
||||
def traverse_all_files(output_dir, image_list, curr_dir=None):
    """Recursively collect image-like files below `output_dir`.

    Paths are appended to `image_list` relative to `output_dir`. A file is
    collected when it does not end in ".txt" and has a "." within its last
    ten characters (but not as the first of them).

    Args:
        output_dir: root directory of the scan.
        image_list: list the relative paths are appended to.
        curr_dir: path relative to `output_dir` currently being scanned;
            None means `output_dir` itself.

    Returns:
        `image_list`. A missing or unreadable root directory yields the list unchanged.
    """
    curr_path = output_dir if curr_dir is None else os.path.join(output_dir, curr_dir)
    try:
        entries = os.listdir(curr_path)
    except OSError:
        # Not a directory (or not listable): treat the entry itself as a candidate file.
        if curr_dir is not None and curr_dir[-10:].rfind(".") > 0 and curr_dir[-4:] != ".txt":
            image_list.append(curr_dir)
        return image_list

    for entry in entries:
        relative = entry if curr_dir is None else os.path.join(curr_dir, entry)
        if relative[-4:] == ".txt":
            continue
        # `relative` already contains curr_dir, so it is joined to the root only once
        absolute = os.path.join(output_dir, relative)
        if os.path.isfile(absolute) and relative[-10:].rfind(".") > 0:
            image_list.append(relative)
        else:
            image_list = traverse_all_files(output_dir, image_list, relative)
    return image_list
|
||||
|
||||
|
||||
def get_recent_images(dir_name, page_index, step, image_index, tabname):
    """Return one page of the image history of `dir_name`, newest first.

    Args:
        dir_name: directory that is scanned recursively for images.
        page_index: current 1-based page; -1 selects the last page.
        step: offset added to `page_index` (ignored when `page_index` is -1).
        image_index: index of the selected image within the page.
        tabname: "extras" uses 12 images per page, every other tab 48.

    Returns:
        A 6-tuple: full paths of the page's images, the clamped page index,
        the page's relative file names, the selected relative file name (or
        None), its full path (or None), and an empty string.
    """
    page_index = int(page_index)
    image_index = int(image_index)

    if os.path.isdir(dir_name):
        image_list = traverse_all_files(dir_name, [])
        image_list = sorted(image_list, key=lambda file: -os.path.getctime(os.path.join(dir_name, file)))
    else:
        # nothing has been generated into this directory yet
        image_list = []

    num = 48 if tabname != "extras" else 12
    # ceil(len / num), but always at least one (possibly empty) page
    max_page_index = max(1, (len(image_list) + num - 1) // num)

    page_index = max_page_index if page_index == -1 else page_index + step
    page_index = min(max(page_index, 1), max_page_index)

    idx_frm = (page_index - 1) * num
    image_list = image_list[idx_frm:idx_frm + num]

    if 0 <= image_index < len(image_list):
        current_file = image_list[image_index]
        hidden = os.path.join(dir_name, current_file)
    else:
        current_file = None
        hidden = None

    return [os.path.join(dir_name, file) for file in image_list], page_index, image_list, current_file, hidden, ""
|
||||
|
||||
|
||||
def first_page_click(dir_name, page_index, image_index, tabname):
    """Handler for the "First Page" button: show page 1 regardless of `page_index`."""
    first_page, no_step = 1, 0
    return get_recent_images(dir_name, first_page, no_step, image_index, tabname)
|
||||
|
||||
|
||||
def end_page_click(dir_name, page_index, image_index, tabname):
    """Handler for the "End Page" button: -1 asks get_recent_images for the last page."""
    last_page, no_step = -1, 0
    return get_recent_images(dir_name, last_page, no_step, image_index, tabname)
|
||||
|
||||
|
||||
def prev_page_click(dir_name, page_index, image_index, tabname):
    """Handler for the "Prev Page" button: move one page back from `page_index`."""
    one_back = -1
    return get_recent_images(dir_name, page_index, one_back, image_index, tabname)
|
||||
|
||||
|
||||
def next_page_click(dir_name, page_index, image_index, tabname):
    """Handler for the "Next Page" button: move one page forward from `page_index`."""
    one_forward = 1
    return get_recent_images(dir_name, page_index, one_forward, image_index, tabname)
|
||||
|
||||
|
||||
def page_index_change(dir_name, page_index, image_index, tabname):
    """Handler for an explicit page number (also used by "Renew Page"): reload `page_index`."""
    no_step = 0
    return get_recent_images(dir_name, page_index, no_step, image_index, tabname)
|
||||
|
||||
|
||||
def show_image_info(num, image_path, filenames):
    """Resolve the selected gallery index to a file.

    Returns `(file name, num unchanged, full path of the file under image_path)`.
    """
    selected = filenames[int(num)]
    full_path = os.path.join(image_path, selected)
    return selected, num, full_path
|
||||
|
||||
|
||||
def delete_image(delete_num, tabname, dir_name, name, page_index, filenames, image_index):
    """Delete `delete_num` consecutive images starting at `name`.

    For every deleted image the ".txt" file with the same base name is removed
    as well, if it exists.

    Args:
        delete_num: how many consecutive entries of `filenames` to delete.
        dir_name: directory the names in `filenames` are relative to.
        name: file name of the first image to delete; "" means nothing is selected.
        filenames: file names of the current page.
        tabname, page_index, image_index: not used here.

    Returns:
        `(remaining file names, 1)` after a deletion. When nothing is selected,
        or the selected name is not in `filenames`, `(filenames, delete_num)`
        is returned unchanged.
    """
    if name == "" or not filenames or name not in filenames:
        return filenames, delete_num

    delete_num = int(delete_num)
    first = list(filenames).index(name)
    doomed = range(first, first + delete_num)

    remaining = []
    for position, file_name in enumerate(filenames):
        if position not in doomed:
            remaining.append(file_name)
            continue

        path = os.path.join(dir_name, file_name)
        if os.path.exists(path):
            print(f"Delete file {path}")
            os.remove(path)
            txt_file = os.path.splitext(path)[0] + ".txt"
            if os.path.exists(txt_file):
                os.remove(txt_file)
        else:
            print(f"Not exists file {path}")

    return remaining, 1
|
||||
|
||||
|
||||
def show_images_history(gr, opts, tabname, run_pnginfo, switch_dict):
    """Build the image-history UI for one tab and wire up its events.

    Args:
        gr: the gradio module (passed in by the caller).
        opts: options object; `outdir_samples` takes precedence over the
            per-tab `outdir_<tab>_samples` directory.
        tabname: "txt2img", "img2img" or "extras".
        run_pnginfo: callback run when the hidden image changes; its three
            outputs go to (info1, img_file_info, info2).
        switch_dict: dict with a "fn" callable and "t2i" / "i2i" entries used
            to bind the "Send to ..." buttons.

    Raises:
        ValueError: if no common output directory is configured and `tabname`
            is not one of the known tabs.
    """
    if opts.outdir_samples != "":
        dir_name = opts.outdir_samples
    elif tabname == "txt2img":
        dir_name = opts.outdir_txt2img_samples
    elif tabname == "img2img":
        dir_name = opts.outdir_img2img_samples
    elif tabname == "extras":
        dir_name = opts.outdir_extras_samples
    else:
        raise ValueError(f"unknown images history tab: {tabname}")

    # rebuild the path with the platform's separator
    d = dir_name.split("/")
    dir_name = "/" if dir_name.startswith("/") else d[0]
    for p in d[1:]:
        dir_name = os.path.join(dir_name, p)

    with gr.Row():
        renew_page = gr.Button('Renew Page', elem_id=tabname + "_images_history_renew_page")
        first_page = gr.Button('First Page')
        prev_page = gr.Button('Prev Page')
        page_index = gr.Number(value=1, label="Page Index")
        next_page = gr.Button('Next Page')
        end_page = gr.Button('End Page')
    with gr.Row(elem_id=tabname + "_images_history"):
        with gr.Row():
            with gr.Column(scale=2):
                history_gallery = gr.Gallery(show_label=False, elem_id=tabname + "_images_history_gallery").style(grid=6)
                with gr.Row():
                    delete_num = gr.Number(value=1, interactive=True, label="number of images to delete consecutively next")
                    delete = gr.Button('Delete', elem_id=tabname + "_images_history_del_button")
            with gr.Column():
                with gr.Row():
                    pnginfo_send_to_txt2img = gr.Button('Send to txt2img')
                    pnginfo_send_to_img2img = gr.Button('Send to img2img')
                with gr.Row():
                    with gr.Column():
                        img_file_info = gr.Textbox(label="Generate Info", interactive=False)
                        img_file_name = gr.Textbox(label="File Name", interactive=False)
                with gr.Row():
                    # hidden items that carry state between the handlers
                    img_path = gr.Textbox(dir_name.rstrip("/"), visible=False)
                    tabname_box = gr.Textbox(tabname, visible=False)
                    image_index = gr.Textbox(value=-1, visible=False)
                    set_index = gr.Button('set_index', elem_id=tabname + "_images_history_set_index", visible=False)
                    filenames = gr.State()
                    hidden = gr.Image(type="pil", visible=False)
                    info1 = gr.Textbox(visible=False)
                    info2 = gr.Textbox(visible=False)

    # turn pages
    gallery_inputs = [img_path, page_index, image_index, tabname_box]
    gallery_outputs = [history_gallery, page_index, filenames, img_file_name, hidden, img_file_name]

    first_page.click(first_page_click, _js="images_history_turnpage", inputs=gallery_inputs, outputs=gallery_outputs)
    next_page.click(next_page_click, _js="images_history_turnpage", inputs=gallery_inputs, outputs=gallery_outputs)
    prev_page.click(prev_page_click, _js="images_history_turnpage", inputs=gallery_inputs, outputs=gallery_outputs)
    end_page.click(end_page_click, _js="images_history_turnpage", inputs=gallery_inputs, outputs=gallery_outputs)
    page_index.submit(page_index_change, _js="images_history_turnpage", inputs=gallery_inputs, outputs=gallery_outputs)
    renew_page.click(page_index_change, _js="images_history_turnpage", inputs=gallery_inputs, outputs=gallery_outputs)

    # other functions
    set_index.click(show_image_info, _js="images_history_get_current_img", inputs=[tabname_box, img_path, filenames], outputs=[img_file_name, image_index, hidden])
    img_file_name.change(fn=None, _js="images_history_enable_del_buttons", inputs=None, outputs=None)
    delete.click(delete_image, _js="images_history_delete", inputs=[delete_num, tabname_box, img_path, img_file_name, page_index, filenames, image_index], outputs=[filenames, delete_num])
    hidden.change(fn=run_pnginfo, inputs=[hidden], outputs=[info1, img_file_info, info2])

    switch_dict["fn"](pnginfo_send_to_txt2img, switch_dict["t2i"], img_file_info, 'switch_to_txt2img')
    switch_dict["fn"](pnginfo_send_to_img2img, switch_dict["i2i"], img_file_info, 'switch_to_img2img_img2img')
|
||||
|
||||
|
||||
def create_history_tabs(gr, opts, run_pnginfo, switch_dict):
    """Create the "txt2img / img2img / extras history" tab group.

    Each tab hosts its own Blocks container populated by show_images_history.
    Returns the outer Blocks object.
    """
    with gr.Blocks(analytics_enabled=False) as images_history:
        with gr.Tabs():
            for tabname in ("txt2img", "img2img", "extras"):
                with gr.Tab(f"{tabname} history"):
                    with gr.Blocks(analytics_enabled=False):
                        show_images_history(gr, opts, tabname, run_pnginfo, switch_dict)
    return images_history
|
||||
@ -0,0 +1,15 @@
|
||||
from pyngrok import ngrok, conf, exception
|
||||
|
||||
|
||||
def connect(token, port):
    """Open an ngrok tunnel to `port` and print the resulting public URL.

    Args:
        token: ngrok auth token; None is replaced by the string 'None'.
        port: local port to expose.

    A PyngrokNgrokError is reported on stdout instead of being raised.
    """
    if token is None:
        token = 'None'
    conf.get_default().auth_token = token
    try:
        public_url = ngrok.connect(port).public_url
    except exception.PyngrokNgrokError:
        print(f'Invalid ngrok authtoken, ngrok connection aborted.\n'
              f'Your token: {token}, get the right one on https://dashboard.ngrok.com/get-started/your-authtoken')
    else:
        print(f'ngrok connected to localhost:{port}! URL: {public_url}\n'
              'You can use this link after the launch is complete.')
|
||||
@ -0,0 +1,117 @@
|
||||
# this code is adapted from the script contributed by anon from /h/
|
||||
|
||||
import io
|
||||
import pickle
|
||||
import collections
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import torch
|
||||
import numpy
|
||||
import _codecs
|
||||
import zipfile
|
||||
import re
|
||||
|
||||
|
||||
# PyTorch 1.13 and later have _TypedStorage renamed to TypedStorage
if hasattr(torch.storage, 'TypedStorage'):
    TypedStorage = torch.storage.TypedStorage
else:
    TypedStorage = torch.storage._TypedStorage
|
||||
|
||||
|
||||
def encode(*args):
    """Thin wrapper around `_codecs.encode`; this is what the unpickler hands out for `_codecs.encode`."""
    return _codecs.encode(*args)
|
||||
|
||||
|
||||
class RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that only resolves an explicit allow-list of globals.

    Used to walk through a checkpoint's pickle stream without importing
    arbitrary callables; any global that is not listed in `find_class` raises
    `pickle.UnpicklingError`.
    """

    def persistent_load(self, saved_id):
        # Only 'storage' records are accepted; an empty storage stands in for the data.
        assert saved_id[0] == 'storage'
        return TypedStorage()

    def find_class(self, module, name):
        """Return the object for `module.name` if it is allowed, otherwise raise."""
        if (module, name) == ('collections', 'OrderedDict'):
            return getattr(collections, name)
        if module == 'torch._utils' and name in ('_rebuild_tensor_v2', '_rebuild_parameter'):
            return getattr(torch._utils, name)
        if module == 'torch' and name in ('FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage'):
            return getattr(torch, name)
        if (module, name) == ('torch.nn.modules.container', 'ParameterDict'):
            return getattr(torch.nn.modules.container, name)
        if (module, name) == ('numpy.core.multiarray', 'scalar'):
            return numpy.core.multiarray.scalar
        if (module, name) == ('numpy', 'dtype'):
            return numpy.dtype
        if (module, name) == ('_codecs', 'encode'):
            return encode
        if (module, name) == ("pytorch_lightning.callbacks", 'model_checkpoint'):
            import pytorch_lightning.callbacks
            return pytorch_lightning.callbacks.model_checkpoint
        if (module, name) == ("pytorch_lightning.callbacks.model_checkpoint", 'ModelCheckpoint'):
            import pytorch_lightning.callbacks.model_checkpoint
            return pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
        if (module, name) == ("__builtin__", 'set'):
            return set

        # Forbid everything else.
        raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden")
|
||||
|
||||
|
||||
# Exact member names a zip-format checkpoint is allowed to contain ...
allowed_zip_names = ["archive/data.pkl", "archive/version"]
# ... plus the numbered storage blobs under archive/data/.
allowed_zip_names_re = re.compile(r"^archive/data/\d+$")


def check_zip_filenames(filename, names):
    """Reject a zip checkpoint that contains unexpected members.

    Args:
        filename: path of the archive, used only for the error message.
        names: member names of the archive.

    Raises:
        Exception: for the first name that is neither in `allowed_zip_names`
            nor a full match of `allowed_zip_names_re`.
    """
    for name in names:
        if name in allowed_zip_names:
            continue
        # fullmatch: with match(), `$` would also accept a name ending in a newline
        if allowed_zip_names_re.fullmatch(name):
            continue

        raise Exception(f"bad file inside {filename}: {name}")
|
||||
|
||||
|
||||
def check_pt(filename):
    """Verify a checkpoint file by unpickling it with RestrictedUnpickler.

    Zip archives (the new pytorch format) have their member names checked and
    `archive/data.pkl` loaded. Anything that is not a zip file is treated as
    the old pytorch format: five consecutive pickles in a single file.
    Errors raised during the check propagate to the caller.
    """
    try:
        # new pytorch format is a zip file
        with zipfile.ZipFile(filename) as archive:
            check_zip_filenames(filename, archive.namelist())

            with archive.open('archive/data.pkl') as pickled:
                RestrictedUnpickler(pickled).load()

    except zipfile.BadZipfile:
        # if it's not a zip file, it's an old pytorch format, with five objects written to pickle
        with open(filename, "rb") as pickled:
            unpickler = RestrictedUnpickler(pickled)
            for _ in range(5):
                unpickler.load()
|
||||
|
||||
|
||||
def load(filename, *args, **kwargs):
    """Replacement for `torch.load` that verifies the file before loading it.

    Unless `--disable-safe-unpickle` is set, the file is first run through
    check_pt. If that raises, the error is printed to stderr and None is
    returned; otherwise the call is forwarded to the original `torch.load`.
    """
    from modules import shared

    try:
        if not shared.cmd_opts.disable_safe_unpickle:
            check_pt(filename)

    except pickle.UnpicklingError:
        report = (
            f"Error verifying pickled file from {filename}:",
            traceback.format_exc(),
            "-----> !!!! The file is most likely corrupted !!!! <-----",
            "You can skip this check with --disable-safe-unpickle commandline argument, but that is not going to help you.\n\n",
        )
        for line in report:
            print(line, file=sys.stderr)
        return None

    except Exception:
        report = (
            f"Error verifying pickled file from {filename}:",
            traceback.format_exc(),
            "\nThe file may be malicious, so the program is not going to read it.",
            "You can skip this check with --disable-safe-unpickle commandline argument.\n\n",
        )
        for line in report:
            print(line, file=sys.stderr)
        return None

    return unsafe_torch_load(filename, *args, **kwargs)
|
||||
|
||||
|
||||
# Keep a handle on the original loader, then route every torch.load call through the checked wrapper.
unsafe_torch_load, torch.load = torch.load, load
|
||||
@ -0,0 +1,88 @@
|
||||
import os.path
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import PIL.Image
|
||||
import numpy as np
|
||||
import torch
|
||||
from basicsr.utils.download_util import load_file_from_url
|
||||
|
||||
import modules.upscaler
|
||||
from modules import devices, modelloader
|
||||
from modules.scunet_model_arch import SCUNet as net
|
||||
|
||||
|
||||
class UpscalerScuNET(modules.upscaler.Upscaler):
    """Upscaler backed by the SCUNet model (GAN and PSNR variants)."""

    def __init__(self, dirname):
        """Register every ScuNET model found, plus the PSNR download if it is not among them."""
        self.name = "ScuNET"
        self.model_name = "ScuNET GAN"
        self.model_name2 = "ScuNET PSNR"
        self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_gan.pth"
        self.model_url2 = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_psnr.pth"
        self.user_path = dirname
        super().__init__()
        model_paths = self.find_models(ext_filter=[".pth"])
        scalers = []
        add_model2 = True
        for file in model_paths:
            if "http" in file:
                name = self.model_name
            else:
                name = modelloader.friendly_name(file)
            if name == self.model_name2 or file == self.model_url2:
                add_model2 = False
            try:
                scaler_data = modules.upscaler.UpscalerData(name, file, self, 4)
                scalers.append(scaler_data)
            except Exception:
                print(f"Error loading ScuNET model: {file}", file=sys.stderr)
                print(traceback.format_exc(), file=sys.stderr)
        if add_model2:
            scaler_data2 = modules.upscaler.UpscalerData(self.model_name2, self.model_url2, self)
            scalers.append(scaler_data2)
        self.scalers = scalers

    def do_upscale(self, img: PIL.Image, selected_file):
        """Run `img` through the model at `selected_file`.

        Returns the processed image, or `img` unchanged if the model could not be loaded.
        """
        torch.cuda.empty_cache()

        model = self.load_model(selected_file)
        if model is None:
            return img

        device = devices.device_scunet
        img = np.array(img)
        img = img[:, :, ::-1]  # reverse channel order for the model
        img = np.moveaxis(img, 2, 0) / 255
        img = torch.from_numpy(img).float()
        img = img.unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(img)
        output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
        output = 255. * np.moveaxis(output, 0, 2)
        output = output.astype(np.uint8)
        output = output[:, :, ::-1]  # back to the original channel order
        torch.cuda.empty_cache()
        return PIL.Image.fromarray(output, 'RGB')

    def load_model(self, path: str):
        """Load the SCUNet weights at `path` (a local file or a download URL).

        Returns the model in eval mode on `devices.device_scunet`, or None if
        the weights file is not available.
        """
        device = devices.device_scunet
        if "http" in path:
            # Download the URL that was actually selected. The default model keeps its
            # historical "<name>.pth" file name; any other URL is stored under the
            # file name taken from the URL so the two variants do not overwrite each other.
            file_name = "%s.pth" % self.name if path == self.model_url else None
            filename = load_file_from_url(url=path, model_dir=self.model_path, file_name=file_name,
                                          progress=True)
        else:
            filename = path
        if filename is None or not os.path.exists(os.path.join(self.model_path, filename)):
            print(f"ScuNET: Unable to load model from {filename}", file=sys.stderr)
            return None

        model = net(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64)
        model.load_state_dict(torch.load(filename), strict=True)
        model.eval()
        for k, v in model.named_parameters():
            v.requires_grad = False
        model = model.to(device)

        return model
|
||||
|
||||
@ -0,0 +1,265 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from einops import rearrange
|
||||
from einops.layers.torch import Rearrange
|
||||
from timm.models.layers import trunc_normal_, DropPath
|
||||
|
||||
|
||||
class WMSA(nn.Module):
    """Window multi-head self-attention as used in Swin Transformer.

    `type` 'W' attends within fixed windows; any other value shifts the
    windows by half a window and masks attention across the wrapped border.
    """

    def __init__(self, input_dim, output_dim, head_dim, window_size, type):
        super(WMSA, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.head_dim = head_dim
        self.scale = self.head_dim ** -0.5
        self.n_heads = input_dim // head_dim
        self.window_size = window_size
        self.type = type
        self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True)

        # number of distinct relative offsets along one axis
        span = 2 * window_size - 1
        self.relative_position_params = nn.Parameter(torch.zeros(span * span, self.n_heads))

        self.linear = nn.Linear(self.input_dim, self.output_dim)

        trunc_normal_(self.relative_position_params, std=.02)
        # re-register the table as (n_heads, span, span)
        table = self.relative_position_params.view(span, span, self.n_heads)
        self.relative_position_params = torch.nn.Parameter(table.transpose(1, 2).transpose(0, 1))

    def generate_mask(self, h, w, p, shift):
        """Generate the attention mask of SW-MSA.

        Args:
            h, w: number of windows along each axis.
            p: window size.
            shift: shift parameter of the cyclic shift.
        Returns:
            Boolean mask, True where attention must be suppressed. For type
            'W' the all-False 6-D tensor is returned as is; otherwise it is
            rearranged to (1, 1, h*w, p*p, p*p).
        """
        # supporting square.
        mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device)
        if self.type == 'W':
            return mask

        s = p - shift
        mask[-1, :, :s, :, s:, :] = True
        mask[-1, :, s:, :, :s, :] = True
        mask[:, -1, :, :s, :, s:] = True
        mask[:, -1, :, s:, :, :s] = True
        return rearrange(mask, 'w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)')

    def forward(self, x):
        """Forward pass of the window multi-head self-attention module.

        Args:
            x: input tensor with shape of [b h w c].
        Returns:
            output: tensor shape [b h w c]
        """
        shifted = self.type != 'W'
        half = self.window_size // 2

        if shifted:
            x = torch.roll(x, shifts=(-half, -half), dims=(1, 2))
        x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size)
        h_windows = x.size(1)
        w_windows = x.size(2)

        x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size)
        qkv = self.embedding_layer(x)
        q, k, v = rearrange(qkv, 'b nw np (threeh c) -> threeh b nw np c', c=self.head_dim).chunk(3, dim=0)
        sim = torch.einsum('hbwpc,hbwqc->hbwpq', q, k) * self.scale
        # Adding learnable relative embedding
        sim = sim + rearrange(self.relative_embedding(), 'h p q -> h 1 1 p q')
        # Using Attn Mask to distinguish different subwindows.
        if shifted:
            attn_mask = self.generate_mask(h_windows, w_windows, self.window_size, shift=half)
            sim = sim.masked_fill_(attn_mask, float("-inf"))

        probs = nn.functional.softmax(sim, dim=-1)
        output = torch.einsum('hbwij,hbwjc->hbwic', probs, v)
        output = rearrange(output, 'h b w p c -> b w p (h c)')
        output = self.linear(output)
        output = rearrange(output, 'b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c', w1=h_windows, p1=self.window_size)

        if shifted:
            output = torch.roll(output, shifts=(half, half), dims=(1, 2))
        return output

    def relative_embedding(self):
        """Look up the relative position parameters for every pair of positions in a window."""
        size = self.window_size
        cord = torch.tensor(np.array([[i, j] for i in range(size) for j in range(size)]))
        relation = cord[:, None, :] - cord[None, :, :] + size - 1
        # negative is allowed
        rows = relation[:, :, 0].long()
        cols = relation[:, :, 1].long()
        return self.relative_position_params[:, rows, cols]
|
||||
|
||||
|
||||
class Block(nn.Module):
    """Swin Transformer block: window attention and an MLP, each with a residual connection."""

    def __init__(self, input_dim, output_dim, head_dim, window_size, drop_path, type='W', input_resolution=None):
        """
        Args:
            input_dim, output_dim: channel sizes of the block.
            head_dim: channels per attention head.
            window_size: attention window size.
            drop_path: drop-path rate; 0 disables it.
            type: 'W' or 'SW'.
            input_resolution: if given and not larger than `window_size`, the
                block falls back to type 'W'. None leaves `type` as requested.
        """
        super(Block, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        assert type in ['W', 'SW']
        self.type = type
        # the default None must not be compared against an int
        if input_resolution is not None and input_resolution <= window_size:
            self.type = 'W'

        self.ln1 = nn.LayerNorm(input_dim)
        self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.ln2 = nn.LayerNorm(input_dim)
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, 4 * input_dim),
            nn.GELU(),
            nn.Linear(4 * input_dim, output_dim),
        )

    def forward(self, x):
        x = x + self.drop_path(self.msa(self.ln1(x)))
        x = x + self.drop_path(self.mlp(self.ln2(x)))
        return x
|
||||
|
||||
|
||||
class ConvTransBlock(nn.Module):
    """Block that splits the channels between a conv branch and a Swin Transformer branch.

    The input is projected by a 1x1 conv, split into `conv_dim` and
    `trans_dim` channels, each part is processed by its branch, and the
    concatenated result is projected again and added to the input.
    """

    def __init__(self, conv_dim, trans_dim, head_dim, window_size, drop_path, type='W', input_resolution=None):
        """
        Args:
            conv_dim: channels handled by the conv branch.
            trans_dim: channels handled by the transformer branch.
            head_dim, window_size, drop_path: forwarded to the transformer Block.
            type: 'W' or 'SW'.
            input_resolution: if given and not larger than `window_size`, the
                block falls back to type 'W'. None leaves `type` as requested.
        """
        super(ConvTransBlock, self).__init__()
        self.conv_dim = conv_dim
        self.trans_dim = trans_dim
        self.head_dim = head_dim
        self.window_size = window_size
        self.drop_path = drop_path
        self.type = type
        self.input_resolution = input_resolution

        assert self.type in ['W', 'SW']
        # the default None must not be compared against an int
        if self.input_resolution is not None and self.input_resolution <= self.window_size:
            self.type = 'W'

        self.trans_block = Block(self.trans_dim, self.trans_dim, self.head_dim, self.window_size, self.drop_path,
                                 self.type, self.input_resolution)
        self.conv1_1 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True)
        self.conv1_2 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True)

        self.conv_block = nn.Sequential(
            nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False),
            nn.ReLU(True),
            nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False)
        )

    def forward(self, x):
        conv_x, trans_x = torch.split(self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1)
        conv_x = self.conv_block(conv_x) + conv_x
        # the transformer block works channels-last; no need to build a Rearrange layer per call
        trans_x = rearrange(trans_x, 'b c h w -> b h w c')
        trans_x = self.trans_block(trans_x)
        trans_x = rearrange(trans_x, 'b h w c -> b c h w')
        res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1))
        x = x + res

        return x
|
||||
|
||||
|
||||
class SCUNet(nn.Module):
    """U-shaped network assembled from ConvTransBlock stages with strided conv down/up-sampling."""

    def __init__(self, in_nc=3, config=None, dim=64, drop_path_rate=0.0, input_resolution=256):
        """Build the head, three down stages, body, three up stages and tail.

        ``config`` lists the number of ConvTransBlocks for each of the seven
        stages and defaults to two blocks per stage.
        """
        super().__init__()
        if config is None:
            config = [2, 2, 2, 2, 2, 2, 2]
        self.config = config
        self.dim = dim
        self.head_dim = 32
        self.window_size = 8

        # drop path rate for each layer
        dpr = [rate.item() for rate in torch.linspace(0, drop_path_rate, sum(config))]

        # Index into dpr of the first block of every stage.
        offsets = [sum(config[:stage]) for stage in range(7)]

        def make_stage(stage, branch_dim, resolution):
            """Blocks of one stage; even positions use 'W', odd positions use 'SW'."""
            return [
                ConvTransBlock(branch_dim, branch_dim, self.head_dim, self.window_size, dpr[i + offsets[stage]],
                               'SW' if i % 2 else 'W', resolution)
                for i in range(config[stage])
            ]

        head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)]

        down1 = make_stage(0, dim // 2, input_resolution)
        down1.append(nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False))

        down2 = make_stage(1, dim, input_resolution // 2)
        down2.append(nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False))

        down3 = make_stage(2, 2 * dim, input_resolution // 4)
        down3.append(nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False))

        body = make_stage(3, 4 * dim, input_resolution // 8)

        up3 = [nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False)]
        up3 += make_stage(4, 2 * dim, input_resolution // 4)

        up2 = [nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False)]
        up2 += make_stage(5, dim, input_resolution // 2)

        up1 = [nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False)]
        up1 += make_stage(6, dim // 2, input_resolution)

        tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)]

        self.m_head = nn.Sequential(*head)
        self.m_down1 = nn.Sequential(*down1)
        self.m_down2 = nn.Sequential(*down2)
        self.m_down3 = nn.Sequential(*down3)
        self.m_body = nn.Sequential(*body)
        self.m_up3 = nn.Sequential(*up3)
        self.m_up2 = nn.Sequential(*up2)
        self.m_up1 = nn.Sequential(*up1)
        self.m_tail = nn.Sequential(*tail)
        # self.apply(self._init_weights)

    def forward(self, x0):
        """Pad the input to a multiple of 64, run the U-Net, and crop back to the input size."""
        h, w = x0.size()[-2:]
        pad_bottom = int(np.ceil(h / 64) * 64 - h)
        pad_right = int(np.ceil(w / 64) * 64 - w)
        x0 = nn.ReplicationPad2d((0, pad_right, 0, pad_bottom))(x0)

        # Encoder path; each intermediate is kept for the skip additions below.
        x1 = self.m_head(x0)
        x2 = self.m_down1(x1)
        x3 = self.m_down2(x2)
        x4 = self.m_down3(x3)

        x = self.m_body(x4)

        # Decoder path with additive skip connections.
        x = self.m_up3(x + x4)
        x = self.m_up2(x + x3)
        x = self.m_up1(x + x2)
        x = self.m_tail(x + x1)

        return x[..., :h, :w]

    def _init_weights(self, m):
        """Initialise Linear layers (truncated normal, zero bias) and LayerNorm layers (bias 0, weight 1)."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
            return
        if isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
|
||||
@ -0,0 +1,306 @@
|
||||
import math
|
||||
import sys
|
||||
import traceback
|
||||
import importlib
|
||||
|
||||
import torch
|
||||
from torch import einsum
|
||||
|
||||
from ldm.util import default
|
||||
from einops import rearrange
|
||||
|
||||
from modules import shared
|
||||
from modules.hypernetworks import hypernetwork
|
||||
|
||||
|
||||
if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers:
|
||||
try:
|
||||
import xformers.ops
|
||||
shared.xformers_available = True
|
||||
except Exception:
|
||||
print("Cannot import xformers", file=sys.stderr)
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
|
||||
|
||||
# see https://github.com/basujindal/stable-diffusion/pull/117 for discussion
|
||||
def split_cross_attention_forward_v1(self, x, context=None, mask=None):
    """Cross-attention forward pass that processes the (batch*heads) axis two entries at a time.

    ``context`` falls back to ``x`` when not given, and is passed through the
    loaded hypernetwork before the key/value projections. ``mask`` is accepted
    for signature compatibility but is not used.

    Returns the result of ``self.to_out`` applied to the merged heads.
    """
    h = self.heads

    q_in = self.to_q(x)
    context = default(context, x)

    context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context)
    k_in = self.to_k(context_k)
    v_in = self.to_v(context_v)
    del context, context_k, context_v, x

    q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in))
    del q_in, k_in, v_in

    # Allocate the output in the dtype of q (as split_cross_attention_forward does);
    # without dtype the buffer would be float32 even for half-precision inputs.
    r1 = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)
    for i in range(0, q.shape[0], 2):
        end = i + 2
        s1 = einsum('b i d, b j d -> b i j', q[i:end], k[i:end])
        s1 *= self.scale

        s2 = s1.softmax(dim=-1)
        del s1

        r1[i:end] = einsum('b i j, b j d -> b i d', s2, v[i:end])
        del s2
    del q, k, v

    r2 = rearrange(r1, '(b h) n d -> b n (h d)', h=h)
    del r1

    return self.to_out(r2)
|
||||
|
||||
|
||||
# taken from https://github.com/Doggettx/stable-diffusion and modified
|
||||
def split_cross_attention_forward(self, x, context=None, mask=None):
    """Cross-attention forward pass that slices the query axis when free CUDA memory is short.

    The size of the full attention matrix is estimated and compared with the
    memory reported free by CUDA plus the memory reserved but unused by torch.
    When it would not fit, the query axis is cut into a power-of-two number of
    slices (only if that number divides the query length evenly; otherwise a
    single slice is used). ``mask`` is accepted but not used.

    Raises RuntimeError when more than 64 slices would be required.
    """
    heads = self.heads

    q_in = self.to_q(x)
    context = default(context, x)

    context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context)
    k_in = self.to_k(context_k)
    v_in = self.to_v(context_v)

    # The attention scale is folded into the keys once, in place.
    k_in *= self.scale

    del context, x

    q, k, v = (rearrange(t, 'b n (h d) -> (b h) n d', h=heads) for t in (q_in, k_in, v_in))
    del q_in, k_in, v_in

    out = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)

    stats = torch.cuda.memory_stats(q.device)
    mem_free_torch = stats['reserved_bytes.all.current'] - stats['active_bytes.all.current']
    mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())
    mem_free_total = mem_free_cuda + mem_free_torch

    gb = 1024 ** 3
    element_size = q.element_size()
    tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * element_size
    modifier = 3 if element_size == 2 else 2.5
    mem_required = tensor_size * modifier

    steps = 1
    if mem_required > mem_free_total:
        steps = 2 ** (math.ceil(math.log(mem_required / mem_free_total, 2)))

    if steps > 64:
        max_res = math.floor(math.sqrt(math.sqrt(mem_free_total / 2.5)) / 8) * 64
        raise RuntimeError(f'Not enough memory, use lower resolution (max approx. {max_res}x{max_res}). '
                           f'Need: {mem_required / 64 / gb:0.1f}GB free, Have:{mem_free_total / gb:0.1f}GB free')

    n_queries = q.shape[1]
    slice_size = n_queries // steps if (n_queries % steps) == 0 else n_queries
    for start in range(0, n_queries, slice_size):
        stop = start + slice_size
        scores = einsum('b i d, b j d -> b i j', q[:, start:stop], k)

        weights = scores.softmax(dim=-1, dtype=q.dtype)
        del scores

        out[:, start:stop] = einsum('b i j, b j d -> b i d', weights, v)
        del weights

    del q, k, v

    merged = rearrange(out, '(b h) n d -> b n (h d)', h=heads)
    del out

    return self.to_out(merged)
|
||||
|
||||
|
||||
def check_for_psutil():
    """Return True when the optional ``psutil`` package can be located, False otherwise.

    Only the module spec is looked up; psutil itself is not imported here.
    """
    # ``import importlib`` alone does not guarantee that the ``util`` submodule
    # is loaded, so import it explicitly before using it.
    import importlib.util

    try:
        return importlib.util.find_spec('psutil') is not None
    except (ModuleNotFoundError, ValueError):
        # find_spec raises ValueError when an already-imported module has no __spec__.
        return False
|
||||
|
||||
invokeAI_mps_available = check_for_psutil()
|
||||
|
||||
# -- Taken from https://github.com/invoke-ai/InvokeAI --
|
||||
if invokeAI_mps_available:
|
||||
import psutil
|
||||
mem_total_gb = psutil.virtual_memory().total // (1 << 30)
|
||||
|
||||
def einsum_op_compvis(q, k, v):
    """Unsliced attention: softmax over ``q @ k^T`` along the last axis, applied to ``v``.

    No scaling is applied here; the caller in this file pre-multiplies the keys.
    """
    scores = einsum('b i d, b j d -> b i j', q, k)
    weights = scores.softmax(dim=-1, dtype=scores.dtype)
    return einsum('b i j, b j d -> b i d', weights, v)
|
||||
|
||||
def einsum_op_slice_0(q, k, v, slice_size):
    """Attention computed in chunks of ``slice_size`` along dimension 0 of q, k and v."""
    out = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)
    for start in range(0, q.shape[0], slice_size):
        chunk = slice(start, start + slice_size)
        out[chunk] = einsum_op_compvis(q[chunk], k[chunk], v[chunk])
    return out
|
||||
|
||||
def einsum_op_slice_1(q, k, v, slice_size):
    """Attention computed in chunks of ``slice_size`` along dimension 1 of q; k and v are used whole."""
    out = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype)
    for start in range(0, q.shape[1], slice_size):
        chunk = slice(start, start + slice_size)
        out[:, chunk] = einsum_op_compvis(q[:, chunk], k, v)
    return out
|
||||
|
||||
def einsum_op_mps_v1(q, k, v):
    """Use unsliced attention up to 4096 query positions, otherwise slice along dimension 1."""
    # (512x512) max q.shape[1]: 4096
    if q.shape[1] > 4096:
        slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1]))
        return einsum_op_slice_1(q, k, v, slice_size)
    return einsum_op_compvis(q, k, v)
|
||||
|
||||
def einsum_op_mps_v2(q, k, v):
    """Use unsliced attention only with more than 8 GB total RAM and at most 4096 query positions.

    In every other case dimension 0 is processed one entry at a time.
    """
    if mem_total_gb <= 8 or q.shape[1] > 4096:
        return einsum_op_slice_0(q, k, v, 1)
    return einsum_op_compvis(q, k, v)
|
||||
|
||||
def einsum_op_tensor_mem(q, k, v, max_tensor_mb):
    """Pick a slicing strategy so that one attention matrix stays around ``max_tensor_mb`` MiB.

    The full matrix size is estimated from the shapes and element size. If it
    fits, attention is computed in one go; otherwise a power-of-two divisor is
    derived and applied to dimension 0 when it is large enough, else to dimension 1.
    """
    size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20)
    if size_mb <= max_tensor_mb:
        return einsum_op_compvis(q, k, v)

    overshoot = int((size_mb - 1) / max_tensor_mb)
    div = 2 ** overshoot.bit_length()
    if div > q.shape[0]:
        return einsum_op_slice_1(q, k, v, max(q.shape[1] // div, 1))
    return einsum_op_slice_0(q, k, v, q.shape[0] // div)
|
||||
|
||||
def einsum_op_cuda(q, k, v):
    """Run attention on CUDA with a tensor-size budget derived from currently free memory.

    Free memory is taken as what CUDA reports free on ``q.device`` plus what
    torch has reserved but is not actively using.
    """
    stats = torch.cuda.memory_stats(q.device)
    mem_active = stats['active_bytes.all.current']
    mem_reserved = stats['reserved_bytes.all.current']
    mem_free_cuda, _ = torch.cuda.mem_get_info(q.device)
    mem_free_torch = mem_reserved - mem_active
    mem_free_total = mem_free_cuda + mem_free_torch
    # Divide factor of safety as there's copying and fragmentation
    # einsum_op_tensor_mem is a module-level function; this function has no
    # `self`, so it must be called directly.
    return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
|
||||
|
||||
def einsum_op(q, k, v):
    """Dispatch to the attention implementation matching the device type of ``q``."""
    device_type = q.device.type

    if device_type == 'cuda':
        return einsum_op_cuda(q, k, v)

    if device_type == 'mps':
        mps_op = einsum_op_mps_v1 if mem_total_gb >= 32 else einsum_op_mps_v2
        return mps_op(q, k, v)

    # Smaller slices are faster due to L2/L3/SLC caches.
    # Tested on i7 with 8MB L3 cache.
    return einsum_op_tensor_mem(q, k, v, 32)
|
||||
|
||||
def split_cross_attention_forward_invokeAI(self, x, context=None, mask=None):
    """Cross-attention forward pass that delegates the attention product to ``einsum_op``.

    ``context`` defaults to ``x`` and goes through the loaded hypernetwork
    before the key/value projections; the keys are pre-multiplied by
    ``self.scale``. ``mask`` is accepted but not used.
    """
    heads = self.heads

    q = self.to_q(x)
    context = default(context, x)

    context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context)
    k = self.to_k(context_k) * self.scale
    v = self.to_v(context_v)
    del context, context_k, context_v, x

    q, k, v = (rearrange(t, 'b n (h d) -> (b h) n d', h=heads) for t in (q, k, v))
    attended = einsum_op(q, k, v)
    merged = rearrange(attended, '(b h) n d -> b n (h d)', h=heads)
    return self.to_out(merged)
|
||||
|
||||
# -- End of code from https://github.com/invoke-ai/InvokeAI --
|
||||
|
||||
def xformers_attention_forward(self, x, context=None, mask=None):
    """Cross-attention forward pass using ``xformers.ops.memory_efficient_attention``.

    ``context`` defaults to ``x`` and goes through the loaded hypernetwork
    before the key/value projections. ``mask`` is accepted but not used.
    """
    heads = self.heads
    query = self.to_q(x)
    context = default(context, x)

    context_k, context_v = hypernetwork.apply_hypernetwork(shared.loaded_hypernetwork, context)
    key = self.to_k(context_k)
    value = self.to_v(context_v)

    # Heads get their own axis: (b, n, h*d) -> (b, n, h, d).
    q, k, v = (rearrange(t, 'b n (h d) -> b n h d', h=heads) for t in (query, key, value))
    del query, key, value
    out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None)

    out = rearrange(out, 'b n h d -> b n (h d)', h=heads)
    return self.to_out(out)
|
||||
|
||||
def cross_attention_attnblock_forward(self, x):
    """Attention-block forward pass that slices the query positions to fit in free CUDA memory.

    ``x`` is unpacked as (b, c, h, w) after the q/k/v projections. The result
    of ``self.proj_out`` plus the residual ``x`` is returned.
    """
    normed = self.norm(x)
    q1 = self.q(normed)
    k1 = self.k(normed)
    v = self.v(normed)
    del normed

    # compute attention
    b, c, h, w = q1.shape

    q = q1.reshape(b, c, h*w).permute(0, 2, 1)   # b,hw,c
    del q1

    k = k1.reshape(b, c, h*w)   # b,c,hw
    del k1

    out = torch.zeros_like(k, device=q.device)

    stats = torch.cuda.memory_stats(q.device)
    mem_free_torch = stats['reserved_bytes.all.current'] - stats['active_bytes.all.current']
    mem_free_cuda, _ = torch.cuda.mem_get_info(torch.cuda.current_device())
    mem_free_total = mem_free_cuda + mem_free_torch

    tensor_size = q.shape[0] * q.shape[1] * k.shape[2] * q.element_size()
    mem_required = tensor_size * 2.5

    steps = 1
    if mem_required > mem_free_total:
        steps = 2**(math.ceil(math.log(mem_required / mem_free_total, 2)))

    n_positions = q.shape[1]
    # Slice only when the step count divides the positions evenly.
    slice_size = n_positions // steps if (n_positions % steps) == 0 else n_positions
    for start in range(0, n_positions, slice_size):
        stop = start + slice_size

        scores = torch.bmm(q[:, start:stop], k)     # b,hw,hw    w[b,i,j]=sum_c q[b,i,c]k[b,c,j]
        scaled = scores * (int(c)**(-0.5))
        del scores
        weights = torch.nn.functional.softmax(scaled, dim=2, dtype=q.dtype)
        del scaled

        # attend to values
        v_flat = v.reshape(b, c, h*w)
        weights_t = weights.permute(0, 2, 1)   # b,hw,hw (first hw of k, second of q)
        del weights

        out[:, :, start:stop] = torch.bmm(v_flat, weights_t)     # b, c,hw (hw of q) h_[b,c,j] = sum_i v[b,c,i] w_[b,i,j]
        del v_flat, weights_t

    spatial = out.reshape(b, c, h, w)
    del out

    projected = self.proj_out(spatial)
    del spatial

    projected += x

    return projected
|
||||
|
||||
def xformers_attnblock_forward(self, x):
    """Attention-block forward pass using ``xformers.ops.memory_efficient_attention``.

    The q/k/v projections are unpacked as (b, c, h, w). They are flattened to
    (b, h*w, c) so that attention runs over spatial positions, and the result
    is reshaped back before ``self.proj_out`` and the residual addition.

    Falls back to ``cross_attention_attnblock_forward`` when xformers raises
    NotImplementedError.
    """
    try:
        h_ = self.norm(x)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)
        b, c, h, w = q.shape
        # Passing the (b, c, h, w) tensors directly would make xformers treat
        # channels as the sequence axis; move to (batch, sequence, channels).
        q, k, v = map(lambda t: rearrange(t, 'b c h w -> b (h w) c').contiguous(), (q, k, v))
        out = xformers.ops.memory_efficient_attention(q, k, v)
        out = rearrange(out, 'b (h w) c -> b c h w', h=h)
        out = self.proj_out(out)
        return x + out
    except NotImplementedError:
        return cross_attention_attnblock_forward(self, x)
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,121 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import PIL
|
||||
import torch
|
||||
from PIL import Image
|
||||
from torch.utils.data import Dataset
|
||||
from torchvision import transforms
|
||||
|
||||
import random
|
||||
import tqdm
|
||||
from modules import devices, shared
|
||||
import re
|
||||
|
||||
re_numbers_at_start = re.compile(r"^[-\d]+\s*")
|
||||
|
||||
|
||||
class DatasetEntry:
    """One training sample: source file name, its latent, caption text, and optional conditioning."""

    def __init__(self, filename=None, latent=None, filename_text=None):
        # Conditioning fields start empty; other code in this file fills them in.
        self.cond_text = None
        self.cond = None

        self.filename_text = filename_text
        self.latent = latent
        self.filename = filename
|
||||
|
||||
|
||||
class PersonalizedBase(Dataset):
    """Dataset of pre-encoded image latents plus caption text built from a template file.

    Every readable image in ``data_root`` is resized, encoded to a latent with
    ``model`` and stored as a DatasetEntry. ``__getitem__`` returns a list of
    ``batch_size`` entries.
    """

    def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False, batch_size=1):
        """Load the template lines and encode every image found in ``data_root``.

        Raises AssertionError when ``data_root`` is empty/None or when no image
        could be loaded.
        """
        re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex) > 0 else None

        self.placeholder_token = placeholder_token

        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.flip = transforms.RandomHorizontalFlip(p=flip_p)

        self.dataset = []

        with open(template_file, "r") as file:
            lines = [x.strip() for x in file.readlines()]

        self.lines = lines

        assert data_root, 'dataset directory not specified'

        cond_model = shared.sd_model.cond_stage_model

        self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]
        print("Preparing dataset...")
        for path in tqdm.tqdm(self.image_paths):
            try:
                image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC)
            except Exception:
                # Anything that cannot be opened as an image is skipped.
                continue

            text_filename = os.path.splitext(path)[0] + ".txt"
            filename = os.path.basename(path)

            if os.path.exists(text_filename):
                # A sibling .txt file takes precedence as the caption text.
                with open(text_filename, "r", encoding="utf8") as file:
                    filename_text = file.read()
            else:
                # Otherwise derive the text from the file name without its leading digits/dashes.
                filename_text = os.path.splitext(filename)[0]
                filename_text = re.sub(re_numbers_at_start, '', filename_text)
                if re_word:
                    tokens = re_word.findall(filename_text)
                    filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens)

            # Scale pixel values from [0, 255] to [-1, 1].
            npimage = np.array(image).astype(np.uint8)
            npimage = (npimage / 127.5 - 1.0).astype(np.float32)

            torchdata = torch.from_numpy(npimage).to(device=device, dtype=torch.float32)
            torchdata = torch.moveaxis(torchdata, 2, 0)

            init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze()
            init_latent = init_latent.to(devices.cpu)

            entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent)

            if include_cond:
                entry.cond_text = self.create_text(filename_text)
                entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)

            self.dataset.append(entry)

        # A single image is a valid dataset; only an empty one is an error.
        assert len(self.dataset) > 0, "No images have been found in the dataset."
        self.length = len(self.dataset) * repeats // batch_size

        self.initial_indexes = np.arange(len(self.dataset))
        self.indexes = None
        self.shuffle()

    def shuffle(self):
        """Draw a new random order for the dataset indexes."""
        # Convert the permutation to a numpy array before indexing: indexing
        # with a one-element torch tensor can collapse to a scalar, which
        # would break len(self.indexes) for a single-image dataset.
        order = torch.randperm(self.initial_indexes.shape[0]).numpy()
        self.indexes = self.initial_indexes[order]

    def create_text(self, filename_text):
        """Pick a random template line and substitute ``[name]`` and ``[filewords]``."""
        text = random.choice(self.lines)
        text = text.replace("[name]", self.placeholder_token)
        text = text.replace("[filewords]", filename_text)
        return text

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        """Return the list of ``batch_size`` entries making up batch ``i``."""
        res = []

        for j in range(self.batch_size):
            position = i * self.batch_size + j
            # Reshuffle each time a full pass over the indexes starts.
            if position % len(self.indexes) == 0:
                self.shuffle()

            index = self.indexes[position % len(self.indexes)]
            entry = self.dataset[index]

            # Entries without a precomputed cond get a freshly drawn caption.
            if entry.cond is None:
                entry.cond_text = self.create_text(entry.filename_text)

            res.append(entry)

        return res
|
||||
@ -0,0 +1,219 @@
|
||||
import base64
|
||||
import json
|
||||
import numpy as np
|
||||
import zlib
|
||||
from PIL import Image, PngImagePlugin, ImageDraw, ImageFont
|
||||
from fonts.ttf import Roboto
|
||||
import torch
|
||||
|
||||
|
||||
class EmbeddingEncoder(json.JSONEncoder):
    """JSON encoder that serialises torch tensors as ``{'TORCHTENSOR': nested list}``."""

    def default(self, obj):
        if not isinstance(obj, torch.Tensor):
            # Defer to the base class, which raises TypeError for unsupported types.
            return super().default(obj)
        return {'TORCHTENSOR': obj.cpu().detach().numpy().tolist()}
|
||||
|
||||
|
||||
class EmbeddingDecoder(json.JSONDecoder):
    """JSON decoder that turns ``{'TORCHTENSOR': ...}`` objects back into torch tensors."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, object_hook=self.object_hook, **kwargs)

    def object_hook(self, d):
        """Convert a decoded dict carrying the 'TORCHTENSOR' key; return other dicts unchanged."""
        if 'TORCHTENSOR' not in d:
            return d
        return torch.from_numpy(np.array(d['TORCHTENSOR']))
|
||||
|
||||
|
||||
def embedding_to_b64(data):
    """Serialise ``data`` to JSON with EmbeddingEncoder and return it base64-encoded (bytes)."""
    json_text = json.dumps(data, cls=EmbeddingEncoder)
    raw = json_text.encode()
    return base64.b64encode(raw)
|
||||
|
||||
|
||||
def embedding_from_b64(data):
    """Decode base64 ``data`` and parse the contained JSON with EmbeddingDecoder."""
    json_bytes = base64.b64decode(data)
    return json.loads(json_bytes, cls=EmbeddingDecoder)
|
||||
|
||||
|
||||
def lcg(m=2**32, a=1664525, c=1013904223, seed=0):
    """Endless linear congruential generator; yields each new state reduced modulo 255.

    The state advances as ``(a * state + c) % m`` before every yield, so the
    seed itself is never emitted.
    """
    state = seed
    while True:
        state = (a * state + c) % m
        yield state % 255
|
||||
|
||||
|
||||
def xor_block(block):
    """XOR ``block`` (cast to uint8) with the low four bits of a fixed pseudo-random sequence.

    The sequence comes from a freshly created ``lcg()`` with default
    parameters, so applying the function twice restores the input.
    """
    g = lcg()
    # block.size is the number of elements; np.product is deprecated and no
    # longer available in NumPy 2.0.
    randblock = np.array([next(g) for _ in range(block.size)]).astype(np.uint8).reshape(block.shape)
    return np.bitwise_xor(block.astype(np.uint8), randblock & 0x0F)
|
||||
|
||||
|
||||
def style_block(block, sequence):
    """XOR the high four bits of a dot pattern into ``block``.

    An image of the block's size is covered with ellipses on an 8-pixel grid
    (every other row shifted by 4 pixels); the shades cycle through
    ``sequence``. Only the upper nibble of the drawn pattern is combined with
    ``block``.
    """
    im = Image.new('RGB', (block.shape[1], block.shape[0]))
    draw = ImageDraw.Draw(im)
    width, height = im.size
    row_starts = range(-6, height, 8)

    dot = 0
    for x in range(-6, width, 8):
        for row, y in enumerate(row_starts):
            shift = 0 if row % 2 else 4
            shade = sequence[dot % len(sequence)]
            dot += 1
            left = x + shift
            draw.ellipse((left, y, left + 6, y + 6), fill=(shade, shade, shade))

    fg = np.array(im).astype(np.uint8) & 0xF0

    return block ^ fg
|
||||
|
||||
|
||||
def insert_image_data_embed(image, data):
    """Return a new image with ``data`` embedded in pixel strips left and right of ``image``.

    ``data`` is JSON-encoded and zlib-compressed; the low nibbles of the bytes
    go into the left strip and the high nibbles into the right strip. Each
    strip is styled with a dot pattern and XOR-ed with the fixed pseudo-random
    sequence. A one-pixel black column separates each strip from the image.
    """
    d = 3
    payload = json.dumps(data, cls=EmbeddingEncoder).encode()
    data_compressed = zlib.compress(payload, level=9)
    data_np_ = np.frombuffer(data_compressed, np.uint8).copy()
    data_np_high = data_np_ >> 4
    data_np_low = data_np_ & 0x0F

    # Grow the length past the next multiple of the image height, then past
    # the next multiple of height * channels, so it reshapes to (h, -1, d).
    h = image.size[1]
    n_nibbles = data_np_low.shape[0]
    next_size = n_nibbles + (h-(n_nibbles % h))
    next_size = next_size + ((h*d)-(next_size % (h*d)))

    # ndarray.resize works in place and must be called on the un-aliased arrays.
    data_np_low.resize(next_size)
    data_np_low = data_np_low.reshape((h, -1, d))

    data_np_high.resize(next_size)
    data_np_high = data_np_high.reshape((h, -1, d))

    # Shades for the dot pattern come from the first embedding vector, normalised to 0..255.
    first_param = list(data['string_to_param'].values())[0]
    edge_style = first_param.cpu().detach().numpy().tolist()[0][:1024]
    edge_style = (np.abs(edge_style)/np.max(np.abs(edge_style))*255).astype(np.uint8)

    data_np_low = style_block(data_np_low, sequence=edge_style)
    data_np_low = xor_block(data_np_low)
    data_np_high = style_block(data_np_high, sequence=edge_style[::-1])
    data_np_high = xor_block(data_np_high)

    im_low = Image.fromarray(data_np_low, mode='RGB')
    im_high = Image.fromarray(data_np_high, mode='RGB')

    total_width = image.size[0]+im_low.size[0]+im_high.size[0]+2
    background = Image.new('RGB', (total_width, image.size[1]), (0, 0, 0))
    background.paste(im_low, (0, 0))
    background.paste(image, (im_low.size[0]+1, 0))
    background.paste(im_high, (im_low.size[0]+1+image.size[0]+1, 0))

    return background
|
||||
|
||||
|
||||
def crop_black(img, tol=0):
    """Crop ``img`` to the bounding box of pixels whose every channel exceeds ``tol``.

    ``img`` is indexed as (rows, columns, channels).
    """
    lit = (img > tol).all(axis=2)
    lit_cols = lit.any(axis=0)
    lit_rows = lit.any(axis=1)
    n_rows, n_cols = lit.shape

    left = lit_cols.argmax()
    right = n_cols - lit_cols[::-1].argmax()
    top = lit_rows.argmax()
    bottom = n_rows - lit_rows[::-1].argmax()

    return img[top:bottom, left:right]
|
||||
|
||||
|
||||
def extract_image_data_embed(image):
    """Recover the data embedded by ``insert_image_data_embed`` from ``image``.

    Returns the decoded object, or None (after printing a message) when fewer
    than two all-zero columns are found in the low nibbles of the cropped image.
    """
    d = 3
    pixels = np.array(image.convert('RGB').getdata()).reshape(image.size[1], image.size[0], d).astype(np.uint8)
    outarr = crop_black(pixels) & 0x0F

    # Columns whose low nibbles are all zero mark the separators around the picture.
    black_cols = np.where(np.sum(outarr, axis=(0, 2)) == 0)
    if black_cols[0].shape[0] < 2:
        print('No Image data blocks found.')
        return None

    first_black = black_cols[0].min()
    last_black = black_cols[0].max()
    data_block_lower = xor_block(outarr[:, :first_black, :].astype(np.uint8))
    data_block_upper = xor_block(outarr[:, last_black+1:, :].astype(np.uint8))

    # Re-join the high and low nibbles into the original bytes.
    data_block = (data_block_upper << 4) | (data_block_lower)
    data_block = data_block.flatten().tobytes()

    data = zlib.decompress(data_block)
    return json.loads(data, cls=EmbeddingDecoder)
|
||||
|
||||
|
||||
def caption_image_overlay(srcimage, title, footerLeft, footerMid, footerRight, textfont=None):
    """Return a copy of ``srcimage`` with a darkening gradient, a title and three footer texts.

    ``textfont`` is passed to ``ImageFont.truetype``; when it is None the
    bundled Roboto font is used. The title is scaled to roughly three quarters
    of the image width and the footers share one size so that each fits into a
    third of the width; sizes are capped at 72.
    """
    from math import cos

    image = srcimage.copy()

    if textfont is None:
        # The previous lookup of a configured font referenced names that are
        # not defined in this scope, so it always failed and fell back to
        # Roboto; use that fallback directly.
        textfont = Roboto

    # Vertical gradient that darkens the top and bottom edges behind the text.
    factor = 1.5
    gradient = Image.new('RGBA', (1, image.size[1]), color=(0, 0, 0, 0))
    for y in range(image.size[1]):
        mag = 1-cos(y/image.size[1]*factor)
        mag = max(mag, 1-cos((image.size[1]-y)/image.size[1]*factor*1.1))
        gradient.putpixel((0, y), (0, 0, 0, int(mag*255)))
    image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size))

    draw = ImageDraw.Draw(image)
    padding = 10
    max_fontsize = 72

    def fit_fontsize(text, font, base_size, available_width):
        """Scale ``base_size`` so ``text`` spans ``available_width``, clamped to 1..max_fontsize."""
        _, _, text_width, _ = draw.textbbox((0, 0), text, font=font)
        if text_width <= 0:
            # Empty text has no width; avoid dividing by zero.
            return max_fontsize
        return max(1, min(int(base_size * (available_width / text_width)), max_fontsize))

    fontsize = 32
    font = ImageFont.truetype(textfont, fontsize)
    fontsize = fit_fontsize(title, font, fontsize, (image.size[0]*0.75)-(padding*4))
    font = ImageFont.truetype(textfont, fontsize)
    draw.text((padding, padding), title, anchor='lt', font=font, fill=(255, 255, 255, 230))

    # All three footers use the smallest of their individually fitted sizes.
    footer_width = (image.size[0]/3)-(padding)
    footer_size = min(fit_fontsize(text, font, fontsize, footer_width)
                      for text in (footerLeft, footerMid, footerRight))
    font = ImageFont.truetype(textfont, footer_size)

    draw.text((padding, image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255, 255, 255, 230))
    draw.text((image.size[0]/2, image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255, 255, 255, 230))
    draw.text((image.size[0]-padding, image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255, 255, 255, 230))

    return image
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Ad-hoc self-test; reads test_embedding.png from the working directory.
    source_image = Image.open('test_embedding.png')

    data = extract_image_data_embed(source_image)
    assert data is not None

    data = embedding_from_b64(source_image.text['sd-ti-embedding'])
    assert data is not None

    # Round trip: caption an image, embed a random tensor, and read it back.
    blank = Image.new('RGBA', (512, 512), (255, 255, 200, 255))
    cap_image = caption_image_overlay(blank, 'title', 'footerLeft', 'footerMid', 'footerRight')

    test_embed = {'string_to_param': {'*': torch.from_numpy(np.random.random((2, 4096)))}}

    embedded_image = insert_image_data_embed(cap_image, test_embed)
    retrieved_embed = extract_image_data_embed(embedded_image)
    assert str(retrieved_embed) == str(test_embed)

    embedded_image2 = insert_image_data_embed(cap_image, retrieved_embed)
    assert embedded_image == embedded_image2

    # The generator output is pinned so embeddings stay decodable.
    g = lcg()
    shared_random = np.array([next(g) for _ in range(100)]).astype(np.uint8).tolist()

    reference_random = [
        253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177,
        95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179,
        160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193,
        38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28,
        30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0,
        41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185,
        66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82,
        204, 86, 73, 222, 44, 198, 118, 240, 97,
    ]

    assert shared_random == reference_random

    hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist())

    assert 12731374 == hunna_kay_random_sum
|
||||
@ -0,0 +1,69 @@
|
||||
import tqdm
|
||||
|
||||
|
||||
class LearnScheduleIterator:
    """Iterator over ``(learn_rate, end_step)`` pairs parsed from a schedule string."""

    def __init__(self, learn_rate, max_steps, cur_step=0):
        """
        specify learn_rate as "0.001:100, 0.00001:1000, 1e-5:10000" to have lr of 0.001 until step 100, 0.00001 until 1000, 1e-5 until 10000

        Pairs whose step is not beyond ``cur_step`` are skipped. A step of -1,
        or an entry without a step, runs until ``max_steps`` and ends parsing.
        End steps are capped at ``max_steps``.
        """
        self.rates = []
        self.it = 0
        self.maxit = 0

        def add(rate_text, until):
            self.rates.append((float(rate_text), until))
            self.maxit += 1

        for pair in learn_rate.split(','):
            parts = pair.split(':')

            if len(parts) != 2:
                add(parts[0], max_steps)
                return

            step = int(parts[1])
            if step > cur_step:
                add(parts[0], min(step, max_steps))
                if step > max_steps:
                    return
            elif step == -1:
                add(parts[0], max_steps)
                return

    def __iter__(self):
        return self

    def __next__(self):
        if self.it >= self.maxit:
            raise StopIteration
        self.it += 1
        return self.rates[self.it - 1]
|
||||
|
||||
|
||||
class LearnRateScheduler:
    """Tracks the active entry of a learn-rate schedule and applies it to an optimizer."""

    def __init__(self, learn_rate, max_steps, cur_step=0, verbose=True):
        self.schedules = LearnScheduleIterator(learn_rate, max_steps, cur_step)
        first_rate, first_end = next(self.schedules)
        self.learn_rate = first_rate
        self.end_step = first_end
        self.verbose = verbose

        if self.verbose:
            print(f'Training at rate of {self.learn_rate} until step {self.end_step}')

        self.finished = False

    def apply(self, optimizer, step_number):
        """Advance to the next schedule entry once ``step_number`` passes the current end step.

        When the schedule is exhausted, ``finished`` is set and the optimizer
        is left untouched.
        """
        if step_number <= self.end_step:
            return

        try:
            upcoming = next(self.schedules)
        except Exception:
            self.finished = True
            return
        self.learn_rate, self.end_step = upcoming

        if self.verbose:
            tqdm.tqdm.write(f'Training at rate of {self.learn_rate} until step {self.end_step}')

        for group in optimizer.param_groups:
            group['lr'] = self.learn_rate
|
||||
|
||||
@ -0,0 +1,116 @@
|
||||
import os
|
||||
from PIL import Image, ImageOps
|
||||
import platform
|
||||
import sys
|
||||
import tqdm
|
||||
import time
|
||||
|
||||
from modules import shared, images
|
||||
from modules.shared import opts, cmd_opts
|
||||
if cmd_opts.deepdanbooru:
|
||||
import modules.deepbooru as deepbooru
|
||||
|
||||
|
||||
def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False):
    """Prepare the requested captioning backends, run ``preprocess_work``, and always release them.

    The interrogator is loaded when ``process_caption`` is set and a deepbooru
    process is created when ``process_caption_deepbooru`` is set; both are
    released in the ``finally`` clause even if the work raises.
    """
    try:
        if process_caption:
            shared.interrogator.load()

        if process_caption_deepbooru:
            booru_opts = deepbooru.create_deepbooru_opts()
            booru_opts[deepbooru.OPT_INCLUDE_RANKS] = False
            deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, booru_opts)

        preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split,
                        process_caption, process_caption_deepbooru)
    finally:
        if process_caption:
            shared.interrogator.send_blip_to_ram()

        if process_caption_deepbooru:
            deepbooru.release_process()
|
||||
|
||||
|
||||
|
||||
def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False):
    """Turn every readable image in `process_src` into PNG(s) in `process_dst`.

    For each directory entry that PIL can open, the image is converted to RGB and
    then either
      * split into two `process_width` x `process_height` crops (top/bottom for
        tall images, left/right for wide ones) when `process_split` is set and the
        height/width ratio is above 1.35 or below 1/1.35, or
      * resized through `images.resize_image(1, ...)` to the target size.
    Each saved picture is additionally saved mirrored when `process_flip` is set.
    When captioning is enabled a `.txt` file with the same base name is written
    next to the PNG if the caption is non-empty.

    Entries that cannot be opened are skipped silently. The loop stops early when
    `shared.state.interrupted` is set. Progress is reported through `shared.state`.

    Raises:
        AssertionError: if source and destination resolve to the same directory.
    """
    target_w = process_width
    target_h = process_height
    src_dir = os.path.abspath(process_src)
    dst_dir = os.path.abspath(process_dst)

    assert src_dir != dst_dir, 'same directory specified as source and destination'

    os.makedirs(dst_dir, exist_ok=True)

    entries = os.listdir(src_dir)

    shared.state.textinfo = "Preprocessing..."
    shared.state.job_count = len(entries)

    def save_pic_with_caption(image, index):
        # Build the caption: BLIP text first, then deepbooru tags, comma separated
        # only when there is already some text.
        text = ""

        if process_caption:
            text += shared.interrogator.generate_caption(image)

        if process_caption_deepbooru:
            if len(text) > 0:
                text += ", "
            text += deepbooru.get_tags_from_process(image)

        # Output name: <file index>-<variant number>-<source file stem>.
        stem = os.path.basename(os.path.splitext(source_path)[0])
        out_name = f"{index:05}-{variant_counter[0]}-{stem}"
        image.save(os.path.join(dst_dir, f"{out_name}.png"))

        if len(text) > 0:
            with open(os.path.join(dst_dir, f"{out_name}.txt"), "w", encoding="utf8") as file:
                file.write(text)

        variant_counter[0] += 1

    def save_pic(image, index):
        save_pic_with_caption(image, index)

        if process_flip:
            save_pic_with_caption(ImageOps.mirror(image), index)

    for index, imagefile in enumerate(tqdm.tqdm(entries)):
        # Per-source-file counter; a one-element list so the closures can mutate it.
        variant_counter = [0]
        source_path = os.path.join(src_dir, imagefile)
        try:
            img = Image.open(source_path).convert("RGB")
        except Exception:
            # Not an image (or unreadable): skip it.
            continue

        if shared.state.interrupted:
            break

        ratio = img.height / img.width
        is_tall = ratio > 1.35
        is_wide = ratio < 1 / 1.35

        if process_split and is_tall:
            img = img.resize((target_w, target_h * img.height // img.width))

            # Top crop first, then bottom crop.
            crop_boxes = [
                (0, 0, target_w, target_h),
                (0, img.height - target_h, target_w, img.height),
            ]
            for box in crop_boxes:
                save_pic(img.crop(box), index)
        elif process_split and is_wide:
            img = img.resize((target_w * img.width // img.height, target_h))

            # Left crop first, then right crop.
            crop_boxes = [
                (0, 0, target_w, target_h),
                (img.width - target_w, 0, img.width, target_h),
            ]
            for box in crop_boxes:
                save_pic(img.crop(box), index)
        else:
            img = images.resize_image(1, img, target_w, target_h)
            save_pic(img, index)

        shared.state.nextjob()
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 478 KiB |
@ -0,0 +1,363 @@
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import torch
|
||||
import tqdm
|
||||
import html
|
||||
import datetime
|
||||
import csv
|
||||
|
||||
from PIL import Image, PngImagePlugin
|
||||
|
||||
from modules import shared, devices, sd_hijack, processing, sd_models
|
||||
import modules.textual_inversion.dataset
|
||||
from modules.textual_inversion.learn_schedule import LearnRateScheduler
|
||||
|
||||
from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64,
|
||||
insert_image_data_embed, extract_image_data_embed,
|
||||
caption_image_overlay)
|
||||
|
||||
class Embedding:
    """A named textual-inversion embedding together with its training metadata."""

    def __init__(self, vec, name, step=None):
        self.vec = vec
        self.name = name
        self.step = step
        # Filled in lazily by checksum(); callers reset it to None to force a recompute.
        self.cached_checksum = None
        self.sd_checkpoint = None
        self.sd_checkpoint_name = None

    def save(self, filename):
        """Serialize the embedding and its metadata to `filename` with `torch.save`."""
        torch.save(
            {
                "string_to_token": {"*": 265},
                "string_to_param": {"*": self.vec},
                "name": self.name,
                "step": self.step,
                "sd_checkpoint": self.sd_checkpoint,
                "sd_checkpoint_name": self.sd_checkpoint_name,
            },
            filename,
        )

    def checksum(self):
        """Return a 4-hex-digit checksum of `vec`, computing it only once.

        The flattened vector is scaled by 100, each element is truncated to an
        int and folded into a 32-bit rolling hash; the low 16 bits are formatted
        as lowercase hex.
        """
        if self.cached_checksum is None:
            acc = 0
            for element in self.vec.reshape(-1) * 100:
                acc = ((acc * 281) ^ (int(element) * 997)) & 0xFFFFFFFF

            self.cached_checksum = f'{acc & 0xffff:04x}'

        return self.cached_checksum
|
||||
|
||||
|
||||
class EmbeddingDatabase:
    """Registry of textual-inversion embeddings loaded from one directory.

    Attributes:
        word_embeddings: maps an embedding's name to the embedding object.
        ids_lookup: maps the first token id of an embedding's tokenized name to a
            list of ``(ids, embedding)`` pairs, kept sorted longest-first.
        dir_mtime: modification time of `embeddings_dir` at the last (re)load,
            or None if nothing has been loaded yet.
        embeddings_dir: directory that is scanned for embedding files.
    """

    def __init__(self, embeddings_dir):
        self.ids_lookup = {}
        self.word_embeddings = {}
        self.dir_mtime = None
        self.embeddings_dir = embeddings_dir

    def register_embedding(self, embedding, model):
        """Add `embedding` to the registry, indexed by name and by token ids.

        The name is tokenized with `model.cond_stage_model.tokenizer` (without
        special tokens). Returns the embedding that was passed in.
        """
        self.word_embeddings[embedding.name] = embedding

        ids = model.cond_stage_model.tokenizer([embedding.name], add_special_tokens=False)['input_ids'][0]

        first_id = ids[0]
        if first_id not in self.ids_lookup:
            self.ids_lookup[first_id] = []

        # Longest token sequences first, so find_embedding_at_position prefers
        # the longest matching name.
        self.ids_lookup[first_id] = sorted(self.ids_lookup[first_id] + [(ids, embedding)], key=lambda x: len(x[0]), reverse=True)

        return embedding

    def load_textual_inversion_embeddings(self):
        """(Re)load every embedding file from `embeddings_dir`.

        Does nothing when the directory's modification time has not advanced
        since the previous load. Otherwise the registry is cleared and rebuilt.
        Empty files are skipped; a file that fails to load is reported on stderr
        with its traceback and does not stop the remaining files from loading.
        """
        mt = os.path.getmtime(self.embeddings_dir)
        if self.dir_mtime is not None and mt <= self.dir_mtime:
            return

        self.dir_mtime = mt
        self.ids_lookup.clear()
        self.word_embeddings.clear()

        def process_file(path, filename):
            name = os.path.splitext(filename)[0]

            data = []

            if filename.upper().endswith('.PNG'):
                # Close the image file deterministically once the payload is extracted.
                with Image.open(path) as embed_image:
                    if 'sd-ti-embedding' in embed_image.text:
                        data = embedding_from_b64(embed_image.text['sd-ti-embedding'])
                    else:
                        data = extract_image_data_embed(embed_image)
                name = data.get('name', name)
            else:
                # NOTE(security): torch.load unpickles the file; embedding files
                # obtained from untrusted sources can execute code when loaded.
                data = torch.load(path, map_location="cpu")

            # textual inversion embeddings
            if 'string_to_param' in data:
                param_dict = data['string_to_param']
                if hasattr(param_dict, '_parameters'):
                    param_dict = getattr(param_dict, '_parameters')  # fix for torch 1.12.1 loading saved file from torch 1.11
                assert len(param_dict) == 1, 'embedding file has multiple terms in it'
                emb = next(iter(param_dict.items()))[1]
            # diffuser concepts
            elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor:
                assert len(data.keys()) == 1, 'embedding file has multiple terms in it'

                emb = next(iter(data.values()))
                if len(emb.shape) == 1:
                    emb = emb.unsqueeze(0)
            else:
                raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.")

            vec = emb.detach().to(devices.device, dtype=torch.float32)
            embedding = Embedding(vec, name)
            embedding.step = data.get('step', None)
            # Embedding.save stores the checkpoint hash under "sd_checkpoint";
            # "hash" is still honoured as a fallback for files that carry it.
            embedding.sd_checkpoint = data.get('sd_checkpoint', data.get('hash', None))
            embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None)
            self.register_embedding(embedding, shared.sd_model)

        for fn in os.listdir(self.embeddings_dir):
            try:
                fullfn = os.path.join(self.embeddings_dir, fn)

                if os.stat(fullfn).st_size == 0:
                    continue

                process_file(fullfn, fn)
            except Exception:
                print(f"Error loading embedding {fn}:", file=sys.stderr)
                print(traceback.format_exc(), file=sys.stderr)
                continue

        print(f"Loaded a total of {len(self.word_embeddings)} textual inversion embeddings.")

    def find_embedding_at_position(self, tokens, offset):
        """Find a registered embedding whose token ids start at `tokens[offset]`.

        Returns ``(embedding, number_of_tokens_matched)`` for the first (i.e.
        longest) registered id sequence equal to the slice of `tokens` starting
        at `offset`, or ``(None, None)`` when nothing matches.
        """
        token = tokens[offset]
        possible_matches = self.ids_lookup.get(token, None)

        if possible_matches is None:
            return None, None

        for ids, embedding in possible_matches:
            if tokens[offset:offset + len(ids)] == ids:
                return embedding, len(ids)

        return None, None
|
||||
|
||||
|
||||
def create_embedding(name, num_vectors_per_token, init_text='*'):
    """Create a new embedding file initialised from the token embeddings of `init_text`.

    `init_text` is tokenized (no special tokens) and passed through the text
    model's wrapped token-embedding layer. The new embedding has
    `num_vectors_per_token` rows; row ``i`` is copied from the source row at
    index ``i * source_rows // num_vectors_per_token``.

    The result is saved with step 0 as ``<embeddings_dir>/<name>.pt``.

    Returns:
        The path of the file that was written.

    Raises:
        AssertionError: if the target file already exists.
    """
    text_encoder = shared.sd_model.cond_stage_model
    embeddings_module = text_encoder.wrapped.transformer.text_model.embeddings

    token_ids = text_encoder.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"]
    init_vectors = embeddings_module.token_embedding.wrapped(token_ids.to(devices.device)).squeeze(0)
    vec = torch.zeros((num_vectors_per_token, init_vectors.shape[1]), device=devices.device)

    # Spread the available source rows evenly over the requested number of rows.
    for row in range(num_vectors_per_token):
        source_row = row * int(init_vectors.shape[0]) // num_vectors_per_token
        vec[row] = init_vectors[source_row]

    target_path = os.path.join(shared.cmd_opts.embeddings_dir, f"{name}.pt")
    assert not os.path.exists(target_path), f"file {target_path} already exists"

    new_embedding = Embedding(vec, name)
    new_embedding.step = 0
    new_embedding.save(target_path)

    return target_path
|
||||
|
||||
|
||||
def write_loss(log_directory, filename, step, epoch_len, values):
    """Append one row of training statistics to a CSV file.

    Nothing is written when `shared.opts.training_write_csv_every` is 0 or when
    `step` is not a multiple of it. The header row is written only when the file
    does not exist yet. `step`, the epoch number and the step within the epoch
    are all written 1-based; the entries of `values` supply the remaining columns.
    """
    if shared.opts.training_write_csv_every == 0:
        return

    if step % shared.opts.training_write_csv_every != 0:
        return

    csv_path = os.path.join(log_directory, filename)
    # Decide before opening: "a+" creates the file if it is missing.
    needs_header = not os.path.exists(csv_path)

    with open(csv_path, "a+", newline='') as fout:
        writer = csv.DictWriter(fout, fieldnames=["step", "epoch", "epoch_step", *(values.keys())])

        if needs_header:
            writer.writeheader()

        epoch = step // epoch_len
        epoch_step = step - epoch * epoch_len

        row = {
            "step": step + 1,
            "epoch": epoch + 1,
            "epoch_step": epoch_step + 1,
        }
        row.update(values)
        writer.writerow(row)
|
||||
|
||||
|
||||
def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
    """Train the registered embedding `embedding_name` and save it.

    Outline of what the code below does:
      * builds a dated log directory ``<log_directory>/<YYYY-MM-DD>/<embedding_name>``
        with optional ``embeddings``, ``images`` and ``image_embeddings`` subfolders;
      * builds a `PersonalizedBase` dataset from `data_root`;
      * optimises `embedding.vec` with AdamW, the learning rate being driven by a
        `LearnRateScheduler`, until the scheduler reports it is finished, the
        dataset is exhausted, or `shared.state.interrupted` is set;
      * every `save_embedding_every` steps saves a snapshot of the embedding,
        every `create_image_every` steps renders and saves a preview image, and
        calls `write_loss` on every step (which decides itself whether to write);
      * finally stamps the embedding with the selected checkpoint's hash and
        model name and saves it to ``<embeddings_dir>/<embedding_name>.pt``.

    Returns:
        ``(embedding, filename)``. If the embedding's stored step already exceeds
        `steps`, this is returned immediately without training or saving.

    Raises:
        AssertionError: if `embedding_name` is empty.

    NOTE(review): this listing was recovered from a whitespace-stripped view, so
    block nesting (in particular the extent of the second autocast block) was
    reconstructed from context — confirm against the repository copy.
    """
    assert embedding_name, 'embedding not selected'

    shared.state.textinfo = "Initializing textual inversion training..."
    shared.state.job_count = steps

    filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt')

    # All logs for this run go under <log_directory>/<date>/<embedding_name>.
    log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), embedding_name)

    # Each output folder is only created when the matching feature is enabled;
    # None doubles as the "feature disabled" flag inside the training loop.
    if save_embedding_every > 0:
        embedding_dir = os.path.join(log_directory, "embeddings")
        os.makedirs(embedding_dir, exist_ok=True)
    else:
        embedding_dir = None

    if create_image_every > 0:
        images_dir = os.path.join(log_directory, "images")
        os.makedirs(images_dir, exist_ok=True)
    else:
        images_dir = None

    if create_image_every > 0 and save_image_with_stored_embedding:
        images_embeds_dir = os.path.join(log_directory, "image_embeddings")
        os.makedirs(images_embeds_dir, exist_ok=True)
    else:
        images_embeds_dir = None

    cond_model = shared.sd_model.cond_stage_model

    shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
    with torch.autocast("cuda"):
        ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file, batch_size=batch_size)

    hijack = sd_hijack.model_hijack

    # The embedding must already be registered; a missing name raises KeyError here.
    embedding = hijack.embedding_db.word_embeddings[embedding_name]
    embedding.vec.requires_grad = True

    # Fixed 32-slot buffer of recent losses, indexed by step modulo 32. It starts
    # at zero, so the reported mean is pulled down until all slots are filled.
    losses = torch.zeros((32,))

    last_saved_file = "<none>"
    last_saved_image = "<none>"

    # Resume from the step stored in the embedding (None or 0 means a fresh start).
    ititial_step = embedding.step or 0
    if ititial_step > steps:
        return embedding, filename

    scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
    optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate)

    pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step)
    for i, entries in pbar:
        embedding.step = i + ititial_step

        # Lets the scheduler update the optimizer for this step; `finished` ends training.
        scheduler.apply(optimizer, embedding.step)
        if scheduler.finished:
            break

        if shared.state.interrupted:
            break

        with torch.autocast("cuda"):
            c = cond_model([entry.cond_text for entry in entries])
            x = torch.stack([entry.latent for entry in entries]).to(devices.device)
            loss = shared.sd_model(x, c)[0]
            del x

            losses[embedding.step % losses.shape[0]] = loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        epoch_num = embedding.step // len(ds)
        epoch_step = embedding.step - (epoch_num * len(ds)) + 1

        pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{len(ds)}]loss: {losses.mean():.7f}")

        # Periodic snapshot of the embedding into the log directory.
        if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0:
            last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt')
            embedding.save(last_saved_file)

        write_loss(log_directory, "textual_inversion_loss.csv", embedding.step, len(ds), {
            "loss": f"{losses.mean():.7f}",
            "learn_rate": scheduler.learn_rate
        })

        # Periodic preview image rendered through the txt2img pipeline.
        if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0:
            last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png')

            p = processing.StableDiffusionProcessingTxt2Img(
                sd_model=shared.sd_model,
                do_not_save_grid=True,
                do_not_save_samples=True,
            )

            if preview_from_txt2img:
                # Use the preview settings supplied by the caller.
                p.prompt = preview_prompt
                p.negative_prompt = preview_negative_prompt
                p.steps = preview_steps
                p.sampler_index = preview_sampler_index
                p.cfg_scale = preview_cfg_scale
                p.seed = preview_seed
                p.width = preview_width
                p.height = preview_height
            else:
                # Fall back to the first prompt of the current batch at training size.
                p.prompt = entries[0].cond_text
                p.steps = 20
                p.width = training_width
                p.height = training_height

            preview_text = p.prompt

            processed = processing.process_images(p)
            image = processed.images[0]

            shared.state.current_image = image

            # Also write a captioned copy of the preview that carries the most
            # recently saved embedding snapshot, both as a PNG text chunk and
            # embedded into the image data. Skipped while last_saved_file is
            # still the "<none>" placeholder (that path does not exist).
            if save_image_with_stored_embedding and os.path.exists(last_saved_file):

                last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png')

                info = PngImagePlugin.PngInfo()
                data = torch.load(last_saved_file)
                info.add_text("sd-ti-embedding", embedding_to_b64(data))

                title = "<{}>".format(data.get('name', '???'))
                checkpoint = sd_models.select_checkpoint()
                footer_left = checkpoint.model_name
                footer_mid = '[{}]'.format(checkpoint.hash)
                footer_right = '{}'.format(embedding.step)

                captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right)
                captioned_image = insert_image_data_embed(captioned_image, data)

                captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info)

            image.save(last_saved_image)

            # From here on last_saved_image is a display string, not a path.
            last_saved_image += f", prompt: {preview_text}"

        shared.state.job_no = embedding.step

        # HTML status text; every interpolated string is escaped.
        shared.state.textinfo = f"""
<p>
Loss: {losses.mean():.7f}<br/>
Step: {embedding.step}<br/>
Last prompt: {html.escape(entries[0].cond_text)}<br/>
Last saved embedding: {html.escape(last_saved_file)}<br/>
Last saved image: {html.escape(last_saved_image)}<br/>
</p>
"""

    checkpoint = sd_models.select_checkpoint()

    # Record which checkpoint the embedding was trained against, and drop the
    # cached checksum because the vector has changed.
    embedding.sd_checkpoint = checkpoint.hash
    embedding.sd_checkpoint_name = checkpoint.model_name
    embedding.cached_checksum = None
    embedding.save(filename)

    return embedding, filename
|
||||
@ -0,0 +1,42 @@
|
||||
import html
|
||||
|
||||
import gradio as gr
|
||||
|
||||
import modules.textual_inversion.textual_inversion
|
||||
import modules.textual_inversion.preprocess
|
||||
from modules import sd_hijack, shared
|
||||
|
||||
|
||||
def create_embedding(name, initialization_text, nvpt):
    """UI handler: create a new embedding file and refresh the embedding list.

    `nvpt` is passed on as the number of vectors per token and
    `initialization_text` as the initialisation text.

    Returns a 3-tuple: a dropdown update listing the sorted names of all loaded
    embeddings, a "Created: <path>" message, and an empty string.
    """
    created_path = modules.textual_inversion.textual_inversion.create_embedding(name, nvpt, init_text=initialization_text)

    # Rescan the embeddings directory so the new file is picked up.
    sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings()

    available_names = sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())
    dropdown_update = gr.Dropdown.update(choices=available_names)

    return dropdown_update, f"Created: {created_path}", ""
|
||||
|
||||
|
||||
def preprocess(*args):
    """UI handler: run image preprocessing with `args` and report completion.

    All positional arguments are forwarded unchanged to
    `modules.textual_inversion.preprocess.preprocess`.

    Returns a 2-tuple of the status message and an empty string.
    """
    modules.textual_inversion.preprocess.preprocess(*args)

    status_message = "Preprocessing finished."
    return status_message, ""
|
||||
|
||||
|
||||
def train_embedding(*args):
    """UI handler: train an embedding and return a textual summary.

    Optimizations are undone through `sd_hijack.undo_optimizations()` before
    training and re-applied in a `finally` clause, so they are restored whether
    training succeeds, is interrupted, or raises. Exceptions from training
    propagate to the caller unchanged.

    Returns a 2-tuple of the summary text and an empty string.

    Raises:
        AssertionError: if the `lowvram` command-line option is enabled.
    """
    assert not shared.cmd_opts.lowvram, 'Training models with lowvram not possible'

    try:
        sd_hijack.undo_optimizations()

        embedding, filename = modules.textual_inversion.textual_inversion.train_embedding(*args)

        outcome = 'interrupted' if shared.state.interrupted else 'finished'
        summary = f"""
Training {outcome} at {embedding.step} steps.
Embedding saved to {html.escape(filename)}
"""
        return summary, ""
    finally:
        sd_hijack.apply_optimizations()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,27 @@
|
||||
a photo of a [filewords]
|
||||
a rendering of a [filewords]
|
||||
a cropped photo of the [filewords]
|
||||
the photo of a [filewords]
|
||||
a photo of a clean [filewords]
|
||||
a photo of a dirty [filewords]
|
||||
a dark photo of the [filewords]
|
||||
a photo of my [filewords]
|
||||
a photo of the cool [filewords]
|
||||
a close-up photo of a [filewords]
|
||||
a bright photo of the [filewords]
|
||||
a cropped photo of a [filewords]
|
||||
a photo of the [filewords]
|
||||
a good photo of the [filewords]
|
||||
a photo of one [filewords]
|
||||
a close-up photo of the [filewords]
|
||||
a rendition of the [filewords]
|
||||
a photo of the clean [filewords]
|
||||
a rendition of a [filewords]
|
||||
a photo of a nice [filewords]
|
||||
a good photo of a [filewords]
|
||||
a photo of the nice [filewords]
|
||||
a photo of the small [filewords]
|
||||
a photo of the weird [filewords]
|
||||
a photo of the large [filewords]
|
||||
a photo of a cool [filewords]
|
||||
a photo of a small [filewords]
|
||||
@ -0,0 +1 @@
|
||||
picture
|
||||
@ -0,0 +1,19 @@
|
||||
a painting, art by [name]
|
||||
a rendering, art by [name]
|
||||
a cropped painting, art by [name]
|
||||
the painting, art by [name]
|
||||
a clean painting, art by [name]
|
||||
a dirty painting, art by [name]
|
||||
a dark painting, art by [name]
|
||||
a picture, art by [name]
|
||||
a cool painting, art by [name]
|
||||
a close-up painting, art by [name]
|
||||
a bright painting, art by [name]
|
||||
a cropped painting, art by [name]
|
||||
a good painting, art by [name]
|
||||
a close-up painting, art by [name]
|
||||
a rendition, art by [name]
|
||||
a nice painting, art by [name]
|
||||
a small painting, art by [name]
|
||||
a weird painting, art by [name]
|
||||
a large painting, art by [name]
|
||||
@ -0,0 +1,19 @@
|
||||
a painting of [filewords], art by [name]
|
||||
a rendering of [filewords], art by [name]
|
||||
a cropped painting of [filewords], art by [name]
|
||||
the painting of [filewords], art by [name]
|
||||
a clean painting of [filewords], art by [name]
|
||||
a dirty painting of [filewords], art by [name]
|
||||
a dark painting of [filewords], art by [name]
|
||||
a picture of [filewords], art by [name]
|
||||
a cool painting of [filewords], art by [name]
|
||||
a close-up painting of [filewords], art by [name]
|
||||
a bright painting of [filewords], art by [name]
|
||||
a cropped painting of [filewords], art by [name]
|
||||
a good painting of [filewords], art by [name]
|
||||
a close-up painting of [filewords], art by [name]
|
||||
a rendition of [filewords], art by [name]
|
||||
a nice painting of [filewords], art by [name]
|
||||
a small painting of [filewords], art by [name]
|
||||
a weird painting of [filewords], art by [name]
|
||||
a large painting of [filewords], art by [name]
|
||||
@ -0,0 +1,27 @@
|
||||
a photo of a [name]
|
||||
a rendering of a [name]
|
||||
a cropped photo of the [name]
|
||||
the photo of a [name]
|
||||
a photo of a clean [name]
|
||||
a photo of a dirty [name]
|
||||
a dark photo of the [name]
|
||||
a photo of my [name]
|
||||
a photo of the cool [name]
|
||||
a close-up photo of a [name]
|
||||
a bright photo of the [name]
|
||||
a cropped photo of a [name]
|
||||
a photo of the [name]
|
||||
a good photo of the [name]
|
||||
a photo of one [name]
|
||||
a close-up photo of the [name]
|
||||
a rendition of the [name]
|
||||
a photo of the clean [name]
|
||||
a rendition of a [name]
|
||||
a photo of a nice [name]
|
||||
a good photo of a [name]
|
||||
a photo of the nice [name]
|
||||
a photo of the small [name]
|
||||
a photo of the weird [name]
|
||||
a photo of the large [name]
|
||||
a photo of a cool [name]
|
||||
a photo of a small [name]
|
||||
@ -0,0 +1,27 @@
|
||||
a photo of a [name], [filewords]
|
||||
a rendering of a [name], [filewords]
|
||||
a cropped photo of the [name], [filewords]
|
||||
the photo of a [name], [filewords]
|
||||
a photo of a clean [name], [filewords]
|
||||
a photo of a dirty [name], [filewords]
|
||||
a dark photo of the [name], [filewords]
|
||||
a photo of my [name], [filewords]
|
||||
a photo of the cool [name], [filewords]
|
||||
a close-up photo of a [name], [filewords]
|
||||
a bright photo of the [name], [filewords]
|
||||
a cropped photo of a [name], [filewords]
|
||||
a photo of the [name], [filewords]
|
||||
a good photo of the [name], [filewords]
|
||||
a photo of one [name], [filewords]
|
||||
a close-up photo of the [name], [filewords]
|
||||
a rendition of the [name], [filewords]
|
||||
a photo of the clean [name], [filewords]
|
||||
a rendition of a [name], [filewords]
|
||||
a photo of a nice [name], [filewords]
|
||||
a good photo of a [name], [filewords]
|
||||
a photo of the nice [name], [filewords]
|
||||
a photo of the small [name], [filewords]
|
||||
a photo of the weird [name], [filewords]
|
||||
a photo of the large [name], [filewords]
|
||||
a photo of a cool [name], [filewords]
|
||||
a photo of a small [name], [filewords]
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 526 KiB After Width: | Height: | Size: 329 KiB |
Loading…
Reference in New Issue