Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Prolific Zero123 #131

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions configs/prolificdreamer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ data:
batch_size: 1
# 0-4999: 128x128, >=5000: 512x512
# this drastically reduces VRAM usage as empty space is pruned in early training
width: [64, 512]
height: [64, 512]
width: [128, 512]
height: [128, 512]
resolution_milestones: [5000]
camera_distance_range: [1.0, 1.5]
fovy_range: [40, 70]
Expand All @@ -19,6 +19,7 @@ data:
up_perturb: 0.
eval_camera_distance: 1.5
eval_fovy_deg: 70.
n_val_views: 30

system_type: "prolificdreamer-system"
system:
Expand Down Expand Up @@ -107,7 +108,7 @@ trainer:
max_steps: 25000
log_every_n_steps: 1
num_sanity_val_steps: 0
val_check_interval: 200
val_check_interval: 100
enable_progress_bar: true
precision: 32

Expand Down
145 changes: 145 additions & 0 deletions configs/prolificzero123.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
name: "prolificzero123"
tag: "PDdef_${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_${rmspace:${system.prompt_processor.prompt},_}"
exp_root_dir: "outputs"
seed: 0

data_type: "single-image-datamodule"
data: # threestudio/data/image.py -> SingleImageDataModuleConfig
image_path: ???
height: 192
width: 192
default_elevation_deg: 0.0
default_azimuth_deg: 0.0
default_camera_distance: 1.5
default_fovy_deg: 70.0
random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
batch_size: 1
width: [128, 512]
height: [128, 512]
resolution_milestones: [5000]
camera_distance_range: [1.0, 1.5]
fovy_range: [70.0, 70.0] # Zero123 has fixed fovy
elevation_range: [-10, 45]
camera_perturb: 0.
center_perturb: 0.
up_perturb: 0.
eval_elevation_deg: ${data.default_elevation_deg}
eval_camera_distance: ${data.default_camera_distance}
eval_fovy_deg: ${data.default_fovy_deg}
# light_position_perturb: 1.0
# light_distance_range: [2.8, 3.3]
# batch_uniform_azimuth: False
n_val_views: 30
n_test_views: 120

system_type: "prolificzero123-system"
system:
geometry_type: "implicit-volume"
geometry:
radius: 2.0
normal_type: null # voletiv: CHECK

density_bias: "blob_magic3d"
density_activation: softplus
density_blob_scale: 10.
density_blob_std: 0.5

pos_encoding_config:
otype: HashGrid
n_levels: 16
n_features_per_level: 2
log2_hashmap_size: 19
base_resolution: 16
per_level_scale: 1.447269237440378 # max resolution 4096

material_type: "no-material"
material:
n_output_dims: 3
color_activation: sigmoid

background_type: "neural-environment-map-background"
background:
color_activation: sigmoid
random_aug: true # voletiv: CHECK

renderer_type: "nerf-volume-renderer"
renderer:
radius: ${system.geometry.radius}
num_samples_per_ray: 512
# return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}} # voletiv: CHECK!
# return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}} # voletiv: CHECK!

prompt_processor_type: "no-prompt-processor"
prompt_processor:
pretrained_model_name_or_path: ""
prompt: ""

guidance_type: "zero123-vsd-guidance"
guidance:
pretrained_model_name_or_path: "./load/zero123/105000.ckpt"
pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
# pretrained_model_name_or_path_lora: "./load/zero123/105000.ckpt"
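pretrained_model_name_or_path_lora: "/admin/home-vikram/zero123xl/20230605_last.ckpt" # FIXME: machine-specific absolute path; point this at a local checkpoint (e.g. ./load/zero123/105000.ckpt) before merging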
pretrained_config_lora: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
vram_O: ${not:${gt0:${system.guidance_eval_freq}}}
cond_image_path: ${data.image_path}
cond_elevation_deg: ${data.default_elevation_deg}
cond_azimuth_deg: ${data.default_azimuth_deg}
cond_camera_distance: ${data.default_camera_distance}
guidance_scale: 3.0
min_step_percent: 0.02
max_step_percent: 0.98
max_step_percent_annealed: 0.5
anneal_start_step: 5000
guidance_eval_freq: 13

loggers:
wandb:
enable: false
project: "threestudio"

loss:
lambda_rgb: 0. # was 200
# lambda_mask: 50.
# lambda_depth: 1.
lambda_vsd: 1.
lambda_lora: 1.
lambda_orient: 0.
lambda_sparsity: 10. # 2.0 was 10.
lambda_opaque: [10000, 0.0, 1000.0, 10001] # [0, 1., 20., 5000] # was [10000, 0.0, 1000.0, 10001]
lambda_z_variance: 0.
optimizer:
name: AdamW
args:
betas: [0.9, 0.99]
eps: 1.e-8
# name: Adan
# args:
# lr: 0.01
# max_grad_norm: 5.0
# eps: 1.e-8
# weight_decay: 1.e-5
params:
geometry.encoding:
lr: 0.01
geometry.density_network:
lr: 0.001
geometry.feature_network:
lr: 0.001
background:
lr: 0.001
guidance:
lr: 0.0001

trainer:
max_steps: 25000
log_every_n_steps: 1
num_sanity_val_steps: 0
val_check_interval: 100
enable_progress_bar: true
precision: 32

checkpoint:
save_last: true
save_top_k: -1
every_n_train_steps: ${trainer.max_steps}
138 changes: 138 additions & 0 deletions configs/prolificzero123SD.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
name: "prolificzero123SD"
tag: "${data.random_camera.height}_${rmspace:${basename:${data.image_path}},_}_${rmspace:${system.prompt_processor.prompt},_}"
exp_root_dir: "outputs"
seed: 0

data_type: "single-image-datamodule"
data: # threestudio/data/image.py -> SingleImageDataModuleConfig
image_path: ???
height: 192
width: 192
default_elevation_deg: 0.0
default_azimuth_deg: 0.0
default_camera_distance: 3.2
default_fovy_deg: 20.0
random_camera: # threestudio/data/uncond.py -> RandomCameraDataModuleConfig
batch_size: 1
width: 128
height: 128
eval_height: 512
eval_width: 512
camera_distance_range: [3.2, 3.3]
fovy_range: [20.0, 20.0] # Zero123 has fixed fovy
elevation_range: [-10, 60]
camera_perturb: 0.
center_perturb: 0.
up_perturb: 0.
eval_elevation_deg: ${data.default_elevation_deg}
eval_camera_distance: ${data.default_camera_distance}
eval_fovy_deg: ${data.default_fovy_deg}
light_position_perturb: 1.0
light_distance_range: [7.5, 10.0]
batch_uniform_azimuth: False
n_val_views: 30
n_test_views: 120

system_type: "prolificzero123-system"
system:
geometry_type: "implicit-volume"
geometry:
radius: 2.0
normal_type: null # voletiv: CHECK

density_bias: "blob_magic3d"
density_activation: softplus
density_blob_scale: 10.
density_blob_std: 0.5

pos_encoding_config:
otype: HashGrid
n_levels: 16
n_features_per_level: 2
log2_hashmap_size: 19
base_resolution: 16
per_level_scale: 1.447269237440378 # max resolution 4096

material_type: "no-material"
material:
n_output_dims: 3
color_activation: sigmoid

background_type: "neural-environment-map-background"
background:
color_activation: sigmoid
random_aug: true # voletiv: CHECK

renderer_type: "nerf-volume-renderer"
renderer:
radius: ${system.geometry.radius}
num_samples_per_ray: 512
# return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}} # voletiv: CHECK!
# return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}} # voletiv: CHECK!

prompt_processor_type: "stable-diffusion-prompt-processor"
prompt_processor:
pretrained_model_name_or_path: "stabilityai/stable-diffusion-2-1-base"
prompt: ???

guidance_type: "zero123sd-vsd-guidance"
guidance:
pretrained_model_name_or_path: "./load/zero123/105000.ckpt"
pretrained_config: "./load/zero123/sd-objaverse-finetune-c_concat-256.yaml"
vram_O: ${not:${gt0:${system.guidance_eval_freq}}}
cond_image_path: ${data.image_path}
cond_elevation_deg: ${data.default_elevation_deg}
cond_azimuth_deg: ${data.default_azimuth_deg}
cond_camera_distance: ${data.default_camera_distance}
pretrained_model_name_or_path_lora: "stabilityai/stable-diffusion-2-1"
guidance_scale: 3.0
min_step_percent: 0.02
max_step_percent: 0.98
max_step_percent_annealed: 0.5
anneal_start_step: 5000
guidance_eval_freq: 0

loggers:
wandb:
enable: false
project: "threestudio"

loss:
lambda_rgb: 0. # was 200
# lambda_mask: 50.
# lambda_depth: 1.
lambda_vsd: 1.
lambda_lora: 1.
lambda_orient: 0.
lambda_sparsity: 10.
lambda_opaque: [10000, 0.0, 1000.0, 10001]
lambda_z_variance: 0.
optimizer:
name: AdamW
args:
betas: [0.9, 0.99]
eps: 1.e-15
params:
geometry.encoding:
lr: 0.01
geometry.density_network:
lr: 0.001
geometry.feature_network:
lr: 0.001
background:
lr: 0.001
guidance:
lr: 0.0001

trainer:
max_steps: 25000
log_every_n_steps: 1
num_sanity_val_steps: 0
val_check_interval: 100
enable_progress_bar: true
precision: 32

checkpoint:
save_last: true
save_top_k: -1
every_n_train_steps: ${trainer.max_steps}
3 changes: 1 addition & 2 deletions configs/zero123.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ data: # threestudio/data/image.py -> SingleImageDataModuleConfig
eval_elevation_deg: ${data.default_elevation_deg}
eval_camera_distance: ${data.default_camera_distance}
eval_fovy_deg: ${data.default_fovy_deg}
light_sample_strategy: "dreamfusion"
batch_uniform_azimuth: False
n_val_views: 30
n_test_views: 120
Expand Down Expand Up @@ -86,7 +85,7 @@ system:
return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}}
return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}}

prompt_processor_type: "zero123-prompt-processor"
prompt_processor_type: "no-prompt-processor"
prompt_processor:
pretrained_model_name_or_path: ""
prompt: ""
Expand Down
1 change: 0 additions & 1 deletion extern/ldm_zero123/models/diffusion/ddim.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import numpy as np
import torch
from einops import rearrange
from tqdm import tqdm

from extern.ldm_zero123.models.diffusion.sampling_util import (
Expand Down
Loading