# train.py

import matplotlib
matplotlib.use('Agg')

import torch, os, sys, numpy as np, pprint, utils, shutil, argparse, random
import shapedata, imgutils, meshutils, graphicsutils
import logging, datetime, matplotlib
import matplotlib.pyplot as plt
from torch.nn import DataParallel
from renderer import SrRenderer
from model import CyclicGenRen
from losses import Cycle_1_loss_calculator, Cycle_2_loss_calculator
from vector_adversaries import *
from image_adversaries import *
from shape_adversaries import *
from utils import rfp, bool_string_type, AccumTimer
from options import *
from torch.utils.tensorboard import SummaryWriter

def main():
    """
    Command-line entry point: parse arguments, build every training component, launch training.

    Steps, in order:
      1. Parse the positional arguments (options set name, output path), --devices, and one
         auto-generated override flag per option key whose type can be determined.
      2. Prepare the output directory, the log file and the Tensorboard writer.
      3. Apply the command-line overrides onto the chosen options set.
      4. Build the (optional) template mesh, the renderer, the datasets, the CyclicGenRen
         model and the image / shape / pose / latent-texture / texture critics.
      5. Hand everything over to run_training.

    Raises:
        ValueError: if the shape dataset type, or any critic type/arch option, is not recognised.
        AssertionError: if the output directory exists and overwriting is not allowed, or if
            one of the option consistency checks fails.
    """

    # Core parser arguments
    parser = argparse.ArgumentParser(description='GenRen entry point')
    parser.add_argument('options_choice', type=str,
                        help='Name of options set')
    parser.add_argument('outpath', type=str,
                        help='Path to output dir')
    parser.add_argument('--devices', type=str, default='0',
                        help='GPU device numbers (as comma separated list)')
    # Autogenerated parser arguments based on options. Each typed key gets a --<key> flag
    # whose default is None, which later means "keep the value from the options set".
    movable_key_names = []
    for key in all_keys():
        if key.startswith('w_') or key.endswith('_lr') or key.endswith('_fp') or ('_lr_' in key):
            arg_type = float # float parameters
        elif key in str_keys():
            arg_type = str
        elif key in int_keys() or key.startswith('dim_'):
            arg_type = int
        elif key in bool_keys():
            arg_type = bool_string_type # must pass true or false (the string)
        else:
            continue # keys matching no rule cannot be overridden from the command line
        parser.add_argument('--' + key, type=arg_type)
        movable_key_names.append(key)
    args = parser.parse_args()

    # Read inputs arguments
    options = get_options(args.options_choice)

    # Modify output path
    true_outpath = os.path.join(options['out_dir_prepen'], args.outpath)

    # Check: handle output existence
    _folder_deletion = options['allow_overwrite']
    _h_msg = "Output directory already exists (%s)" % true_outpath
    if _folder_deletion: # Delete and re-create if present
        if os.path.isdir(true_outpath):
            shutil.rmtree(true_outpath)
    else: # Enforce the folder is not pre-existing
        assert not os.path.isdir(true_outpath), _h_msg
    os.makedirs(true_outpath)

    ### Set up logging ###
    # Standard log file
    logfile = os.path.join(true_outpath, 'genren.log')
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    fmt = logging.Formatter('%(message)s')
    # The root logger only has a handler if something configured logging earlier;
    # install a stream handler ourselves rather than failing on an empty handler list.
    if not root.handlers:
        root.addHandler(logging.StreamHandler())
    root.handlers[0].setFormatter(fmt)
    fh = logging.FileHandler(logfile)
    fh.setLevel(logging.INFO)
    fh.setFormatter(fmt)
    root.addHandler(fh)
    # Tensorboard setup
    board_writer = SummaryWriter(log_dir=os.path.join(true_outpath, 'tb_log'),
                                 filename_suffix=args.options_choice)
    options['board_writer'] = board_writer

    logging.info('Entering GenRen Main')
    logging.info("Date & Time: %s", datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
    utils.log_current_git_revision_safe()
    logging.info('Option choice: %s', args.options_choice)
    logging.info('Devices: %s', args.devices)
    logging.info('Output path: %s', true_outpath)
    logging.info('Num logging handlers: %d', len(root.handlers))

    # Now that the true outpath has been used for tensorboard and the logger,
    # Let's create an inner folder for the saved files
    outdir_core = true_outpath
    true_outpath = os.path.join(true_outpath, 'progress')

    # Device handling (entries are stripped so that "0, 1" is accepted as well as "0,1")
    devices = [torch.device('cuda:%s' % d.strip()) for d in args.devices.split(',')]
    device_main = devices[0]

    options['devices']         = devices
    options['device_main']     = device_main
    options['output_dir']      = true_outpath
    options['output_dir_core'] = outdir_core
    options['options_choice']  = args.options_choice

    # Overwrite specified options with cmd line specified key
    for key in movable_key_names:
        in_arg = getattr(args, key)
        if in_arg is not None:
            curr_val = options[key]
            logging.info(('Altering base options for %s:' % key) + str(curr_val)
                         + ' -> ' + str(in_arg))
            options[key] = in_arg

    logging.info('\nOPTIONS')
    logging.info(optformat(options))

    #-----------------------------------------------------------------------------------#

    assert options['B_imgs'] == options['B_shapes'], "Currently, batch sizes must be equal"
    assert options['pose_buffer_size'] >= options['B_shapes'], "Pose buffer should be as large as the batch"

    IMSIZE           = options['img_size']
    NUM_PC_POINTS    = options['n_pc_points'] # sampling from orig meshes
    IMG_BATCH_SIZE   = options['B_imgs']
    SHAPE_BATCH_SIZE = options['B_shapes']
    img_data_dir     = options['img_data_dir']
    shape_data_dir   = options['shape_data_dir']

    # Dataset specific settings
    data_scale = options['data_scale']
    R_angle    = options['data_R_angle']
    R_axis     = options['data_R_axis']

    # Initialize model template: either read, normalize and rotate a predefined mesh,
    # or pass None to the model.
    template_path = options['template_path']
    use_predefined_template = template_path is not None
    if use_predefined_template:
        assert options['manual_template_UV'] is not None, "Specify manual UV!"
        assert options['manual_template_UV'] == template_path, "For BFF, use the same file for geom and uv!"
        template, t_faces = meshutils.read_surface_mesh(template_path,
                                to_torch=True, subdivide=options['subdivide_template'])
        template_scale    = options['template_scale']
        template_R_angle  = options['template_R_angle']
        template_R_axis   = options['template_R_axis']
        template       = meshutils.norm_mesh(template, scale=template_scale)
        template       = meshutils.rotate(template_R_angle, template_R_axis, template)
        template_mesh  = (template, t_faces)
    else:
        assert options['manual_template_UV'] is None, "No manual UV!"
        template_mesh  = None
    learn_template = options['learn_template']

    # Initialize renderer
    renderer = SrRenderer(IMSIZE).to(device_main)
    renderer.set_eye(torch.FloatTensor(options['fixed_eye']))
    renderer.move_at_up(device_main)

    # Initialize datasets and generate dataloaders
    shapedata_type = options['shape_data_type'].strip().lower()
    if   shapedata_type == 'obj': shape_dataset_class = shapedata.ObjToPcsNormalsDataset
    elif shapedata_type == 'pn':  shape_dataset_class = shapedata.DirectPointsAndNormals
    else: raise ValueError('Unknown shape dataset type ' + options['shape_data_type'])
    shape_dataset = shape_dataset_class(shape_data_dir,
                                        NUM_PC_POINTS,
                                        duplicate      = options['shape_data_dup'],
                                        pre_transform  = True,
                                        subset_num     = options['subset_num'],
                                        scale          = data_scale,
                                        rot_angle      = R_angle,
                                        rot_axis       = R_axis)
    if options['preload_img_data']:
        img_data_class = imgutils.SinglePreloadedDirImageDataset
    else:
        img_data_class = imgutils.SingleDirImageDataset
    img_dataset        = img_data_class(img_data_dir,
                                        use_alpha          = options['use_alpha'],
                                        resize             = IMSIZE,
                                        load_gt_file       = options['USE_GT'],
                                        num_fixed_to_store = options['B_imgs'],
                                        take_subset        = options['img_data_subset'])
    shape_dataloader = shape_dataset.get_infinite_dataloader(SHAPE_BATCH_SIZE)
    img_dataloader   = img_dataset.get_infinite_dataloader(IMG_BATCH_SIZE)

    # Create the cyclic genren model
    cyclic_genren = CyclicGenRen(
                        dim_xi_T                     = options['dim_xi_T'],
                        dim_xi_p                     = options['dim_xi_p'],
                        dim_lat_pert                 = options['dim_lat_pert'],
                        dim_backbone                 = options['dim_backbone'],
                        num_impulses                 = options['nts'],
                        num_hypotheses               = options['num_pose_hypotheses'],
                        use_alpha                    = options['use_alpha'],
                        options                      = options,
                        template_mesh                = template_mesh,
                        learn_template               = learn_template,
                        parallelize                  = devices,
                        rotation_representation_mode = options['rrm'],
                        FDR_pixel_distribution       = img_dataset.unmasked_pixel_set(),
                        renderer                     = renderer)
    n_template_verts = cyclic_genren.nV
    cyclic_genren    = cyclic_genren.to(device_main)

    ##### Initialize critics #####
    #-------------------------------------------------------------------------------------------------------------------#
    logging.info('Initializing critics')
    #>> Image critic <<#
    if   options['img_critic_type'] == 'lsgan':
        logging.info('Img critic: SLSGAN')
        img_critic = ImageAdversarySimpleLSGAN(critic_arch=options['img_critic_arch']) # 'spec'
    elif options['img_critic_type'] == 'wgangp':
        logging.info('Img critic: WGAN-GP')
        img_critic_gp_pen_weight = options['img_critic_gp_pen_weight_fp']
        img_critic = ImageAdversarySimpleWganGp(lambda_weight    = img_critic_gp_pen_weight,
                                                drift_mag_weight = 0.01,
                                                critic_type      = options['wgan_img_mini_critic'])
    else:
        # Fail here with a clear message instead of a NameError when the critic is wrapped below
        raise ValueError('Unknown image critic type %r' % (options['img_critic_type'],))

    #>> Shape Critic <<#
    shape_critic_type = options['shape_critic_type'].strip().lower()
    if shape_critic_type == 'single_stage':
        logging.info('Using single stage latent shape critic (dim(v) = %d)' % options['dim_lat_pert'])
        shape_critic = SingleStageShapeAdversary(options['dim_lat_pert'],
                                                 (512, 256, 128),
                                                 wgan_gp_pen_weight = 10,
                                                 drift_mag_weight   = 0.01)
    elif shape_critic_type == 'com':
        logging.info('Using single stage latent shape critic (dim(v) = %d, |V| = %d)' %
                        (options['dim_lat_pert'], n_template_verts))
        shape_critic = ComSingleStageShapeAdversary(options['dim_lat_pert'],
                                                     n_template_verts,
                                                     (512, 256, 128),
                                                     wgan_gp_pen_weight = 10,
                                                     drift_mag_weight   = 0.01)
    else:
        raise ValueError('Unknown shape critic type %r' % (options['shape_critic_type'],))

    #>> Critic on Sampled Poses <<#
    # Pose critic (for cycle 1, matching cycle 2)
    _pose_buffer_size = options['pose_buffer_size'] # 100
    logging.info('Using Sinkhorn pose buffer critic (|B| = %d)' % _pose_buffer_size)
    pose_critic = Wasserstein3dPoseAdversary(buffer_size=_pose_buffer_size)

    #>> Latent texture critic <<#
    # Latent texture critic (pushing cy2 inference to N(0,I)); not used when xi_T is handled by a VAE
    if options['vae_for_xi_T']:
        xi_T_critic = None
    elif options['use_SWD_loss_for_xi_T']:
        _swd_nprojs = options['num_swd_projections_xi_T']
        logging.info('Using SWD critic for xi_T (n_projs = %d)' % _swd_nprojs)
        xi_T_critic = StdNormalSlicedWassersteinMatcher(num_projections = _swd_nprojs,
                                                        expected_dim    = options['dim_xi_T'])
    else:
        logging.info('Using WGAN-GP critic for xi_T')
        xi_T_critic = VectorAdversaryLinWGANGP(options['dim_xi_T'], (512, 256, 128),
                            wgan_gp_pen_weight=10.0, drift_mag_weight=0.01)

    #>> Texture critic <<#
    # Decoded texture adversary (pushing cy1 outputs to look like cy2 inferences)
    if   options['texture_critic_arch'] == 'lsgan':
        assert False # this architecture is deliberately disabled
        logging.info('Using SLSGAN for texture critic')
        texture_critic = VectorAdversaryLin(cyclic_genren.nV * 3, (512, 256, 128))
    elif options['texture_critic_arch'] == 'wgan':
        texture_critic_gp_pen_weight = options['tex_critic_gp_pen_weight_fp']
        # NOTE(review): the result is not consumed by the critic constructed below; it was
        # only passed to graph-based critics that are no longer built here. The call is kept
        # in case it has side effects on the model -- confirm before removing.
        fixed_batch_graph = cyclic_genren.generate_featureless_dgl_template_graph_batch(
                                            SHAPE_BATCH_SIZE, as_dglb=True)

        # Single combined critic; it receives options['texture_critic_type'] (and the full
        # options) in place of the per-type critics (global histogram, graph patch,
        # global+local, vec+histo, unfolded vector) that used to be constructed here.
        texture_critic = UvTextureImagePlus2dHistoCritic(
                                 ## Histogram critic settings
                                 num_template_vertices = cyclic_genren.nV,
                                 hidden_sizes          = (512, 256, 128),
                                 batch_size            = SHAPE_BATCH_SIZE,
                                 ## UV texture image
                                 inchannels            = 3,
                                 critic_type           = 'full',
                                 ## Shared parameters
                                 wgan_gp_pen_weight    = texture_critic_gp_pen_weight,
                                 drift_mag_weight      = 0.01,
                                 ## Meta-parameters
                                 texture_critic_type   = options['texture_critic_type'],
                                 options               = options,
                                 )
    else:
        raise ValueError('Unknown texture critic arch %r' % (options['texture_critic_arch'],))

    ### </ End critic initializations /> ###
    #-------------------------------------------------------------------------------------------------------------------#

    # Parallelize critics
    img_critic   = DataParallel(   img_critic, device_ids=devices ).to(device_main)
    shape_critic = DataParallel( shape_critic, device_ids=devices ).to(device_main)
    pose_critic  = DataParallel(  pose_critic, device_ids=devices ).to(device_main)
    if xi_T_critic is not None:
        xi_T_critic  = DataParallel(  xi_T_critic, device_ids=devices ).to(device_main)
    texture_critic = DataParallel( texture_critic, device_ids=devices ).to(device_main)

    # Run training (flip NAN_CHECK to True to enable autograd anomaly detection)
    NAN_CHECK = False
    with torch.autograd.set_detect_anomaly(NAN_CHECK):
        run_training(cyclic_genren,
                     renderer,
                     img_dataloader,
                     shape_dataloader,
                     img_critic,
                     shape_critic,
                     pose_critic,
                     xi_T_critic,
                     None, # xi_p_critic (no such critic is built)
                     texture_critic,
                     options)
    
#------------------------------------------------------------------------------#

def run_training(model, 
                 renderer,
                 img_dataloader, 
                 shape_dataloader, 
                 img_critic,
                 shape_critic,
                 pose_critic,
                 xi_T_critic,
                 xi_p_critic,
                 texture_critic,
                 options):
    
    # Unpack arguments
    n_gen_iters                     = options['n_gen_iters']
    img_critic_iters_per_gen_iter   = options['img_critic_iters_per_gen_iter']
    cy2_critics_update_per_gen_iter = options['cy2_critics_update_per_gen_iter']
    print_every                     = options['print_every']
    device_main                     = options['device_main']
    save_imgs_every                 = options['save_imgs_every']
    USE_GT                          = options['USE_GT']
    thresh                          = options['init_gt_iters']
    cham_only_iters                 = options['chamfer_only_iters']
    stage_2_pretrain_iters          = options['stage_2_pretrain_iters']
    outdir                          = options['output_dir']
    outdir_core                     = options['output_dir_core']
    B                               = options['B_imgs']
    NH                              = options['num_pose_hypotheses']
    num_frozen_mesh_ae_iters        = options['freeze_mesh_ae_iters']
    mesh_ae_load_path               = options['mesh_ae_load_path']
    mode_1_iters                    = options['mode_1_iters']
    mode_2_iters                    = options['mode_2_iters']
    cy2_annealing_period            = options['cy2_annealing_period']
    board_writer                    = options['board_writer'] 
    options_choice                  = options['options_choice']
    run_rerenders                   = options['run_rerenderings']

    if options['w_reren_adv_loss'] > 1e-8:
        assert run_rerenders

    assert options['shape_critic_updates'] <= cy2_critics_update_per_gen_iter
    assert options['shape_critic_updates'] >= 1

    _pt_its = n_gen_iters - cham_only_iters - stage_2_pretrain_iters - mode_1_iters - mode_2_iters
    S  = '\nTraining Phases (Total iters: %d)\n' % n_gen_iters
    S += ' (a) Mesh Autoencoder pretraining: %d iters\n' % cham_only_iters
    S += ' (b) Domain randomized pretraining: %d iters\n' % stage_2_pretrain_iters
    S += ' (c) Mode I: %d iters (cy2 annealing: %d)\n' % (mode_1_iters, cy2_annealing_period)
    S += ' (d) Mode II: %d iters\n' % mode_2_iters
    S += ' (e) Mode III: %d iters\n' % _pt_its
    logging.info(S) 

    if not USE_GT: 
        thresh = 0
    else:
        logging.info('GT Present')
        thresh += cham_only_iters 

    ### Loss functions ###
    # S -> I
    Cy1_loss = Cycle_1_loss_calculator(V = model.template_V, 
                                       E = model.template_E, 
                                       F = model.template_F,
                                       options = options
                                       ).to(device_main) # 
    # I -> S
    Cy2_loss = Cycle_2_loss_calculator(V = model.template_V,
                                       E = model.template_E, 
                                       F = model.template_F,
                                       options = options,
                                       mrl = Cy1_loss.get_mrl() # Avoid extra precomputations
                                       ).to(device_main) # 
    # Additional initializations
    logging.info('Optimization preparation')
    rot_loss_function = graphicsutils.MinAngleComposedRotationLoss()
    n_pose_critic_params = len(list(pose_critic.parameters()))
    n_xi_T_critic_params = 0 if xi_T_critic is None else len(list(xi_T_critic.parameters()))

    ## Optimizers ##
    # SAGAN betas = (0, 0.9), default betas = (0.9, 0.999)
    #BETAS = options['betas'] # (0.9, 0.99)
    BETAS = (options['beta1_fp'], options['beta2_fp'])
    gen_wd = 1e-6 # Generator weight decay
    opter = lambda p, lr, wd: torch.optim.Adam(p, lr=lr, weight_decay=wd, betas=BETAS)
    img_critic_optimizer       = opter(img_critic.parameters(),     lr = options['imc_lr'], wd=1e-3)
    shape_critic_optimizer     = opter(shape_critic.parameters(),   lr = options['shc_lr'], wd=1e-3)
    if n_pose_critic_params > 0:
        pose_critic_optimizer  = opter(pose_critic.parameters(),    lr = options['xip_lr'], wd=1e-6)
    #xi_p_critic_optimizer  = opter(xi_p_critic.parameters(),  lr=options['xip_lr'])
    if n_xi_T_critic_params > 0:
        xi_T_critic_optimizer  = opter(xi_T_critic.parameters(),    lr = options['xit_lr'], wd=1e-3)
    texture_critic_optimizer   = opter(texture_critic.parameters(), lr = options['C_lr'],   wd=1e-3)
    logging.info('Initialized optimizers')

    # CPTC critic
    if options['use_adversarial_cptc']:
        cptc_critic = RotationPredictorRN20().to(device_main)
        cptc_adversary_optimizer = opter(cptc_critic.parameters(), lr=0.0005, wd=1e-6)
    else:
        cptc_critic = None


    ### Function Definitions ###

    # Predefine the image critic update
    def run_img_critic_turn(gen_iter, include_fdr_batch, fake_gen_imgs, real_imgs, rerendered_real_images, P): #
        """
        Perform exactly ONE optimizer step of the image critic and return its loss.

        fake_gen_imgs:          generated images, shown to the critic as fakes
        real_imgs:              dataset images, shown to the critic as reals
        rerendered_real_images: re-renders of real_imgs with leading dimension B * NH
                                (one per pose hypothesis)
        P:                      pose probabilities; argmax over dim 1 selects the hypothesis

        Each batch member's real image is replaced by its most probable re-render with
        probability options['img_critic_use_cy2_renders_chance_fp'].
        gen_iter and include_fdr_batch are accepted but not used by this function.
        """
        swap_prob = options['img_critic_use_cy2_renders_chance_fp']
        critic_reals = real_imgs
        if swap_prob > 1e-6:
            # 1 where the re-render replaces the dataset image, 0 where the original is kept
            swap_mask = (torch.rand(B) < swap_prob).float().view(B,1,1,1).to(real_imgs.device)
            n_total, n_chan, height, width = rerendered_real_images.shape
            assert n_total == B * NH
            # Split the hypotheses into their own axis, then pick the most probable one
            per_hypothesis = rerendered_real_images.reshape(B,NH,n_chan,height,width)
            best_index = P.detach().argmax(dim=1).view(B,1,1,1,1).expand(-1,-1,n_chan,height,width)
            best_rerenders = per_hypothesis.gather(dim=1, index=best_index).squeeze(1).detach()
            critic_reals = ( real_imgs * (1.0 - swap_mask) + best_rerenders * swap_mask )
        # Critic-only update: both inputs are detached before they reach the critic
        img_critic_optimizer.zero_grad()
        critic_out = img_critic(I_fake  = fake_gen_imgs.detach(),
                                I_real  = critic_reals.detach(),
                                for_gen = False)
        ic_loss = critic_out.mean()
        ic_loss.backward()
        img_critic_optimizer.step()
        return ic_loss

    # Predefine the image critic update
    def run_img_critic_turn_old(gen_iter, include_fdr_batch): #
        """
        Older image critic update that samples its own image and shape batches.

        Runs up to img_critic_iters_per_gen_iter critic steps and returns the loss of the
        last one. A zero tensor is returned if no step runs, which includes the case
        gen_iter < thresh.

        Each real image is replaced by shape2img(img2shape(I_real)) with probability
        options['img_critic_use_cy2_renders_chance_fp'].
        The include_fdr_batch path is guarded by `assert False`.
        """
        swap_prob = options['img_critic_use_cy2_renders_chance_fp']
        ic_loss = torch.tensor([0.0]).to(device_main)
        for _step in range(img_critic_iters_per_gen_iter):
            if gen_iter < thresh:
                return ic_loss
            # Draw an image batch (any ground-truth pose that comes with it is not used)
            img_batch = img_dataloader.sample()
            if USE_GT:
                ic_images, gt_R, gt_t = img_batch
            else:
                ic_images = img_batch
            ic_images = ic_images.to(device_main)
            # Draw a shape batch: points and normals
            ic_shapes, ic_normals = shape_dataloader.sample()
            ic_shapes, ic_normals = ic_shapes.to(device_main), ic_normals.to(device_main)
            # Everything fed to the critic is produced without gradient tracking
            with torch.no_grad():
                # Fake images rendered from the sampled shapes
                cy1_out = model.shape_to_image(ic_shapes, ic_normals, renderer)
                renders = cy1_out[0].detach()
                if include_fdr_batch:
                    assert False
                    fdr_renders = model.run_domain_randomized_cycle_1(ic_shapes,
                                            ic_normals, renderer, learned_tex=False)[0]
                # The mixup needs an extra cycle-2 pass, so only do it when replacements can occur
                if swap_prob > 1e-6:
                    # 1 where the reconstruction replaces the real image, 0 where it is kept
                    swap_mask = (torch.rand(B) < swap_prob).float().view(B,1,1,1).to(renders.device)
                    rerenders, P = model.run_cycle_2(ic_images, renderer, renders_and_probs_only=True)
                    n_total, n_chan, height, width = rerenders.shape
                    assert n_total == B * NH
                    # Keep only the most probable pose hypothesis per batch member
                    per_hypothesis = rerenders.reshape(B,NH,n_chan,height,width)
                    best_index = P.detach().argmax(dim=1).view(B,1,1,1,1).expand(-1,-1,n_chan,height,width)
                    best_rerenders = per_hypothesis.gather(dim=1, index=best_index).squeeze(1).detach()
                    ic_images = ( ic_images * (1.0 - swap_mask) + best_rerenders * swap_mask )
            # Critic step
            img_critic_optimizer.zero_grad()
            ic_loss = img_critic(I_fake=renders, I_real=ic_images, for_gen=False).mean()
            ic_loss.backward()
            if include_fdr_batch:
                # Second gradient contribution, accumulated before the single optimizer step
                fdr_loss = img_critic(I_fake=renders, I_real=fdr_renders, for_gen=False).mean()
                fdr_loss.backward()
            img_critic_optimizer.step()
        return ic_loss

    # Predefine the domain randomized Cy1 loss computation and update
    DR_full_rand_iters = 0
    DR_LR_iters = 0
    def compute_domain_randomized_quantities(cy1_shapes, cy1_normals, learned_tex=False):
        """
        Maps input shapes (PC + normals) to generated renders (and intermediates) using domain randomized 
            poses and (optionally) textures
        """
        nonlocal DR_LR_iters; nonlocal DR_full_rand_iters 
        if learned_tex: DR_LR_iters += 1
        else:           DR_full_rand_iters += 1
        ( renders,         # Generated image renders
          texture,         # Random texture (from xi_T OR from random)
          V_new,           # Deformed nodal positions (incl. Euclidean transform)
          V_new_pe,        # Deformed template (before Euclidean transform)
          V_new_ints,      # Intermediate deformations (S -> I)
          delta,           # Nodal perturbation vector
          R, t,            # Random Euclidean transform (pose) [input]
          v,               # Random latent deformation [input]
          M_hat,           # Reconstructed mesh [output]
          M_hat_pe,        # Reconstructed mesh before Euclidean transform [output]
          M_hat_ints,      # Intermediate deformations reconstruction (I -> S)
          xi_T_hat,        # Inferred latent texture
          decoded_texture, # Decoded reconstructed texture
          v_hat,           # Inferred latent deformation
          R_hat,           # Inferred rotation
          t_hat,           # Inferred translation
          r_hat,           # Inferred intermediate rotation representation
          delta_hat,       # Inferred perturbation
          pose_probs,      # Pose probabilities from image inference
          _xi_T,           # initial latent texture [None if not learned_tex]
          mu_v, logvar_v,
          dr_texture_img_hat
          ) = model.run_domain_randomized_cycle_1(cy1_shapes, cy1_normals, renderer, learned_tex)

        # ACPTC generator loss calculations
        if options['use_adversarial_cptc']:
            _c2acptc_inds = pose_probs.detach().argmax(dim=1)
            B = _c2acptc_inds.shape[0]
            # Best rotations
            acptc_best_hyp_Rmats = R_hat.gather( # B x nH x 3 x 3 -> B x 3 x 3
                             dim = 1,
                             index = _c2acptc_inds.view(B,1,1,1).expand(-1, -1, 3, 3)
                            ).squeeze(1)
            B, nH, _nIC, H, W = dr_texture_img_hat.shape 
            acptc_best_inferred_texture_images = dr_texture_img_hat.gather(
                                             dim = 1, # B x nH x 3 x H x W
                                             index = _c2acptc_inds.view(B,1,1,1,1).expand(-1, -1, 3, H, W)
                                            ).squeeze(1)
            # Compute negative prediction error -> min this to get max prediction error
            # Do NOT detach the tex images: we need to backprop through them
            cptc_adv_genloss = -1.0 * cptc_critic.loss(
                                            TI     = acptc_best_inferred_texture_images, 
                                            R_true = acptc_best_hyp_Rmats.detach() ).mean()
        else:
            cptc_adv_genloss = None
        #
        total_loss, loss_dict = Cy1_loss.domain_randomized_loss( # 
                                    S               = V_new_pe, 
                                    S_hat           = M_hat_pe, 
                                    orig_M          = cy1_shapes, 
                                    orig_normals    = cy1_normals, 
                                    v               = v, 
                                    v_hat           = v_hat, 
                                    renders         = renders, 
                                    delta           = delta, 
                                    S_ints          = V_new_ints, 
                                    S_hat_ints      = M_hat_ints, 
                                    R               = R, 
                                    t               = t, 
                                    R_hat           = R_hat, 
                                    t_hat           = t_hat, 
                                    pose_probs      = pose_probs, 
                                    input_texture   = texture, 
                                    output_texture  = decoded_texture,
                                    _xi_T           = _xi_T if learned_tex else None,
                                    xi_T_hat        = xi_T_hat,
                                    img_critic      = img_critic if learned_tex else None,
                                    mu_v            = mu_v, 
                                    logvar_v        = logvar_v,
                                    adv_cptc_loss   = cptc_adv_genloss )
        return ( total_loss, loss_dict, renders, 
                (R_hat, t_hat, v_hat, delta_hat, xi_T_hat, pose_probs, V_new_pe) )

    # Thin wrappers that pin the texture source of the domain randomized cycle 1
    def compute_LT_DR_quantities(s, n):
        """ LTDR: domain randomized cycle 1 with learned_tex=True """
        return compute_domain_randomized_quantities(s, n, learned_tex=True)

    def compute_FDR_quantities(s, n):
        """ FDR: domain randomized cycle 1 with learned_tex=False """
        return compute_domain_randomized_quantities(s, n, learned_tex=False)

    def run_domain_randomized_update(cy1_shapes, cy1_normals, opt, 
                                     learned_tex=False, ret_for_ren=False):
        """
        One optimizer step on the domain-randomized cycle 1 loss.

        The loss is computed first; then `opt` has its gradients zeroed, the loss is
        back-propagated and `opt` is stepped.

        Returns (total_loss, loss_dict, renders), with a fourth element
            (R_hat, t_hat, v_hat, delta_hat, xi_T_hat, pose_probs)
        appended when ret_for_ren is True.
        """
        dr_loss, dr_loss_dict, dr_renders, dr_extras = compute_domain_randomized_quantities(
                                                            cy1_shapes, cy1_normals, learned_tex)
        # The trailing pre-Euclidean template is never handed back to the caller
        R_hat, t_hat, v_hat, delta_hat, xi_T_hat, pose_probs, _V_new_pe = dr_extras

        opt.zero_grad()
        dr_loss.backward()
        opt.step()

        if not ret_for_ren:
            return dr_loss, dr_loss_dict, dr_renders
        inferred = (R_hat, t_hat, v_hat, delta_hat, xi_T_hat, pose_probs)
        return dr_loss, dr_loss_dict, dr_renders, inferred

    # Predefine the Cy1 loss computations -> BOTH pose and texture are learned
    def compute_cycle_1_quantities(cy1_shapes, cy1_normals, duplicated_half = False, mixed_reren_loss = False):
        """
        Classical cycle 1 computations (S -> I -> S)

        Runs model.run_cycle_1 on the batch, optionally builds the adversarial CPTC
        generator term and the mixed re-rendering comparators, and evaluates Cy1_loss.
        No backward pass or optimizer step is performed in here.

        cy1_shapes, cy1_normals: input batch, forwarded to the model and to the loss
        duplicated_half:  forwarded to the model as duplicated_xi_T_half_batch
        mixed_reren_loss: if True, also build the (re-render, target render) pair for the
                          mixed re-rendering loss; asserts that duplicated_half is set

        Reads from the enclosing scope: model, renderer, options, cptc_critic, Cy1_loss,
        img_critic, pose_critic, texture_critic and gen_iter (the training loop variable,
        so this can only be called once the training loop has bound it).

        Returns (cy1_L, cy1_L_dict, cy1_renders, cy1_quantities_oth), where
        cy1_quantities_oth = ( cy1_v_hat, cy1_R_hat, cy1_t_hat, cy1_C_dec_tex, 
                               cy1_delta_hat, pose_probs, cy1_M_hat_pe, 
                               cy1_r_hat, cy1_texture, sampled_texture_image, recon_texture_image )
        """
        # Run cycle 1 on input shapes
        ( cy1_renders,           # Generated image renders
          cy1_texture,           # Random texture
          cy1_V_new,             # Deformed nodal positions
          cy1_V_new_pe,          # Deformed template (before Euclidean transform)
          cy1_V_new_ints,        # Deformed template intermediates during impulse perturbations
          cy1_delta,             # Nodal perturbation vector
          cy1_R,                 # Random Euclidean rotation (pose) [input]
          cy1_t,                 # Random Euclidean translation (pose) [input]
          cy1_r,                 # Random rotation from sample in intermed representation (pose) [input]
          cy1_v,                 # Random latent deformation [input]
          cy1_xi_p,              # Random latent pose [input]
          cy1_xi_T,              # Random latent texture [input]
          cy1_M_hat,             # Reconstructed mesh [output]
          cy1_M_hat_pe,          # Reconstructed mesh [output] (before R,t applied)
          cy1_M_hat_ints,        # Reconstructed mesh impulse intermediates [output]
          cy1_xi_p_hat,          # Inferred latent pose
          cy1_xi_T_hat,          # Inferred latent texture
          cy1_C_dec_tex,         # Inferred reconstructed complete texture
          cy1_v_hat,             # Inferred latent deformation
          cy1_R_hat,             # Inferred rotation
          cy1_t_hat,             # Inferred translation
          cy1_r_hat,             # Inferred rotation in 6D representation (pose) 
          cy1_delta_hat,         # Inferred perturbation
          pose_probs,            # Inferred pose probabilities (loss weights)
          mu_v, logvar_v,        # VAE approx posterior param outputs
          sampled_texture_image, # Sampled texture image
          recon_texture_image,   # Reconstructed texture image
          ) = model.run_cycle_1(cy1_shapes, cy1_normals, renderer, 
                                duplicated_xi_T_half_batch = duplicated_half)

        # ACPTC generator loss calculations
        if options['use_adversarial_cptc']:
            # Index of the most probable pose hypothesis per batch element (no gradient)
            _c2acptc_inds = pose_probs.detach().argmax(dim=1)
            B = _c2acptc_inds.shape[0]
            # Best rotations
            acptc_best_hyp_Rmats = cy1_R_hat.gather( # B x nH x 3 x 3 -> B x 3 x 3
                             dim = 1,
                             index = _c2acptc_inds.view(B,1,1,1).expand(-1, -1, 3, 3)
                            ).squeeze(1)
            # NOTE: B is re-bound here from the texture image shape; the gather index below
            # hard-codes 3 channels rather than using _nIC
            B, nH, _nIC, H, W = recon_texture_image.shape 
            acptc_best_inferred_texture_images = recon_texture_image.gather(
                                                     dim = 1, # B x nH x 3 x H x W
                                                     index = _c2acptc_inds.view(B,1,1,1,1).expand(-1, -1, 3, H, W)
                                                 ).squeeze(1)
            # Compute negative prediction error -> min this to get max prediction error
            # Do NOT detach the tex images: we need to backprop through them
            cptc_adv_genloss = -1.0 * cptc_critic.loss(
                                            TI     = acptc_best_inferred_texture_images, 
                                            R_true = acptc_best_hyp_Rmats.detach() ).mean()
        else:
            cptc_adv_genloss = None

        # Compute the re-rendering mixture loss
        if mixed_reren_loss:
            assert duplicated_half
            B = cy1_M_hat.shape[0]
            # Fair coin deciding which half supplies the mesh and which the texture
            coin = random.randint(0, 1)
            inds = pose_probs.detach().argmax(dim = 1)
            mls  = cy1_C_dec_tex.shape # B x nH x |V| x 3
            # Keep only the most probable hypothesis of the decoded texture ...
            best_cy1_C_dec_tex = cy1_C_dec_tex.gather(dim = 1,
                                    index = inds.view(B,1,1,1).expand(-1,-1,mls[2],mls[3])
                                    ).squeeze(1)            
            # ... and of the reconstructed mesh (indexed with the texture's trailing dims)
            best_cy1_M_hat = cy1_M_hat.gather(dim = 1,
                                    index = inds.view(B,1,1,1).expand(-1,-1,mls[2],mls[3])
                                    ).squeeze(1)
            # Pair the (detached) mesh of one half with the texture of the other half;
            # the comparison target is the original render of the half the mesh came from
            if coin == 0:
                M_E_half      = best_cy1_M_hat[0 : B // 2].detach()
                texture_half  = best_cy1_C_dec_tex[B // 2 : ]
                target_render = cy1_renders[0 : B // 2]
            else:
                M_E_half      = best_cy1_M_hat[B // 2 : ].detach()
                texture_half  = best_cy1_C_dec_tex[0 : B // 2]
                target_render = cy1_renders[B // 2 : ]
            new_rens = model.render(M_E_half, texture_half, renderer)
            mixed_cy1_rerens_comparators = (new_rens, target_render)
        else:
            mixed_cy1_rerens_comparators = None

        # Compute the loss associated to cycle 1
        # (gen_iter is taken from the enclosing training loop)
        cy1_L, cy1_L_dict = Cy1_loss(gen_iter, cy1_V_new_pe, cy1_M_hat_pe, cy1_shapes, cy1_normals,
                                     img_critic, cy1_xi_p, cy1_xi_p_hat, 
                                     cy1_xi_T, cy1_xi_T_hat, cy1_v, cy1_v_hat,
                                     cy1_renders, cy1_delta, cy1_V_new_ints, cy1_M_hat_ints, 
                                     cy1_R, cy1_t, cy1_R_hat, cy1_t_hat, pose_probs, 
                                     pose_critic, cy1_r, cy1_r_hat,
                                     input_texture  = cy1_texture,
                                     output_texture = cy1_C_dec_tex,
                                     mu_v = mu_v, logvar_v = logvar_v, 
                                     texture_critic = texture_critic,
                                     sampled_texture_image = sampled_texture_image,
                                     reconstructed_texture_image = recon_texture_image,
                                     mixed_cy1_rerens_comparators = mixed_cy1_rerens_comparators,
                                     adv_cptc_loss = cptc_adv_genloss
                                      )
        # Extra inferred quantities handed back to the caller alongside the loss
        cy1_quantities_oth = ( cy1_v_hat, cy1_R_hat, cy1_t_hat, cy1_C_dec_tex, 
                               cy1_delta_hat, pose_probs, cy1_M_hat_pe, 
                               cy1_r_hat, cy1_texture, sampled_texture_image, recon_texture_image )
        return cy1_L, cy1_L_dict, cy1_renders, cy1_quantities_oth

    # Function for weighted combined DR_full + DR_learned_tex, used in pretraining stage 2
    def run_combined_cy1_DR_update(rtex_weight, ltex_weight):
        """
        One generator step on a weighted pair of domain-randomized cycle 1 losses, both
        evaluated on the same freshly sampled shape batch.

        rtex_weight: weight on the random texture pass (learned_tex=False)
        ltex_weight: weight on the learned texture pass (learned_tex=True)

        Each weighted loss is back-propagated right after it is computed, so gradients
        accumulate across the two passes before the single optimizer step.

        Returns ( (rtex_loss, rtex_loss_dict, rtex_renders, rtex_oth),
                  (ltex_loss, ltex_loss_dict, ltex_renders, ltex_oth) )
        where the losses are the *weighted* ones.
        """
        shapes, normals = shape_dataloader.sample()
        shapes  = shapes.to(device_main)
        normals = normals.to(device_main)

        gen_optimizer.zero_grad()
        per_pass_outputs = []
        # Random texture pass first, learned texture pass second
        for use_learned_tex, weight in ((False, rtex_weight), (True, ltex_weight)):
            loss, loss_dict, renders, oth = compute_domain_randomized_quantities(
                                                shapes, normals, learned_tex=use_learned_tex)
            loss = loss * weight
            loss.backward() # Immediate backward, so this pass' graph need not be kept
            per_pass_outputs.append( (loss, loss_dict, renders, oth) )
        gen_optimizer.step()

        rtex_outputs, ltex_outputs = per_pass_outputs
        return (rtex_outputs, ltex_outputs)

    def get_combined_FDR_and_fullCy1_quantities(w_full_cy1, w_fdr_cy1, cy1_shapes, cy1_normals):
        """
        Evaluate and back-propagate (a) the full cycle 1 and (b) the fully domain
        randomized (FDR) cycle 1. Gradients are accumulated; no optimizer step is taken.

        The full cycle runs on the batch passed in; the FDR cycle runs on a new batch
        drawn from shape_dataloader.

        Returns ( (weighted full loss, loss dict, renders, cy1_quantities_oth),
                  (weighted FDR loss,  loss dict, renders, fdr_q) )
        with fdr_q = (R_hat, t_hat, v_hat, delta_hat, xi_T_hat, pose_probs, V_new_pe).
        """
        # (a) Full cycle 1, with immediate backward (avoid graph storage)
        full_L, full_L_dict, full_renders, full_oth = compute_cycle_1_quantities(cy1_shapes, cy1_normals)
        full_L = w_full_cy1 * full_L
        full_L.backward()

        # (b) FDR cycle on a fresh batch, also with immediate backward
        fdr_shapes, fdr_normals = shape_dataloader.sample()
        fdr_shapes  = fdr_shapes.to(device_main)
        fdr_normals = fdr_normals.to(device_main)
        fdr_L, fdr_L_dict, fdr_renders, fdr_q = compute_FDR_quantities(fdr_shapes, fdr_normals)
        fdr_L = w_fdr_cy1 * fdr_L
        fdr_L.backward()

        full_outputs = (full_L, full_L_dict, full_renders, full_oth)
        fdr_outputs  = (fdr_L, fdr_L_dict, fdr_renders, fdr_q)
        return (full_outputs, fdr_outputs)

    def get_combined_LTDR_and_fullCy1_quantities(w_full_cy1, w_ltdr_cy1, cy1_normals, *, cy1_shapes=None):
        """
        Run (a) the full Cycle 1 and (b) the learned texture (LT) + random pose DR cycle.
        Performs the backwards in here for each, but no gradient step.

        w_full_cy1:  weight applied to the full cycle 1 loss before its backward
        w_ltdr_cy1:  weight applied to the LTDR loss before its backward
        cy1_normals: normals of the batch used for the full cycle 1
        cy1_shapes:  shapes of the batch used for the full cycle 1 (keyword-only).
                     The previous version read `cy1_shapes` without ever receiving it
                     (the name was only assigned further down), so every call died with
                     an UnboundLocalError. It is now an explicit argument, added as
                     keyword-only so the existing positional parameters keep their meaning.

        The LTDR cycle runs on a new batch drawn from shape_dataloader.

        Returns (cy1_full_outs, LTDR_outs) =
            ( (cy1_L, cy1_L_dict, cy1_renders), (total_loss, loss_dict, renders, ltdr_q) )
        where both losses are the weighted ones.

        Raises ValueError if cy1_shapes is not supplied.
        """
        if cy1_shapes is None:
            raise ValueError('get_combined_LTDR_and_fullCy1_quantities requires the full cycle 1 '
                             'shape batch: pass it as cy1_shapes=...')
        # Run the full cycle 1 with immediate backward (avoid graph storage)
        cy1_L, cy1_L_dict, cy1_renders, _ = compute_cycle_1_quantities(cy1_shapes, cy1_normals)
        cy1_L = w_full_cy1 * cy1_L
        cy1_L.backward()
        # Run the LTDR (learned texture domain randomized) cycle on a fresh batch, with backward.
        # Distinct local names keep the caller-supplied batch from being shadowed.
        ltdr_shapes, ltdr_normals = shape_dataloader.sample()
        ltdr_shapes  = ltdr_shapes.to(device_main)
        ltdr_normals = ltdr_normals.to(device_main)
        ( total_loss, loss_dict, renders, 
                ltdr_q # (R_hat, t_hat, v_hat, delta_hat, xi_T_hat, pose_probs, V_new_pe)
            ) = compute_LT_DR_quantities(ltdr_shapes, ltdr_normals)
        total_loss = w_ltdr_cy1 * total_loss
        total_loss.backward()
        # Return the results of both cycles
        return ( (cy1_L, cy1_L_dict, cy1_renders),
                 (total_loss, loss_dict, renders, ltdr_q) )

    ###< Finished function definitions >###
    #-------------------------------------#

    # Warm-start the mesh AE from a stored checkpoint when a path was supplied
    if mesh_ae_load_path is not None:
        logging.info('Loading mesh AE ' + mesh_ae_load_path)
        model.load_mesh_ae(mesh_ae_load_path)

    # >>> Training iterations loop <<< #
    ### Setup pretraining stages ###
    # Pretraining is active if either stage has a non-zero iteration budget;
    # it starts at stage 1 (chamfer only) when that stage has iterations, else at stage 2
    pretraining             = bool(cham_only_iters + stage_2_pretrain_iters > 0)
    pt_stage                = 1 if cham_only_iters > 0 else 2
    # Mesh AE freezing state
    frozen_mesh_ae, frozen_iters_so_far = False, 0
    # Cy2-loss-only state: budget, current on/off flag, iterations spent so far
    cy2_loss_only_max_iters = options['cy2_loss_only_iters'] 
    cy2_loss_only           = cy2_loss_only_max_iters > 0
    cy2_loss_only_iters     = 0
    # Annealing of domain randomized pretraining probabilities:
    # prob of using the learned texturer at the start / end of DR training
    START_PROB_LT, END_PROB_LT = options['pts2_lt_start_prob'], options['pts2_lt_final_prob']

#    def current_prob_and_prog(c_iter):
#""" Compute probability of choosing a batch from cycle 1 learned and DR (when annealing in the former) """
        # prob -> interpolates between start and end (depending on chosen end-points)
        # prob is the relative weight on the "learned texture" pretrained case
#        prog = (c_iter - cham_only_iters + 1) / stage_2_pretrain_iters
#        return np.clip( # prog = 0 --> start, prog = 1 --> end
#            prog * END_PROB_LT + (1.0 - prog) * START_PROB_LT,
#            a_min=0.001, a_max=0.999 ), prog

    ### Setup main training stages ###
    # Main training begins in mode 1; one iteration counter is kept per mode
    main_training_mode = 1
    mode_1_training_iterations = mode_2_training_iterations = mode_3_training_iterations = 0

    # Annealing of Mode II balance (full cy1 vs FDR cy1) 
    def current_cy1_balancer(m2_iters_so_far):
        """
        Convex-combination weights between the full and the FDR cycle 1 losses.

        The weight on the full cycle 1 is interpolated linearly from 0.01 to 0.99 as
        m2_iters_so_far goes from 0 to options['mode_2_iters']; the other weight is its
        complement. Divides by options['mode_2_iters'], so that option must be non-zero.

        Returns (full_weight, oth_weight)
        """
        w_at_start, w_at_end = 0.01, 0.99 # Relative weight on full cycle 1 at start / end
        frac_done = m2_iters_so_far / options['mode_2_iters']
        w_full = w_at_start * (1.0 - frac_done) + frac_done * w_at_end
        return w_full, 1.0 - w_full

    # Annealing of cy2 loss weights, starting in Mode I
    def cy2_total_loss_weight_and_prog(i):
        """
        Weight on the cy2 loss at generator iteration i while it is being annealed in.

        Progress is measured from the end of pretraining (cham_only_iters + 
        stage_2_pretrain_iters) in units of options['cy2_annealing_period']. The weight
        moves linearly from options['initial_cy2_weight'] towards 1.0 and is clipped to
        [0.001, 0.999]; the returned progress value itself is not clipped.

        Returns (weight, progress); (1.0, 1.0) when the annealing period is 0.
        """
        period = options['cy2_annealing_period']
        if period == 0:
            return 1.0, 1.0 # No annealing
        progg   = (i - cham_only_iters - stage_2_pretrain_iters) / period
        start_w = options['initial_cy2_weight']
        end_w   = 1.0
        # progg = 0 --> start_w, progg = 1 --> end_w
        blended = progg * end_w + (1.0 - progg) * start_w
        return np.clip(blended, a_min=0.001, a_max=0.999), progg

    # The generator optimizer depends on the initial training stage:
    #   - pretraining stage 1 (chamfer only): pt1 learning rate plus a MultiStepLR schedule
    #   - pretraining stage 2 (DR-based) or no pretraining: main learning rate, no schedule
    if pretraining:
        if pt_stage == 1: # cham_only pretraining
            logging.info('Resetting optimizer [pt1] (lr = %f, gamma = %f, milestones = %s)' 
                % (options['gen_lr_pt1'],options['gen_lr_pt1_gamma'],str(options['gen_lr_pt1_ms'])) )
            gen_optimizer = opter(model.parameters(), lr=options['gen_lr_pt1'], wd=gen_wd)
            pt_1_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                            gen_optimizer, options['gen_lr_pt1_ms'], 
                            gamma = options['gen_lr_pt1_gamma'])
        elif pt_stage == 2: # DR-based pretraining
            logging.info('Resetting std optimizer [pt2] (lr = %f)' % options['gen_lr_main'])
            gen_optimizer = opter(model.parameters(), lr=options['gen_lr_main'], wd=gen_wd)
        else:
            # The exception used to be constructed but never raised, which would have
            # let execution continue without any gen_optimizer being defined
            raise ValueError('Unexpected pt_stage ' + str(pt_stage))
    else: # Not in pretraining
        logging.info('Resetting std optimizer (lr = %f)' % options['gen_lr_main'])
        gen_optimizer = opter(model.parameters(), lr=options['gen_lr_main'], wd=gen_wd)

    # Create the output dir, then fetch the fixed image batch and write it out before
    # training starts (os.makedirs raises if outdir already exists)
    os.makedirs(outdir)
    fixed_batch_gt_name = 'fixed_batch-imgs.png'
    fixed_img_batch = img_dataloader.dataset.get_fixed_batch() 
    _fixed_batch_gt_path = os.path.join(outdir, fixed_batch_gt_name)
    imgutils.imswrite_t(fixed_img_batch, _fixed_batch_gt_path)

    # Fixed texture values for cy1 fixed output saving
    fixed_cy1_batch_size = options['B_shapes']
    #fixed_cy1_num_cols   = 10
    fixed_cy1_xi_T_vecs  = model.sample_xi_T(fixed_cy1_batch_size).to(device_main)

    # 'latest-<run name>' folder next to the run outputs, seeded with the fixed batch image
    outdir_pared = os.path.basename( os.path.normpath( outdir_core ) ) # progress
    path_to_latest_folder = os.path.join(outdir_core, 'latest-' + outdir_pared)
    os.makedirs(path_to_latest_folder)
    shutil.copy(_fixed_batch_gt_path, path_to_latest_folder)

    #########################
    ##### TRAINING LOOP #####
    #########################

    # Accumulating timer used for the per-section timing logs
    stopwatch = AccumTimer()

    # Start training iterations
    for gen_iter in range(n_gen_iters): #########################################################
        
        time_string = '-m%d' % main_training_mode

        stopwatch.start('gen_iter' + time_string)

        #----------------------------------------------------------------#
        ###### < Handle Pretraining > #####
        if pretraining:

            stopwatch.start('pts_iter')
            stopwatch.start('pts_load')

            #cy1_graphs, cy1_shapes = shape_dataloader.sample()
            cy1_shapes, cy1_normals = shape_dataloader.sample()
            cy1_shapes  = cy1_shapes.to(device_main)
            cy1_normals = cy1_normals.to(device_main)

            stopwatch.end('pts_load')

            # Compute loss depending on PT stage
            if pt_stage == 1:
                stopwatch.start('pts1_iter')
                ### Stage 1 pretraining: Just train the mesh AE ###
                # Perturbed template, intermeds, delta -> all pre-euc transform
                cy1_V_new_pt, cy1_V_new_ints_pt, delta_pt, v, mu_v, logvar_v = model.pretrain_iteration(cy1_shapes, cy1_normals)
                stopwatch.end('pts1_iter')

                stopwatch.start('pts1_loss')
                recon_loss, normals_loss, reg_loss, vreg = Cy1_loss.pretraining_loss(cy1_shapes, cy1_normals, 
                                                               cy1_V_new_pt, cy1_V_new_ints_pt, delta_pt, 
                                                               v, mu_v, logvar_v)
                total_loss = recon_loss + normals_loss + reg_loss + vreg
                stopwatch.end('pts1_loss')

                # Periodic logging for pretraining stage 1 (classic log + tensorboard + timings)
                if gen_iter % print_every == 0:
                    rfpn = lambda ss: rfp(ss, s=False)
                    _LD = { 'total' : rfpn(total_loss), 'recon' : rfpn(recon_loss), 'shape_reg' : rfpn(reg_loss),
                            'normals' : rfpn(normals_loss), 'v_reg' : rfpn(vreg) }
                    # Read the learning rate straight from the optimizer that pt_1_scheduler drives.
                    # Calling scheduler.get_lr() outside of scheduler.step() emits a warning in
                    # recent PyTorch releases and, for MultiStepLR, applies gamma a second time
                    # on milestone iterations, so the logged value could be wrong.
                    _current_lr = gen_optimizer.param_groups[0]['lr']
                    logging.info('(PTS1-%d) Total: %s\n\tRecon: %s, MeshReg: %s, Normals: %s, vReg: %s [CurrLR: %f]' 
                            % ( gen_iter, rfp(total_loss), rfp(recon_loss), rfp(reg_loss), rfp(normals_loss), rfp(vreg), 
                                    _current_lr ) )
                    utils.write_all_to_tensorboard( board_writer, _LD, gen_iter, prepend_string='PT1-')   
                    logging.info('Average Timings: ' + stopwatch.csv_means_string())
                    # Timings are averaged per logging window
                    stopwatch.reset()

                # Update optimizer
                stopwatch.start('pts1_step')
                gen_optimizer.zero_grad()
                total_loss.backward()
                gen_optimizer.step()
                pt_1_scheduler.step()
                stopwatch.end('pts1_step')

            #---> Domain randomized pretraining <---#
            elif pt_stage == 2:
                ### Stage 2 pretraining: train fully domain randomized  
                # This is pretraining the inference network, with pseudo-3D supervision
                stopwatch.start('pts2_step')
                (drf_total_loss, drf_loss_dict, drf_renders, drf_oth) = run_domain_randomized_update(
                                                                          cy1_shapes, cy1_normals, gen_optimizer, 
                                                                          learned_tex=False, ret_for_ren=True)
                stopwatch.end('pts2_step')
                # Write to the log(s)
                if gen_iter % print_every == 0:
                    # Update classic log
                    logging.info("\n>>> On generator iteration %d [Pretraining Stage 2: DRFR %d]" 
                                        % (gen_iter, DR_full_rand_iters))
                    logging.info('Fully Domain Randomized Total Loss = %s', rfp(drf_total_loss))
                    utils.ppd('Fully Domain Randomized Cycle 1 losses (Sdr->I)\n', drf_loss_dict)
                    # Update tensorboard log
                    board_writer.add_scalar(tag='PT2-DRFR-', scalar_value=rfp(drf_total_loss,s=False), global_step=gen_iter)
                    utils.write_all_to_tensorboard(board_writer, drf_loss_dict, gen_iter, prepend_string='PT2-DRFR-') 
                    logging.info('Average Timings: ' + stopwatch.csv_means_string())
                    stopwatch.reset()

                # Save images
                if gen_iter % save_imgs_every == 0:
                    with torch.no_grad():
                        _R, _t, _v, _delta, _xi_T, _pose_prob_weights = drf_oth
                        _recon_renders = model.transformations_to_image(_R, _t, _v, _delta, 
                                                    xi_T = _xi_T, renderer = renderer)
                        logging.info('%d: %s %s', gen_iter, '(PTS2) Saving to', outdir)
                        imgutils.imswrite_t(drf_renders, 
                            os.path.join(outdir, '%d-pts2-fullrand-renders.png' % gen_iter))
                        pose_prob_strings = [ ( 'p=%.3f' % s ) 
                                              for s in _pose_prob_weights.reshape(B*NH).cpu().detach().numpy().tolist() ]
                        imgutils.imswrite_t(_recon_renders, 
                            os.path.join(outdir, '%d-pts2-fullrand-all-dr1-recons.png' % gen_iter), 
                            ncols=NH*4, corner_strings=pose_prob_strings)
                        if NH > 1:
                            NC = _recon_renders.shape[1]
                            most_likely = _recon_renders.view(B, NH, NC, _recon_renders.shape[-2], _recon_renders.shape[-1]) # NC = 3 or 4
                            inds = _pose_prob_weights.detach().argmax(dim=1)
                            mls = most_likely.shape
                            most_likely = most_likely.gather(dim=1, 
                                                index=inds.view(B,1,1,1,1).expand(-1,-1,mls[2],mls[3],mls[4])).squeeze(1)
                            imgutils.imswrite_t(most_likely, 
                                os.path.join(outdir, '%d-pts2-fullrand-ML-dr1-recons.png'  % gen_iter) )
            else:
                raise ValueError('Unexpected PT stage')

            ### Check for Pretraining Stage Completions ###
            # Check if pretraining stage or pretraining itself is finished
            if gen_iter == cham_only_iters: 
                logging.info('Completed chamfer-only pretraining')
                # Save a pretrained model checkpoint
                save_mesh_ae_pretrained = options['save_pretrained_mesh_ae']
                if save_mesh_ae_pretrained and (cham_only_iters > 0):
                    pretrained_ae_model_name = os.path.join(outdir, 
                            "model-%s-mesh_ae-pretrained-NI_%s.state_dict.pt" % 
                            (options_choice, gen_iter))
                    logging.info('Saving trained mesh AE ' + pretrained_ae_model_name)
                    torch.save(model.state_dict(), pretrained_ae_model_name)
                # Change pretraining stage
                pt_stage = 2 # Domain randomized pretraining stage 2
                # Freeze weights (potentially temporarily) if desired
                if num_frozen_mesh_ae_iters > 0:
                    frozen_mesh_ae = True
                    logging.info('Freezing mesh AE (for %d total iters)' % num_frozen_mesh_ae_iters)
                    model.freeze_mesh_ae()
                    Cy1_loss.set_mae_as_frozen()
                    logging.info('Resetting optimizer [frozen] (lr = %f)' % options['gen_lr_main'])
                    gen_optimizer = opter( filter(lambda p: p.requires_grad, model.parameters()), 
                                           options['gen_lr_main'], gen_wd)
                else:
                    # Reset the optimizer before the next pretraining stage
                    logging.info('Resetting std optimizer (lr = %f)' % options['gen_lr_main'])
                    gen_optimizer = opter(model.parameters(), options['gen_lr_main'], gen_wd)
            #---------#
            if gen_iter >= cham_only_iters + stage_2_pretrain_iters:
                logging.info('Completed domain randomized Cy1 pretraining')
                pt_stage = None
                pretraining = False
                save_dr_pretrained = options['save_dr_pretrained_model']
                # Save a pretrained model checkpoint
                if save_dr_pretrained and stage_2_pretrain_iters > 0:
                    drpretrained_ae_model_name = os.path.join(outdir, 
                            "model-%s-DR-pretrained-NI_%s.state_dict.pt" % 
                            (options_choice, gen_iter))
                    logging.info('Saving trained mesh AE ' + drpretrained_ae_model_name)
                    torch.save(model.state_dict(), drpretrained_ae_model_name)
                # If entering cy2-only mode
                if cy2_loss_only:
                    logging.info('Entering cy2-loss-only model mode (%d total iters)' % 
                                    cy2_loss_only_max_iters )
                else:
                    logging.info('Entering main training mode ' + str(main_training_mode))

            stopwatch.end('pts_iter')
            continue # Skip to the next generator iteration
        ##### </ End Pretraining /> #####
        #---------------------------------------------------------------------------#

        ################################
        #>>>>> PROGRESSION CHECKS <<<<<#
        ################################

        ## Handle the frozen mesh AE mode
        # This point is only reached on non-pretraining iterations (the pretraining
        # branch above ends with `continue`), so frozen_iters_so_far counts main-phase
        # iterations only.
        if frozen_mesh_ae:
            # Check if we should unfreeze the mesh AE
            # (strict '>': the counter has to exceed the budget before unfreezing)
            if frozen_iters_so_far > num_frozen_mesh_ae_iters:
                frozen_mesh_ae = False
                logging.info('Unfreezing mesh AE')
                model.unfreeze_mesh_ae()
                Cy1_loss.set_mae_as_unfrozen()
                # Rebuild the optimizer over all model parameters
                logging.info('Resetting std optimizer (lr = %f)' % options['gen_lr_main'])
                gen_optimizer = opter(model.parameters(), lr=options['gen_lr_main'], wd=gen_wd)
            else: # Otherwise, increment the counter
                frozen_iters_so_far += 1

        ## Handle the Cy2 loss only case
        if cy2_loss_only:
            # This mode is hard-disabled: reaching it aborts with an AssertionError.
            # NOTE(review): asserts are stripped under `python -O`, in which case the
            # bookkeeping below would run instead -- confirm this is acceptable.
            assert False
            if cy2_loss_only_iters > cy2_loss_only_max_iters:
                cy2_loss_only = False
                logging.info('Leaving Cy2-losses-only mode')
            else:
                cy2_loss_only_iters += 1

        ## Change main phase training modes if necessary
        # The three checks are separate `if`s (not elif): when a mode ends, control falls
        # through to the next mode's check within the same iteration. A mode's counter is
        # only incremented on iterations where that mode does not end.
        if main_training_mode == 1: # FDR Cy1 + anneal in cy2
            # Switch modes if we are done
            if mode_1_training_iterations >= options['mode_1_iters']:
                main_training_mode = 2
                # Activate pose adv loss
                Cy1_loss.allow_adversarial_decoupled_pose_loss()
                logging.info('Changing training mode to ' + str(main_training_mode))            
            # Else, increment the number of iterations
            else:
                mode_1_training_iterations += 1
        if main_training_mode == 2: # Anneal in true cy1 + anneal out FDR-cy1; cy2 is full
            # Switch modes if we are done
            if mode_2_training_iterations >= options['mode_2_iters']:
                main_training_mode = 3
                logging.info('Changing training mode to ' + str(main_training_mode))            
            # Else, increment the number of iterations
            else:
                mode_2_training_iterations += 1
        if main_training_mode == 3: # FULL cy1+cy2
            # Final mode: never left, counter incremented on every iteration
            mode_3_training_iterations += 1

        ##############################
        #>>>>> GENERATOR UPDATE <<<<<#
        ##############################

        # Gradient reset
        gen_optimizer.zero_grad()

        ##### Cycle 1 #####
        ### Shapes -> renders -> 3D reconstructions ###
        stopwatch.start('cycle1_comp' + time_string)
        cy1_shapes, cy1_normals = shape_dataloader.sample()
        cy1_shapes              = cy1_shapes.to(device_main)
        cy1_normals             = cy1_normals.to(device_main)
        if not cy2_loss_only:
            ### MODE I: Fully DR Cy1 + Anneal in Cy2 ###
            if main_training_mode == 1:
                #cy1_shapes, cy1_normals = shape_dataloader.sample()
                #cy1_shapes  = cy1_shapes.to(device_main)
                #cy1_normals = cy1_normals.to(device_main)
                ( cy1_L, cy1_L_dict, fdr_renders, 
                        fdr_cy_1_extras #(R_hat, t_hat, delta_hat, xi_T_hat, pose_probs) 
                    ) = compute_FDR_quantities(cy1_shapes, cy1_normals)
            ### MODE II: Anneal in full cycle 1 + Anneal out Full DR Cy1 ###
            # When mode II begins, we start applying the adversarial pose loss
            # We also start applying the image critic
            elif main_training_mode == 2:
                _full_weight, _fdr_weight = current_cy1_balancer(mode_2_training_iterations)
                (   (cy1_L, cy1_L_dict, cy1_renders, cy1_quantities_oth),
                    (fdr_L, fdr_L_dict, fdr_renders, fdr_q) 
                  ) = get_combined_FDR_and_fullCy1_quantities(_full_weight, _fdr_weight, cy1_shapes, cy1_normals)
                ( fdr_R_hat, fdr_t_hat, fdr_v_hat, fdr_delta_hat, fdr_xi_T_hat, fdr_pose_probs,
                  _ ) = fdr_q
            ### MODE III: Regular Cycle 1 ###
            elif main_training_mode == 3:
                # If using Cycle 1 CPTC, we actually *duplicate* half the batch
                if options['use_CY1_cptc']:
                    B_over_2    = B // 2
                    cy1_shapes  = cy1_shapes[0 : B_over_2] 
                    cy1_normals = cy1_normals[0 : B_over_2]
                    cy1_shapes  = torch.cat( (cy1_shapes,  cy1_shapes),  dim = 0) 
                    cy1_normals = torch.cat( (cy1_normals, cy1_normals), dim = 0) 

                cy1_L, cy1_L_dict, cy1_renders, cy1_quantities_oth = compute_cycle_1_quantities(
                                                                        cy1_shapes, 
                                                                        cy1_normals,
                                                                        duplicated_half = options['use_CY1_cptc'],
                                                                        mixed_reren_loss = options['use_mixed_reren_loss'],
                                                                     )
            else:
                raise ValueError('Unknown main training mode ' + str(main_training_mode))
            stopwatch.end('cycle1_comp' + time_string)


        ##### Cycle 2 #####
        # Images -> Inferred shapes -> re-renders
        stopwatch.start('cy2_data' + time_string)
        if USE_GT: cy2_images, gt_R, gt_t = img_dataloader.sample()
        else:      cy2_images             = img_dataloader.sample() 
        cy2_images = cy2_images.to(device_main)
        stopwatch.end('cy2_data' + time_string)

        stopwatch.start('cy2_run' + time_string)
        (  cy2_M,           # Inferred mesh shape 
           cy2_M_pe,        # Inferred mesh shape before Euclidean transform
           cy2_M_ints,      # Inferred mesh deformation intermediates
           cy2_xi_p,        # Inferred latent pose
           cy2_xi_T,        # Inferred latent texture
           cy2_v,           # Inferred latent deformation
           cy2_R,           # Inferred rotation
           cy2_r,           # Inferred rotation (6D)
           cy2_t,           # Inferred translation
           cy2_delta,       # Inferred perturbation
           cy2_T,           # Reconstructed texture (from inferred latent)
           cy2_renders,     # Reconstructed image render
           cy2_pose_probs,  # Pose hypothesis probabilities
           mu_xi_T_hat,     # xi_T VAE mean param (if used)
           logvar_xi_T_hat, # xi_T VAE log-var param (if used)
           cy2_texture_img_hat # Inferred UV texture image
          ) = model.run_cycle_2(cy2_images, renderer, run_secondary_inference=False)
        stopwatch.end('cy2_run' + time_string)

        ### Cross-pose texture consistency ###
        ran_cptc = False
        if main_training_mode >= 1 and options['enforce_cross_pose_tex_consis']: 
            stopwatch.start('cy2_cp_tex_consis' + time_string)
            _c2rr_inds = cy2_pose_probs.detach().argmax(dim=1)
            cy2_T_best = cy2_T.gather( # Best textures: C_best (initially B x nH x |V| x 3)
                             dim   = 1,
                             index = _c2rr_inds.view(B,1,1,1).expand(-1, -1, model.nV, 3) 
                            ).squeeze(1)
            #( cptc_v_hat, cptc_xi_T_hat, cptc_I_new, cptc_decoded_texture_image, 
            #  cptc_decoded_texture ) 
            cptc_data = model.infer_on_random_new_view(
                                            canonical_shape        = cy2_M_pe, 
                                            texture                = cy2_T_best, 
                                            renderer               = renderer,
                                            domain_randomized_pose = (main_training_mode <= 2),
                                            zero_xy_translation    = False,
                                            return_decoded_texture = True, 
                                            detach_canonical_shape = True,
                                        )             
            cptc_I_reren = cptc_data[2]
            ran_cptc = True
            stopwatch.end('cy2_cp_tex_consis' + time_string)

        ### Rerender the cycle 2 outputs from a new pose ###
        if main_training_mode >= 3 and run_rerenders:
            assert False
            _c2rr_inds = cy2_pose_probs.detach().argmax(dim=1)
            cy2_T_best = cy2_T.gather( # Best textures: C_best (initially B x nH x |V| x 3)
                             dim   = 1,
                             index = _c2rr_inds.view(B,1,1,1).expand(-1, -1, model.nV, 3)
                             ).squeeze(1)
            cy2_rerenders = model.rerender_from_random_view(cy2_M_pe, cy2_T_best, 
                                        renderer               = renderer,
                                        domain_randomized_pose = (main_training_mode < 3), # inactive
                                        zero_xy_translation    = False, 
                                        detach_pose            = True )
        else:
            cy2_rerenders = None

        # ACPTC generator loss calculations (for cy2 generator loss)
        if options['use_adversarial_cptc']:
            _c2acptc_inds = cy2_pose_probs.detach().argmax(dim=1)
            # Best rotations
            acptc_best_hyp_Rmats = cy2_R.gather( # B x nH x 3 x 3 -> B x 3 x 3
                             dim = 1,
                             index = _c2acptc_inds.view(B,1,1,1).expand(-1, -1, 3, 3)
                            ).squeeze(1)
            B, nH, _nIC, H, W = cy2_texture_img_hat.shape 
            acptc_best_inferred_texture_images = cy2_texture_img_hat.gather(
                                             dim = 1, # B x nH x 3 x H x W
                                             index = _c2acptc_inds.view(B,1,1,1,1).expand(-1, -1, 3, H, W)
                                            ).squeeze(1)
            # Compute negative prediction error -> min this to get max prediction error
            # Do NOT detach the tex images: we need to backprop through them
            cptc_adv_genloss = -1.0 * cptc_critic.loss(
                                            TI     = acptc_best_inferred_texture_images, 
                                            R_true = acptc_best_hyp_Rmats.detach() ).mean()
        else:
            cptc_adv_genloss = None

        ### Compute cycle 2 loss ###
        cy2_M_pe_dgl = None # model.construct_dgl_graphs_from_pcs(cy2_M_pe)
        stopwatch.start('cy2_loss' + time_string)
        cy2_L, cy2_L_dict = Cy2_loss(cy2_images, cy2_renders, cy2_M_pe,
                                     cy2_xi_p, cy2_xi_T, 
                                     shape_critic, xi_p_critic, xi_T_critic, 
                                     #shape_critic, pose_critic, xi_T_critic, 
                                     img_critic, # <- Used for perceptual loss
                                     cy2_v, cy2_delta, cy2_M_pe_dgl, cy2_M_ints, renderer,
                                     cy2_pose_probs, cy2_R, cy2_t, model,
                                     inferred_texture_images = cy2_texture_img_hat,
                                     inferred_texture = cy2_T,
                                     texture_critic   = texture_critic,
                                     rerendered_imgs  = cy2_rerenders,
                                     mu_xi_T          = mu_xi_T_hat, 
                                     logvar_xi_T      = logvar_xi_T_hat,
                                     cptc_data        = cptc_data if ran_cptc else None,
                                     adv_cptc_loss    = cptc_adv_genloss,
                                     ) #,
        # Compute current annealed weight on cy2 loss
        cy2_annealed_w, cy2_anneal_weight_prog = cy2_total_loss_weight_and_prog(gen_iter)
        # Compute corrected loss value
        cy2_L = cy2_L * cy2_annealed_w
        stopwatch.end('cy2_loss' + time_string)

        ##### Loss consolidation #####
        # Combine the cycle losses into a single generator loss, back-propagate, and
        # take one generator optimizer step. cy2_L has already been scaled by its
        # annealed weight at this point.
        #> Compute total loss <#
        if cy2_loss_only:
            # Cy2-losses-only mode: the cycle 1 branch was skipped, so no cy1_L is used
            total_gen_loss = cy2_L
        else:
            total_gen_loss = cy1_L + cy2_L # Printed even in mode II

        #> Compute and store gradients <#
        # In mode II, we run backward after each forward pass for efficiency in cy1
        # NOTE(review): this presumably means the cycle 1 backward passes for mode II
        #   already ran inside get_combined_FDR_and_fullCy1_quantities; that is why
        #   only cy2_L is back-propagated in that mode below -- confirm against helper.
        stopwatch.start('step' + time_string)
        if main_training_mode == 1 or main_training_mode == 3:
            total_gen_loss.backward()
        elif main_training_mode == 2:
            # total_gen_loss is still computed above in mode II, but is not used for backward
            cy2_L.backward()

        ##### Gradient Step #####
        # Assumes all the backwards have been run by now
        gen_optimizer.step()
        stopwatch.end('step' + time_string)

        ############################
        #>>>>> CRITIC UPDATES <<<<<#
        ############################

        ##### Cycle 1 critic update (on images) #####
        if not cy2_loss_only and main_training_mode >= 2:
            #ic_loss = run_img_critic_turn(gen_iter, include_fdr_batch = False)
            stopwatch.start('img_critic' + time_string)
            ic_loss = run_img_critic_turn(
                                gen_iter = gen_iter, 
                                include_fdr_batch = False, 
                                # Cycle 1 intermediates 
                                fake_gen_imgs = cy1_renders, 
                                # True image samples [cy2 inputs]
                                real_imgs = cy2_images, 
                                # Reconstructions of true image samples [cy2 recons]
                                rerendered_real_images = cy2_renders,
                                # Cy2 pose probabilities
                                P = cy2_pose_probs )
            stopwatch.end('img_critic' + time_string)

        ##### Cycle 2 critics (shape, xi_p, xi_T, texture) #####
        # Note: the xi_p (pose) critic's role is inverted compared to before.
        # Rather than forcing xi_p_tilde to be Gaussian, we push the sampled poses
        #    to look like the (best) hypotheses from Cycle 2.
        # This is debatably a Cy2 critic, but since it affects the networks used to
        #    map xi_p to r,t, it is arguably related to both cycles.
        for cy2_critics_update in range(cy2_critics_update_per_gen_iter):

            stopwatch.start('sec_critics' + time_string)

            if gen_iter < thresh: break

            ## Sample true mesh data ##
            #c2_shapes, c2_normals = shape_dataloader.sample()
            #c2_shapes             = c2_shapes.to(device_main) 
            #c2_normals            = c2_normals.to(device_main) 
            c2_shapes  = cy1_shapes
            c2_normals = cy1_normals

            ## "Real" shapes for critic (via MAE) ##              
            #_dtf_v, _dtf_delta, _dtf_M_euc, _, _ = model.shape_to_deformed_template(c2_shapes, c2_normals) 
            if   main_training_mode >= 2:
                ( cy1_v_hat, cy1_R_hat, cy1_t_hat, cy1_C_dec_tex, cy1_delta_hat, 
                  pose_probs, cy1_M_pre_euc, cy1_r_hat, cy1_texture, cy1_sampled_texture_image, cy1_recon_texture_image
                  ) = cy1_quantities_oth
                _dtf_v     = cy1_v_hat
                _dtf_delta = cy1_delta_hat
                _dtf_M_euc = cy1_M_pre_euc
                _cy1_pose_probs = pose_probs
            elif main_training_mode == 1:
                ( fdr_R_hat, fdr_t_hat, fdr_v_hat, fdr_delta_hat, fdr_xi_T_hat, fdr_pose_probs, 
                  fdr_V_new_pe ) = fdr_cy_1_extras
                _dtf_v     = fdr_v_hat
                _dtf_delta = fdr_delta_hat
                _dtf_M_euc = fdr_V_new_pe
                #cy1_t_hat  = fdr_t_hat
                #cy1_r_hat  = fdr_ren_name

            # Generate meshes
            with torch.no_grad():

                ## Map the cy2 real images to shapes ##
                # Note: 3 scenarios 
                # (1) real N(0,I) samples [Used in Cy2],
                # (2) fake xi_ps from I -> S=(M,xi_p,xi_T) [Used in Cy2]
                # (3) xi_p samples used for S -> I [Used in Cy1; may be e.g. a truncated normal]
                
                ## Replace this with main model cy2 outputs
                #( M_hat, xi_p_hat, xi_T_hat, v_hat, R_hat, t_hat, delta_hat, 
                #  M_hat_preeuc, M_hat_ints, pose_probs_crit, r_hat, inferred_texture, _, _ 
                #  ) = model.image_to_shape(c2_images)
                r_hat                   = cy2_r
                t_hat                   = cy2_t
                v_hat                   = cy2_v 
                R_hat                   = cy2_R 
                cy2_M_pe_hat            = cy2_M_pe
                inferred_texture        = cy2_T
                pose_probs_crit         = cy2_pose_probs
                xi_T_hat                = cy2_xi_T
                inferred_texture_images = cy2_texture_img_hat
                
                ## Real and fake pose and texture samples ##
                if main_training_mode >= 1:
                    # Note this is a little different compared to most GANs or CycleGANs, since the
                    # GT distribution is learned along with the one being taught to match it.
                    #xi_p_real = torch.randn(B, dim_xi_p).to(device_main)
                    # "Real poses" -> the inferred Cy2 poses, but distributionally weighted by predicted confidence
                    hyp_poses = torch.cat( (r_hat, t_hat.squeeze(-1)), dim=-1) # B x nH x (6+3) 
                    # GT Gaussian xi_p samples
                    true_cy1_xi_ps = model.sample_xi_p(B*NH)

                    # pose_probs_crit: B x nH --> define a categorical distribution with it
                    #true_pose_weighted_dist = torch.distributions.Categorical(probs=pose_probs_crit)
                    # Sample hypothesized poses, via weighted indexing into the hypotheses -> shape: B (1D), in [0, nH)
                    #true_pose_sample_indices = true_pose_weighted_dist.sample( (1,) ).T.squeeze(-1) 
                    # Obtain the sampled hypothesized poses, weighted by predicted quality (B x (6+3))
                    #true_poses = hyp_poses.gather(index=true_pose_sample_indices.view(B,1,1).expand(-1,-1,9), 
                    #                              dim=1).squeeze(1)
                    # _binds = pose_probs_crit.detach().argmax(dim=1) # B
                    # _xip ra = xi_p_realn[:, r_coord_inds] # All reals: B x 6
                    # _xip fa = xi_p_inferredn[:, r_coord_inds] # All fakes: B*NH x 6
                    # _xip fb = xi_p_hat.gather(dim=1, # Best hyps: B x NH x dim(xi_p) -> B x dim(xi_p) -> B x 6
                    #             index=_binds.view(B,1,1).expand(-1,-1,dim_xi_p)
                    #          )
                    inds = pose_probs_crit.detach().argmax(dim=1)
                    # Best hyps: B x NH x dim(xi_p) -> B x dim(xi_p) -> B x [dimr+3]
                    true_poses = hyp_poses.gather(
                                    dim = 1, 
                                    index = inds.view(B,1,1).expand(-1, -1, model.rot_dim+3)
                                 ).squeeze(1)
                    # Best rotations
                    best_hyp_Rmats = R_hat.gather( # B x nH x 3 x 3 -> B x 3 x 3
                                         dim = 1,
                                         index = inds.view(B,1,1,1).expand(-1, -1, 3, 3)
                                        ).squeeze(1)
                    # Best textures C_best (initially B x nH x |V| x 3)
                    best_inferred_texture = inferred_texture.gather(
                                                 dim = 1,
                                                 index = inds.view(B,1,1,1).expand(-1, -1, model.nV, 3)
                                                ).squeeze(1)
                    # Best inferred texture (UV) images
                    B, nH, _nIC, H, W = inferred_texture_images.shape 
                    best_inferred_texture_images = inferred_texture_images.gather(
                                                     dim = 1, # B x nH x 3 x H x W
                                                     index = inds.view(B,1,1,1,1).expand(-1, -1, 3, H, W)
                                                    ).squeeze(1)
                    # "Fake poses" -> the random Cy1 samples
                    #_, fake_t, fake_r = model.pose_decode(true_cy1_xi_ps, ret_intermed_pose=True)
                    if main_training_mode >= 2:
                        fake_t, fake_r = cy1_t_hat, cy1_r_hat
                        fake_poses = torch.cat( (fake_r, fake_t.squeeze(-1)), dim=-1).squeeze(1)
                    #</ End of real/fake pose samples />#

                ## Random texture sample (truth/real) [cy1] ##
                xi_T_real = model.sample_xi_T(B).to(_dtf_v.device) # Real Gaussian xi_T 
                #random_cy1_textures = model.decode_texture(xi_T_real, _dtf_v) # Fake generated texture

            ### Train the shape critic ###
            # Fakes = inferred shapes (v, [M])
            # Reals = shapes from cycle 1
            if cy2_critics_update < options['shape_critic_updates']:
                stopwatch.start('shape_critic')
                sc_loss   = shape_critic(for_gen = False, 
                                         #fakes   = (v_hat.detach(),  M_hat_preeuc.detach()), # cy2 output
                                         fakes   = (v_hat.detach(),  cy2_M_pe_hat.detach()), # cy2 output
                                         reals   = (_dtf_v.detach(), _dtf_M_euc.detach())  # cy1 v
                                        ).mean() 
                shape_critic_optimizer.zero_grad()
                sc_loss.backward()
                shape_critic_optimizer.step()
                stopwatch.end('shape_critic')

            #xi_p_loss = xi_p_critic(for_gen  = False,
            #                        v_fake   = xi_p_hat.view(B*NH, -1),
            #                        v_real   = true_cy1_xi_ps ).mean()
            #xi_p_critic_optimizer.zero_grad()
            #xi_p_loss.backward()
            #xi_p_critic_optimizer.step()

            ### Train texture critic ###
            # Fakes = Cy1 random texture samples [xi_T -> C]
            # Reals = Best inferred textures from images [I -> xi_T_hat -> C_hat]
            if main_training_mode >= 2:
                stopwatch.start('texture_critic')
                random_cy1_textures = cy1_texture # cy1_C_dec_tex <- this is the reconstructed texture
                # texture_critic_loss = texture_critic(for_gen = False, 
                #                                      v_fake  = random_cy1_textures.view(B, model.nV, 3).detach(), 
                #                                      v_real  = best_inferred_texture.view(B, model.nV, 3).detach() ).mean()
                texture_critic_loss = texture_critic(
                                            for_gen = False,
                                            v_fake  = ( random_cy1_textures.view(B, model.nV, 3).detach(),
                                                        cy1_sampled_texture_image.detach(),
                                                        #cy1_M_pre_euc.detach()
                                                       ),
                                            v_real  = ( best_inferred_texture.view(B, model.nV, 3).detach(),
                                                        best_inferred_texture_images.detach(),
                                                        #cy2_M_pe_hat.detach()
                                                       ) 
                                             )
                texture_critic_optimizer.zero_grad()
                texture_critic_loss.backward()
                texture_critic_optimizer.step()
                stopwatch.end('texture_critic')
            else:
                texture_critic_loss = None

            ### Train latent xi_T critic, if needed ###
            # Fakes = Cy2 inferred xi_Ts
            # Reals = Cy1 Gaussian samples
            if n_xi_T_critic_params > 0:
                # Train the latent xi_T critic
                stopwatch.start('xi_T_critic')
                rf_xi_T_real = model.sample_xi_T(B * NH).to(_dtf_v.device)
                xi_T_loss    = xi_T_critic(for_gen = False,  
                                           v_fake  = xi_T_hat.reshape(B*NH, rf_xi_T_real.shape[-1]).detach(), # cy2
                                           v_real  = rf_xi_T_real.detach() # Gaussian
                                        ).mean()
                xi_T_critic_optimizer.zero_grad()
                xi_T_loss.backward()
                xi_T_critic_optimizer.step()
                stopwatch.end('xi_T_critic')
            else:
                # No training necessary
                xi_T_loss = None # E.g. SWD "critic" has no training

            ### Train or update the pose critic ###
            # Pose critic only optimized in Mode >= II
            # Fakes = Cy1 generated poses
            # Trues = Cy2 *best* inferred poses
            if main_training_mode >= 2:
                stopwatch.start('pose_critic')
                #> Train the pose critic [type 1: learned vector critic]
                if n_pose_critic_params > 0:
                    pose_loss = pose_critic(for_gen = False,  
                                            v_fake  = fake_poses.detach(),
                                            v_real  = true_poses.detach() )
                    # If the pose_critic is parameterless, the "update" will occur 
                    #   inside the forward method
                    pose_loss = pose_loss.mean()
                    pose_critic_optimizer.zero_grad()
                    pose_loss.backward()
                    pose_critic_optimizer.step()
                #> Update the buffered critic [type 2: keep a buffer of observed poses from cy2]
                else:
                    pose_critic(for_gen = False, 
                                R_real  = best_hyp_Rmats.detach(), # Best rotations [cy2]
                                t_real  = true_poses[:, model.rot_dim : ].detach() ) # Best translations [cy2]
                    pose_loss = None
                stopwatch.end('pose_critic')

        stopwatch.end('sec_critics' + time_string)

        # CPTC critic update
        # Trains the adversarial cross-pose texture-consistency (CPTC) critic on the
        # best-hypothesis texture images and rotations inferred in Cycle 2.
        #
        # The inputs are the `acptc_*` tensors gathered earlier in this iteration for
        # the generator-side ACPTC loss (under the same option flag). They are selected
        # from cy2_texture_img_hat / cy2_R with the same argmax over cy2_pose_probs as
        # the `best_*` tensors built inside the secondary-critic loop, so the values
        # are identical. Unlike the `best_*` tensors, they are computed on every
        # iteration: the secondary-critic loop body is skipped when gen_iter < thresh
        # (or when it runs zero times), which would leave the `best_*` names undefined
        # or stale from a previous iteration at this point.
        if options['use_adversarial_cptc']:
            stopwatch.start('cptc_critic_update')
            cptc_adversary_optimizer.zero_grad()
            # Detach the inputs: only the critic is updated here, not the generator
            cptc_adv_loss = cptc_critic.loss(TI     = acptc_best_inferred_texture_images.detach(),
                                             R_true = acptc_best_hyp_Rmats.detach() ).mean()
            cptc_adv_loss.backward()
            cptc_adversary_optimizer.step()
            stopwatch.end('cptc_critic_update')

        #### End of critic updates. Check for printing/saving requirements. ####
        #----------------------------------------------------------------------#

        ### Write out logs
        if gen_iter % print_every == 0:
            logging.info("\n>>> On generator iteration %d" % gen_iter)
            logging.info('Current mode: %d', main_training_mode)
            logging.info('Iterations per mode: %d, %d, %d', 
                    mode_1_training_iterations, mode_2_training_iterations, mode_3_training_iterations)
            if main_training_mode == 2:
                logging.info("Cy1 Balancing Weights: full cy1 = %.2f, fdr cy1 = %.2f" 
                            % (_full_weight, _fdr_weight))
            logging.info('Total Loss = %s', rfp(total_gen_loss))
            if not cy2_loss_only:
                if main_training_mode == 1:
                    utils.ppd('Full DR Cy1 losses (S->I)\n', cy1_L_dict)    
                    utils.write_all_to_tensorboard(board_writer, cy1_L_dict, gen_iter, prepend_string='cy1m1-fdr-')  
                elif main_training_mode == 2:
                    utils.ppd('Fully DR Cy1 losses (S->I)\n', fdr_L_dict) 
                    utils.write_all_to_tensorboard(board_writer, fdr_L_dict, gen_iter, prepend_string='cy1m2-fdr-')    
                    utils.ppd('Full Cy1 losses (S->I)\n', cy1_L_dict)   
                    utils.write_all_to_tensorboard(board_writer, cy1_L_dict, gen_iter, prepend_string='cy1m2-')                                                          
                else:
                    utils.ppd('Cycle 1 losses (S->I)\n', cy1_L_dict)
                    utils.write_all_to_tensorboard(board_writer, cy1_L_dict, gen_iter, prepend_string='cy1m3-') 
            utils.ppd('Cycle 2 losses (I->S)\n', cy2_L_dict)
            utils.write_all_to_tensorboard(board_writer, cy2_L_dict, gen_iter, prepend_string = ('cy2m%d-' % main_training_mode))  
            logging.info('Cy2 loss annealed loss weight: %.3f [progress: %.2f]' % (cy2_annealed_w, cy2_anneal_weight_prog))
            # GT loss if present
            if USE_GT: logging.info('GT Loss: %s', rfp(gt_loss))
            # Regular loss logging for critics
            if gen_iter >= thresh:
                _tb_c_ld = {  }
                if not cy2_loss_only:
                    if main_training_mode == 1: ic_loss = None
                    _tb_c_ld['image'] = rfp(ic_loss)
                    logging.info('Cy1 Img Critic losses: %s', _tb_c_ld['image'])
                if main_training_mode < 2: pose_loss = 0.0 
                crit2_names = ['shape', 'pose', 'xi_T', 'C_tex']
                for _nn, _L in zip(crit2_names, (sc_loss, pose_loss, xi_T_loss, texture_critic_loss) ):
                    _tb_c_ld[_nn] = rfp(_L)
                logging.info('Cy2 Shape+Vec Critic losses: ' + ", ".join([k + ' = ' + _tb_c_ld[k] for k in crit2_names]) )
                # Tensorboard writing
                utils.write_all_to_tensorboard( board_writer, _tb_c_ld, gen_iter, prepend_string='critic_L-')  
                # Timing logs
                logging.info('Average Timings: ' + stopwatch.csv_means_string())
                stopwatch.reset()

        ### Write out the model ###
        # Two independent checkpointing paths:
        #   (1) the main checkpoint, either a single "latest" file or one file per
        #       iteration number, depending on options['save_only_latest_model'];
        #   (2) an additional "latest" file kept alongside numbered checkpoints.
        # Each path writes both the state dict and the full pickled model.
        _on_last_iter = (gen_iter == n_gen_iters - 1)
        _main_ckpt_due = (gen_iter % options['save_model_every'] == 0) or _on_last_iter
        # Don't save fresh out of pretraining
        if _main_ckpt_due and options['save_main_model'] and mode_1_training_iterations > 1:
            # Numbered checkpoints are used unless only the latest model is kept
            __numer = not options['save_only_latest_model']
            if __numer:
                _ckpt_name = "model-%s-iter_%s.state_dict.pt" % (options_choice, gen_iter)
            else:
                _ckpt_name = "model-%s-latest.state_dict.pt" % (options_choice)
            model_path = os.path.join(outdir, _ckpt_name)

            # Numbered checkpoints are only written past iteration 50000;
            # the single "latest" checkpoint is always written
            __do_it = (gen_iter > 50000) if __numer else True

            if __do_it:
                logging.info('Saving state dict (iteration %d) to %s', gen_iter, model_path)
                torch.save(model.state_dict(), model_path)
                # Full-model file name is derived from the state-dict path
                fmodel_path = model_path.replace('state_dict', 'model')
                logging.info('Saving full model (iteration %d) to %s', gen_iter, fmodel_path)
                torch.save(model, fmodel_path)

        # Maintain a "latest" dict too, if doing numerical checkpoints
        if (not options['save_only_latest_model']) and (
                (gen_iter % options['save_latest_model_every'] == 0) or _on_last_iter):
            _latest_name = "model-%s-latest.state_dict.pt" % (options_choice)
            model_path = os.path.join(outdir, _latest_name)
            logging.info('Saving state dict (iteration %d) to %s', gen_iter, model_path)
            torch.save(model.state_dict(), model_path)
            fmodel_path = model_path.replace('state_dict', 'model')
            logging.info('Saving full model (iteration %d) to %s', gen_iter, fmodel_path)
            torch.save(model, fmodel_path)

        ### Save images ###
        if (gen_iter % save_imgs_every == 0) or (gen_iter == n_gen_iters - 1):
            logging.info('%d: %s %s', gen_iter, 'Saving to', outdir)
            latest_files_to_save = [] # Include the outdir

            # Write renders and images
            if not cy2_loss_only:
                if main_training_mode in [1,2]:
                    fdr_ren_name = os.path.join(outdir, '%d-fdr-mode_%d-DR_renders.png' 
                                                % (gen_iter, main_training_mode))
                    imgutils.imswrite_t(fdr_renders, fdr_ren_name)
                    latest_files_to_save.append(fdr_ren_name)
                    if main_training_mode == 2:
                        _mode_2_file_names = os.path.join(outdir, '%d-mode_%d-cy1_renders.png' 
                                                % (gen_iter, main_training_mode))
                        imgutils.imswrite_t(cy1_renders, _mode_2_file_names)
                        latest_files_to_save.append(_mode_2_file_names)
                else:
                    cy1_renders_fname = os.path.join(outdir, '%d-cy1_renders.png' % gen_iter)
                    imgutils.imswrite_t(cy1_renders, cy1_renders_fname)
                    latest_files_to_save.append(cy1_renders_fname)
                    # Write the re-renders of the recons (note: this is why the oth_quantities term returns the dec_texture)
                    with torch.no_grad():
                        ( _cy1_v_hat, _cy1_R_hat, _cy1_t_hat, _cy1_C_dec_tex, _cy1_delta_hat, _cy1_pp_hat, 
                          _, _, _, _, _ ) = cy1_quantities_oth
                        #logging.info('%d: Cy1 translations -> %s', gen_iter, str(_cy1_t_hat))
                        _recon_renders_c1 = model.transformations_to_image(_cy1_R_hat, _cy1_t_hat, _cy1_v_hat, _cy1_delta_hat, 
                                                                           texture = _cy1_C_dec_tex, renderer = renderer)
                        NC = _recon_renders_c1.shape[1]
                        most_likely_c1 = _recon_renders_c1.view(B, NH, NC, _recon_renders_c1.shape[-2], 
                                                                           _recon_renders_c1.shape[-1]) # NC = 3 or 4
                        inds = _cy1_pp_hat.detach().argmax(dim=1)
                        mls = most_likely_c1.shape
                        most_likely_c1 = most_likely_c1.gather(dim=1, 
                                            index=inds.view(B,1,1,1,1).expand(-1,-1,mls[2],mls[3],mls[4])).squeeze(1)
                        c1_extra_ren_path = os.path.join(outdir, '%d-mode3-ML-cy1-recons.png' % gen_iter)
                        imgutils.imswrite_t(most_likely_c1, c1_extra_ren_path)
                        latest_files_to_save.append(c1_extra_ren_path)

            # Write Cy2 outputs
            pose_prob_strings = [ ( 'p=%.3f' % s ) 
                                  for s in cy2_pose_probs.reshape(B*NH).cpu().detach().numpy().tolist() ]
            _all_rens_file = os.path.join(outdir, '%d-cy2_renders-all.png' % gen_iter)
            imgutils.imswrite_t(cy2_renders, _all_rens_file, 
                                ncols=NH*4, corner_strings=pose_prob_strings)
            latest_files_to_save.append(_all_rens_file)
            _real_imgs_path = os.path.join(outdir, '%d-cy2_images.png' % gen_iter)
            _maxlikelihood_imgs_path = os.path.join(outdir, '%d-cy2_renders-ml.png' % gen_iter)
            latest_files_to_save += [_real_imgs_path, _maxlikelihood_imgs_path, ]
            NC = cy2_renders.shape[1]
            if NH > 1:
                most_likely = cy2_renders.view(B, NH, NC, cy2_renders.shape[-2], cy2_renders.shape[-1]) # NC = 3 or 4
                inds = cy2_pose_probs.detach().argmax(dim=1)
                mls = most_likely.shape
                most_likely = most_likely.gather(dim = 1, 
                                    index = inds.view(B,1,1,1,1).expand(-1,-1,mls[2],mls[3],mls[4])).squeeze(1)
                imgutils.imswrite_t(most_likely, _maxlikelihood_imgs_path)

                # cy2_t is B x nH x 3 x 1
                _ft_cy2_t_ml = cy2_t.gather(dim=1, index = inds.view(B,1,1,1).expand(-1,-1,3,1)).squeeze(1)
                logging.info('%d: Cy2 translations -> %s', gen_iter, str(_ft_cy2_t_ml.mean(0)))

            if ran_cptc: # Cross pose texture consistency
                _cy2_cptc_reren_imgs_path = os.path.join(outdir, '%d-cy2_reren-cptc.png' % gen_iter)
                imgutils.imswrite_t(cptc_I_reren, _cy2_cptc_reren_imgs_path)
                latest_files_to_save += [_cy2_cptc_reren_imgs_path]

            # Write Cy2 re-renders, if they are around
            if not cy2_rerenders is None:
                _cy2_reren_imgs_path = os.path.join(outdir, '%d-cy2_re-renders.png' % gen_iter)
                imgutils.imswrite_t(cy2_rerenders, _cy2_reren_imgs_path)
                latest_files_to_save += [_cy2_reren_imgs_path]

            # Write the GT images
            imgutils.imswrite_t(cy2_images,  _real_imgs_path)

            # Compute the results on the fixed image batch
            with torch.no_grad():
                fixed_cy2_outputs       = model.run_cycle_2(fixed_img_batch, renderer)
                fixed_renders           = fixed_cy2_outputs[-5]
                fixed_deltas            = fixed_cy2_outputs[-7]
                fixed_pre_euc_nodes     = fixed_cy2_outputs[1] # no hypotheses
                fixed_inferred_rotation = fixed_cy2_outputs[6]
                fixed_pose_probs        = fixed_cy2_outputs[-4]
                fixed_texture           = fixed_cy2_outputs[-6]
                fixed_texture_img       = fixed_cy2_outputs[-1]
                # Save the renders under the inferred rotation
                _fixed_inf_ml_path = os.path.join(outdir, '%d-fixed_renders.png' % gen_iter)
                most_likely        = fixed_renders.view(B, NH, NC, fixed_renders.shape[-2], fixed_renders.shape[-1]) # NC = 3 or 4
                inds               = fixed_pose_probs.detach().argmax(dim=1)
                mls                = most_likely.shape
                most_likely        = most_likely.gather( dim   = 1, 
                                                         index = inds.view(B,1,1,1,1).expand(-1,-1,mls[2],mls[3],mls[4]) 
                                                       ).squeeze(1)
                latest_files_to_save.append(_fixed_inf_ml_path)
                imgutils.imswrite_t(most_likely, _fixed_inf_ml_path)

                # Re-render from a fixed view to see the canonical pose
                azi, elev  = math.pi / 4.0, math.pi / 4.0
                _fixed_rot = graphicsutils.random_upper_hemi_rotm_manual(B, fixed_azi = azi, fixed_elev = elev)
                fixed_pre_euc_nodes = torch.bmm(fixed_pre_euc_nodes, 
                                                _fixed_rot.permute(0,2,1).to(fixed_pre_euc_nodes.device))

                # Save the fixed renders with (transformed) identity pose
                _fixed_inf_ml_iden_path = os.path.join(outdir, '%d-fixed_renders-iden.png' % gen_iter)
                latest_files_to_save.append(_fixed_inf_ml_iden_path)
                assert len(fixed_texture.shape) == 4 # B x nH x |V| x 3
                B, NH, nV, _ = fixed_texture.shape
                # Get the best textures
                fixed_texture = fixed_texture.gather(dim   = 1, # B x |V| x 3
                                                     index = inds.view(B,1,1,1).expand(-1,-1,nV,3) 
                                                    ).squeeze(1)
                B, NH, _tC, _tH, _tW = fixed_texture_img.shape
                fixed_texture_img = fixed_texture_img.gather(dim   = 1, # B x nH x 3 x H x W
                                                             index = inds.view(B,1,1,1,1).expand(-1, -1, _tC, _tH, _tW) 
                                                            ).squeeze(1)
                #fixed_texture_img = (fixed_texture_img * 2.0) - 1.0
                _fixed_teximg_ml_path = os.path.join(outdir, '%d-fixed_ml_teximgs.png' % gen_iter)
                latest_files_to_save.append(_fixed_teximg_ml_path)
                imgutils.imswrite_t(fixed_texture_img, _fixed_teximg_ml_path, denorm_m1_1 = False)

                # Save and write a CPTC measure, whether or not we are actually using CPTC losses
                #( cptc_v_hat, cptc_xi_T_hat, cptc_I_new, cptc_decoded_texture_image, 
                #  cptc_decoded_texture ) 
                cptc_data_fixed_batch = model.infer_on_random_new_view(
                                                canonical_shape        = fixed_cy2_outputs[1], 
                                                texture                = fixed_texture, 
                                                renderer               = renderer,
                                                domain_randomized_pose = (main_training_mode <= 2),
                                                zero_xy_translation    = False,
                                                return_decoded_texture = True, 
                                                detach_canonical_shape = True,
                                                include_pose_probs     = True,
                                        )             
                ( _fb_cptc_v_hat, _fb_cptc_xi_T_hat, _fb_cptc_I_new, _fb_cptc_decoded_texture_image, 
                  _fb_cptc_decoded_texture, _fb_pose_probs ) = cptc_data_fixed_batch
                _fb_inds = _fb_pose_probs.detach().argmax(dim=1)
                _fb_is = _fb_cptc_decoded_texture_image.shape
                _fb_cptc_decoded_texture_image = _fb_cptc_decoded_texture_image.gather( dim = 1,
                                    index = _fb_inds.view(-1,1,1,1,1).expand(-1,-1,_fb_is[2],_fb_is[3],_fb_is[4])
                                    ).squeeze(1)

                # Print recon loss
                logging.info('Cy2 Fixed batch CPTC Measure (random viewpoint re-rendering)')
                logging.info('\tEuclidean distance between inferred texture images: %.4f', 
                    ( _fb_cptc_decoded_texture_image - fixed_texture_img ).pow(2).clamp(1e-5).sqrt().mean() )
                # Save reconstructed texture image, as well as the re-rendered image
                _fixed_cptcreren_ml_path = os.path.join(outdir, '%d-fixed_ml_cptcreren_teximgs.png' % gen_iter)
                latest_files_to_save.append(_fixed_cptcreren_ml_path)
                imgutils.imswrite_t(_fb_cptc_decoded_texture_image, _fixed_cptcreren_ml_path, denorm_m1_1 = False)
                _fixed_cptcreren_ml_I_path = os.path.join(outdir, '%d-fixed_ml_cptcreren_imgs.png' % gen_iter)
                latest_files_to_save.append(_fixed_cptcreren_ml_I_path)
                imgutils.imswrite_t(_fb_cptc_I_new, _fixed_cptcreren_ml_I_path)

                # Save cy1 tex images
                if main_training_mode >= 2:
                    # Save the cycle 1 sampled texture images
                    _cy1_teximg_ml_path = os.path.join(outdir, '%d-cy1_sample_teximgs.png' % gen_iter)
                    latest_files_to_save.append(_cy1_teximg_ml_path)
                    imgutils.imswrite_t(cy1_sampled_texture_image, _cy1_teximg_ml_path, denorm_m1_1 = False)

                    # Save the cycle 1 reconstructions of the sampled texture images
                    _cy1_pose_prob_inds = _cy1_pose_probs.detach().argmax(dim=1)
                    _cy1_recon_teximg_ml_path = os.path.join(outdir, '%d-cy1_recon_teximgs.png' % gen_iter)
                    latest_files_to_save.append(_cy1_recon_teximg_ml_path)
                    _cB, _cnH, _cnC, _cH, _cW = cy1_recon_texture_image.shape
                    cy1_recon_texture_image = cy1_recon_texture_image.gather(
                                                dim   = 1, # 
                                                index = _cy1_pose_prob_inds.view(_cB,1,1,1,1).expand(-1,-1,_cnC,_cH,_cW) 
                                              ).squeeze(1)
                    imgutils.imswrite_t(cy1_recon_texture_image, _cy1_recon_teximg_ml_path, denorm_m1_1 = False)

                    # Save fixed-texture cy1 examples (different shapes)
                    cy1_shapes, cy1_normals = shape_dataloader.sample()
                    cy1_shapes  = cy1_shapes.to(device_main)
                    cy1_normals = cy1_normals.to(device_main)
                    _cy1_ft_xi_p = model.sample_xi_p(B).to(device_main).unsqueeze(1)
                    _cy1_ft_R, _cy1_ft_t, _ = model.pose_decode(_cy1_ft_xi_p, ret_intermed_pose = True)
                    logging.info('%d: Cy1 translations -> %s', gen_iter, str(_cy1_ft_t.mean(0)))
                    _fixed_tex_cy1_imgs = model.shape_to_image(shapes   = cy1_shapes, 
                                                               normals  = cy1_normals, 
                                                               renderer = renderer, 
                                                               xi_p     = _cy1_ft_xi_p, 
                                                               xi_T     = fixed_cy1_xi_T_vecs)[0]
                    _fixed_tex_cy1_path = os.path.join(outdir, '%d-fixed_texture_cy1_samples.png' % gen_iter)
                    latest_files_to_save.append(_fixed_tex_cy1_path)
                    imgutils.imswrite_t(_fixed_tex_cy1_imgs, _fixed_tex_cy1_path) #, ncols = fixed_cy1_num_cols)

                # Render the identity pose inferred meshes
                fixed_identity_renders = model.render(fixed_pre_euc_nodes, 
                                            fixed_texture, renderer)
                imgutils.imswrite_t(fixed_identity_renders, _fixed_inf_ml_iden_path)

                # How do the v-vectors look? E.g., for a PN-VAE
                write_v_plots = True
                if write_v_plots:
                    _n_v_hat = v_hat.detach().cpu().numpy()
                    _n_dtf_v = _dtf_v.detach().cpu().numpy()
                    _normal_vecs = np.random.randn(B, _n_v_hat.shape[1])
                    # Plot norms
                    _norm_cy2_img_inf   = np.linalg.norm(_n_v_hat,     axis = 1)
                    _norm_cy1_shape_inf = np.linalg.norm(_n_dtf_v,     axis = 1)
                    _norm_gaussians     = np.linalg.norm(_normal_vecs, axis = 1)
                    _max_norm  = max( _norm_cy2_img_inf.max(), _norm_cy1_shape_inf.max() )
                    _min_norm  = min( _norm_cy2_img_inf.min(), _norm_cy1_shape_inf.min() )
                    bins = np.linspace(_min_norm - 1e-7, _max_norm + 1e-7, 80)
                    hhfig, hhax = plt.subplots()
                    hhax.hist(_norm_cy2_img_inf,   bins, alpha=0.47, label='Img Inf')
                    hhax.hist(_norm_cy1_shape_inf, bins, alpha=0.47, label='Shape Inf')
                    hhax.hist(_norm_gaussians,     bins, alpha=0.47, label='Gaussian')
                    hhax.legend(loc='upper right', fontsize=12)
                    hhfig.suptitle('v norms', fontsize=16)
                    _v_norms_plot_path = os.path.join(outdir, '%d-v_Norms.png' % gen_iter)
                    hhfig.savefig(_v_norms_plot_path)
                    # Save plots to copy/move
                    latest_files_to_save += [ _v_norms_plot_path ]
                    plt.close('all')

                # How does xi_T look? 1D distribution of norms + coordinate axis slices
                write_xi_T_plots = True
                if write_xi_T_plots:
                    # Write axis-slice scatter plots of xi_T values (sampled "real" vectors vs. inferred hypotheses)
                    _binds         = pose_probs_crit.detach().argmax(dim=1) # B
                    xi_T_real      = model.sample_xi_T(NH*B)
                    xi_T_realn     = xi_T_real.detach().cpu().numpy()
                    xi_T_best      = xi_T_hat.gather(dim=1, # Best hyps: B x NH x dim(xi_p) -> B x dim(xi_p) 
                                            index=_binds.view(B,1,1).expand(-1,-1,options['dim_xi_T'])
                                         ).squeeze(1).detach().cpu()
                    xi_T_inferredn = cy2_xi_T.view(B*NH, -1).detach().cpu().numpy()
                    r_coord_inds   = np.random.choice(options['dim_xi_T'], replace=False, size=6)
                    _xipra         = xi_T_realn[:, r_coord_inds] # All reals: B x 6
                    _xipfa         = xi_T_inferredn[:, r_coord_inds] # All fakes: B*NH x 6
                    _xipfb         = xi_T_best.numpy()[:, r_coord_inds] # Best hyps: B x 6
                    f, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(17,5))
                    for ii, ax in enumerate([ax1, ax2, ax3]):
                        _p2 = ax.scatter(_xipfa[:, ii*2], _xipfa[:, ii*2 + 1], c='red',   marker='o') # All hyps
                        _p3 = ax.scatter(_xipra[:, ii*2], _xipra[:, ii*2 + 1], c='black', marker='s') # All reals
                        _p1 = ax.scatter(_xipfb[:, ii*2], _xipfb[:, ii*2 + 1], c='green', marker='*') # Best hyps
                        ax.legend( (_p1, _p2, _p3), ('H_b', 'H_all', '~P_p'), 
                                   loc='lower left', ncol=1, fontsize=12)
                        ax.set_xlabel('Dim ' + str(r_coord_inds[2*ii]),     fontsize=14)
                        ax.set_ylabel('Dim ' + str(r_coord_inds[2*ii + 1]), fontsize=14)
                        ax.set_xlim(-3.5, 3.5)
                        ax.set_ylim(-3.5, 3.5)
                    f.suptitle('Xi_T Axis Slice Plots (GI: %d)' % gen_iter, fontsize=16)
                    _sliced_xiT_fig_path = os.path.join(outdir, '%d-XiT_Slices.png' % gen_iter)
                    f.savefig(_sliced_xiT_fig_path)
                    # Write a histogram of norms
                    norms_real = xi_T_real.norm(dim=1).detach().cpu().numpy()
                    norms_fake = cy2_xi_T.view(B*NH, -1).norm(dim=1).detach().cpu().numpy()
                    norms_best = xi_T_best.norm(dim=1).detach().cpu().numpy()
                    _max_norm  = max( norms_real.max(), norms_fake.max() )
                    _min_norm  = min( norms_real.min(), norms_fake.min() )
                    bins = np.linspace(_min_norm - 1e-7, _max_norm + 1e-7, 80)
                    hfig, hax = plt.subplots()
                    hax.hist(norms_real, bins, alpha=0.47, label='real')
                    hax.hist(norms_fake, bins, alpha=0.47, label='fake')
                    hax.hist(norms_best, bins, alpha=0.47, label='best')
                    hax.legend(loc='upper right', fontsize=12)
                    hfig.suptitle('xi_T norms', fontsize=16)
                    _norms_plot_path = os.path.join(outdir, '%d-XiT_Norms.png' % gen_iter)
                    hfig.savefig(_norms_plot_path)
                    # Save plots to copy/move
                    latest_files_to_save += [_norms_plot_path, _sliced_xiT_fig_path]
                    plt.close('all')

                # Copy latest images into the "latest" folder
                # Remove old files
                for _latfile in os.listdir(path_to_latest_folder):
                    _fp_latfile = os.path.join(path_to_latest_folder, _latfile)
                    if not fixed_batch_gt_name == _latfile:
                        if os.path.isfile( _fp_latfile ):
                            os.remove( _fp_latfile )
                # Copy in new ones
                latest_files_to_save += [ os.path.join(outdir_core, 'genren.log') ]
                logging.info('Copying to latest: ' + str(latest_files_to_save))
                for _new_lat_file in latest_files_to_save:
                    if os.path.isfile(_new_lat_file):
                        shutil.copy(_new_lat_file, path_to_latest_folder)

        # Complete iteration timer
        stopwatch.end('gen_iter' + time_string)


#-------------------------#
# Script entry point: run main() only when this file is executed directly
# (e.g. `python train.py ...`), not when it is imported as a module.
if __name__ == "__main__":
    main()
#-------------------------#

#