@@ -70,7 +70,6 @@ def interactive_plots(
7070 "point_size" : hyperparams .get ("point_size" , 10 ),
7171 }
7272
73-
7473 ## directories and file management
7574 data_dir = os .path .join (root_dir , "data" , run_name )
7675 plot_dir = os .path .join (root_dir , "plots" , run_name )
@@ -83,32 +82,28 @@ def interactive_plots(
8382 print (f"Checkpoint: using { checkpoint } of { stage } stage" )
8483 ckpt_pretrained = os .path .join (root_dir , "weights" , run_name , stage , checkpoint )
8584 utils .set_seed ()
86- transforms = utils .build_transforms (hyperparams )
87- loaders = utils .build_loaders (data_dir , transforms , batch_sizes , num_workers , second_stage = (stage == "second" ))
8885 model = utils .build_model (backbone , second_stage = (stage == "second" ), num_classes = num_classes , ckpt_pretrained = ckpt_pretrained ).cuda ()
8986 model .use_projection_head (False )
9087 model .eval ()
9188
92- ## Determine which embeddings to compute
89+ ## prep computation
90+ transforms = utils .build_transforms (hyperparams )
91+ loaders = utils .build_loaders (
92+ data_dir , transforms , batch_sizes , num_workers ,
93+ second_stage = (stage == "second" ), drop_last = False , shuffle_train = False )
9394 embeddings , labels , rel_paths = [], [], []
9495
95- ## val batch size cant be zero
96+ ## val set - batch size cant be zero
9697 embeddings_val , labels_val = utils .compute_embeddings (loaders ["valid_loader" ], model )
97- if len (embeddings_val ) < len (loaders ["valid_loader" ].dataset .imgs ):
98- missed_imgs = len (loaders ["valid_loader" ].dataset .imgs ) - len (embeddings_val )
99- print (f"Warning: missed { missed_imgs } images because batch size was not a multiple of validation dataset size." )
100- rel_paths_val = [item [0 ][len (root_dir ) + 1 :] for item in loaders ["valid_loader" ].dataset .imgs [:len (embeddings_val )]]
98+ rel_paths_val = [item [0 ][len (root_dir ) + 1 :] for item in loaders ["valid_loader" ].dataset .imgs ]
10199 embeddings .extend (embeddings_val )
102100 labels .extend (labels_val )
103101 rel_paths .extend (rel_paths_val )
104102
105- ## train set embeddings
103+ ## train set - skipped if zero batch size
106104 if batch_sizes ["train_batch_size" ] is not None :
107105 embeddings_train , labels_train = utils .compute_embeddings (loaders ["train_loader" ], model )
108- if len (embeddings_train ) < len (loaders ["train_loader" ].dataset .imgs ):
109- missed_imgs = len (loaders ["train_loader" ].dataset .imgs ) - len (embeddings_train )
110- print (f"Warning: missed { missed_imgs } images because batch size was not a multiple of training dataset size." )
111- rel_paths_train = [item [0 ][len (root_dir ) + 1 :] for item in loaders ["train_loader" ].dataset .imgs [:len (embeddings_train )]]
106+ rel_paths_train = [item [0 ][len (root_dir ) + 1 :] for item in loaders ["train_loader" ].dataset .imgs ]
112107 embeddings .extend (embeddings_train )
113108 labels .extend (labels_train )
114109 rel_paths .extend (rel_paths_train )
@@ -120,7 +115,7 @@ def interactive_plots(
120115
121116 ## Reduce dimensionality
122117 if not perplexity :
123- perplexity = min (100 , len (embeddings ) // 2 )
118+ perplexity = min (30 , max ( 5 , ( len (embeddings ) - 1 ) / 3 ) )
124119 print (f"tSNE: using a perplexity value of { perplexity } " )
125120 reduced_data , colnames , _ = helpers .embbedings_dimension_reductions (embeddings , perplexity )
126121
@@ -129,7 +124,6 @@ def interactive_plots(
129124 df ["paths" ] = [os .path .join (".." , ".." , p ) for p in rel_paths ]
130125 df ["class" ], df ["class_str" ] = labels , [os .path .basename (os .path .dirname (p )) for p in rel_paths ]
131126 df ["dataset" ] = df ["paths" ].apply (lambda x : "validation" if "/val/" in x else "train" )
132-
133127 helpers .bokeh_plot (df , out_path = plot_path , ** plot_config )
134128
135129
0 commit comments