
Commit

Fix load_image shape
Wovchena committed Oct 8, 2024
1 parent 5eb7011 commit 9c0f352
Showing 5 changed files with 56 additions and 35 deletions.
46 changes: 23 additions & 23 deletions samples/cpp/visual_language_chat/export_MiniCPM-V-2_6.py
@@ -1171,29 +1171,29 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("model_dir", type=Path)
     model_dir = parser.parse_args().model_dir
-    model_id = "openbmb/MiniCPM-V-2_6"
-    ckpt = model_dir / "ckpt"
-    if not ckpt.exists():
-        snapshot_download(model_id, local_dir=ckpt, force_download=True)
-        patch_model_code(ckpt)
-    model = AutoModel.from_pretrained(ckpt, trust_remote_code=True)
-    model.eval()
-    model.config.save_pretrained(model_dir)
-    tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
-    tokenizer.save_pretrained(model_dir)
-    ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(tokenizer, with_detokenizer=True)
-    ov.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml")
-    ov.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml")
-    processor = AutoProcessor.from_pretrained(ckpt, trust_remote_code=True)
-    processor.save_pretrained(model_dir)
-
-    convert_llm(model, model_dir)
-    del model.llm
-    gc.collect()
-
-    convert_vision_encoder(model, model_dir)
-    # ov_cpm = init_model(model_dir, "CPU")
-    # print(ov_cpm.chat(Image.open(requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", stream=True).raw), [{"role": "user", "content": "What is unusual on this image?"}], ov_cpm.processor.tokenizer))
+    # model_id = "openbmb/MiniCPM-V-2_6"
+    # ckpt = model_dir / "ckpt"
+    # if not ckpt.exists():
+    #     snapshot_download(model_id, local_dir=ckpt, force_download=True)
+    #     patch_model_code(ckpt)
+    # model = AutoModel.from_pretrained(ckpt, trust_remote_code=True)
+    # model.eval()
+    # model.config.save_pretrained(model_dir)
+    # tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
+    # tokenizer.save_pretrained(model_dir)
+    # ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(tokenizer, with_detokenizer=True)
+    # ov.save_model(ov_tokenizer, model_dir / "openvino_tokenizer.xml")
+    # ov.save_model(ov_detokenizer, model_dir / "openvino_detokenizer.xml")
+    # processor = AutoProcessor.from_pretrained(ckpt, trust_remote_code=True)
+    # processor.save_pretrained(model_dir)
+
+    # convert_llm(model, model_dir)
+    # del model.llm
+    # gc.collect()
+
+    # convert_vision_encoder(model, model_dir)
+    ov_cpm = init_model(model_dir, "CPU")
+    print(ov_cpm.chat(Image.open("/home/vzlobin/r/g/g.png"), [{"role": "user", "content": "What is unusual on this image?"}], ov_cpm.processor.tokenizer))
 
 if "__main__" == __name__:
     main()
2 changes: 1 addition & 1 deletion samples/cpp/visual_language_chat/load_image.cpp
@@ -35,7 +35,7 @@ ov::Tensor utils::load_image(const std::filesystem::path& image_path) {
     };
     return ov::Tensor(
         ov::element::u8,
-        ov::Shape{1, size_t(desired_channels), size_t(y), size_t(x)},
+        ov::Shape{1, size_t(y), size_t(x), size_t(desired_channels)},
         SharedImageAllocator{image, desired_channels, y, x}
     );
 }
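This hunk is the fix named in the commit title: stbi_load returns interleaved bytes row by row, i.e. height x width x channels, so the tensor wrapping that buffer has to be laid out as NHWC rather than NCHW. Below is a minimal sketch of the same idea, assuming stb_image is available; load_rgb_u8 is a hypothetical helper that copies the pixels, whereas the sample avoids the copy by handing the stb buffer to the tensor through SharedImageAllocator.

// Minimal sketch (not the sample's exact code): wrap stb_image output in an
// NHWC u8 tensor. load_rgb_u8 is a hypothetical name used for illustration.
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <openvino/runtime/tensor.hpp>
#include "stb_image.h"

ov::Tensor load_rgb_u8(const char* path) {
    int x = 0, y = 0, channels_in_file = 0;
    constexpr int desired_channels = 3;  // force RGB
    unsigned char* image = stbi_load(path, &x, &y, &channels_in_file, desired_channels);
    if (!image) {
        throw std::runtime_error("Failed to load the image");
    }
    // Height, width and channels are the innermost dimensions, matching how
    // stbi_load interleaves the bytes in memory.
    ov::Tensor tensor{ov::element::u8, ov::Shape{1, size_t(y), size_t(x), size_t(desired_channels)}};
    std::memcpy(tensor.data<uint8_t>(), image, size_t(y) * size_t(x) * desired_channels);
    stbi_image_free(image);
    return tensor;
}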
22 changes: 11 additions & 11 deletions samples/cpp/visual_language_chat/visual_language_chat.cpp
@@ -9,7 +9,7 @@ bool print_subword(std::string&& subword) {
     return !(std::cout << subword << std::flush);
 }
 
-int main(int argc, char* argv[]) try {
+int main(int argc, char* argv[]) {
     if (3 != argc) {
         throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
     }
@@ -40,14 +40,14 @@ int main(int argc, char* argv[]) try {
         "question:\n";
     }
     pipe.finish_chat();
-} catch (const std::exception& error) {
-    try {
-        std::cerr << error.what() << '\n';
-    } catch (const std::ios_base::failure&) {}
-    return EXIT_FAILURE;
-} catch (...) {
-    try {
-        std::cerr << "Non-exception object thrown\n";
-    } catch (const std::ios_base::failure&) {}
-    return EXIT_FAILURE;
+// } catch (const std::exception& error) {
+//     try {
+//         std::cerr << error.what() << '\n';
+//     } catch (const std::ios_base::failure&) {}
+//     return EXIT_FAILURE;
+// } catch (...) {
+//     try {
+//         std::cerr << "Non-exception object thrown\n";
+//     } catch (const std::ios_base::failure&) {}
+//     return EXIT_FAILURE;
 }
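The block commented out above was a function-try-block: the try placed after main's parameter list makes the catch clauses cover the entire function body, so any exception thrown by the pipeline was reported and turned into EXIT_FAILURE. With the handlers disabled, an exception escaping main now ends the process via std::terminate. A minimal standalone sketch of the pattern, unrelated to the sample's pipeline logic:

#include <cstdlib>
#include <iostream>
#include <stdexcept>

// Function-try-block: the handlers below cover everything inside main.
int main() try {
    throw std::runtime_error("example failure");
} catch (const std::exception& error) {
    std::cerr << error.what() << '\n';
    return EXIT_FAILURE;
} catch (...) {
    std::cerr << "Non-exception object thrown\n";
    return EXIT_FAILURE;
}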
20 changes: 20 additions & 0 deletions src/cpp/src/utils.hpp
@@ -85,3 +85,23 @@ ov::genai::TokenizedInputs subtract_chat_tokenized_inputs(const ov::genai::Token
 } // namespace utils
 } // namespace genai
 } // namespace ov
+
+inline std::ostream& operator<<(std::ostream& os, const ov::Tensor& tensor) {
+    ov::Shape shape = tensor.get_shape();
+    size_t height = shape.at(shape.size() - 2);
+    size_t width = shape.at(shape.size() - 1);
+    os << shape << ", " << tensor.get_element_type() << '\n';
+    switch (tensor.get_element_type()) {
+        case ov::element::u8: {
+            const uint8_t* data = tensor.data<uint8_t>();
+            for (size_t col = 0; col < std::min(14ul, height); ++col) {
+                for (size_t row = 0; row < std::min(14ul, width); ++row) {
+                    os << +data[col * width + row] << ", ";
+                }
+                os << '\n';
+            }
+            return os;
+        }
+        default: OPENVINO_THROW("Print not implemented");
+    }
+}
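A hypothetical usage sketch for the new debug printer (not part of this commit): it streams the shape and element type, then up to the first 14x14 values of a u8 tensor, reading the last two dimensions as rows and columns. Including the internal utils.hpp directly is an assumption made for illustration.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <openvino/runtime/tensor.hpp>
#include "utils.hpp"  // the internal header extended above, assumed to be on the include path

int main() {
    // Keep the tensor small so it fits entirely inside the 14x14 print window.
    ov::Tensor t{ov::element::u8, ov::Shape{1, 4, 6}};
    std::fill_n(t.data<uint8_t>(), t.get_size(), uint8_t{7});
    std::cout << t;  // prints the shape and element type, then 4 rows of 6 values
}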
1 change: 1 addition & 0 deletions src/cpp/src/vision_encoder.cpp
@@ -281,6 +281,7 @@ EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const o
         int(img.get_shape().at(2)),
         {img.data<uint8_t>(), img.data<uint8_t>() + img.get_size()}
     };
+    std::cout << img;
     std::vector<std::vector<clip_image_u8>> imgs = ::slice_image(source, max_slice_nums, scale_resolution, patch_size, never_split);
     std::vector<std::vector<ov::Tensor>> results;
     std::vector<std::vector<ImageSize>> sizes;
