Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mobi/AZW3 meta #63

Merged
merged 6 commits into from
May 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 197 additions & 0 deletions Charcoal/Charcoal/AZW3.cpp
Original file line number Diff line number Diff line change
@@ -1 +1,198 @@
#include "AZW3.h"
#include <mobi.h>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "stb_image.h"
#include "stb_image_write.h"
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>

#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
#include <experimental/filesystem>

using namespace std;
namespace fs = std::experimental::filesystem;

#pragma once

/// Converts a wide (UTF-16) string to a UTF-8 encoded std::string using
/// WideCharToMultiByte with CP_UTF8.
///
/// @param wstr  Wide string to convert.
/// @return      The UTF-8 bytes; an empty string when @p wstr is empty.
std::string wstring_to_utf8(const std::wstring& wstr) {
    std::string utf8;

    if (!wstr.empty()) {
        const int wideLength = static_cast<int>(wstr.size());

        // First pass: ask the API how many bytes the conversion needs.
        const int byteCount = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength,
                                                  nullptr, 0, nullptr, nullptr);

        // Second pass: convert straight into the pre-sized buffer.
        utf8.assign(byteCount, '\0');
        WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLength,
                            &utf8[0], byteCount, nullptr, nullptr);
    }

    return utf8;
}

/// Builds a std::string from a C string that is allowed to be null.
///
/// @param charPtr  Null-terminated string, or nullptr.
/// @return         A copy of the text, or an empty string for nullptr.
std::string safeCharPtrToString(const char* charPtr) {
    // std::string cannot be constructed from a null pointer, so map it to "".
    return (charPtr == nullptr) ? std::string() : std::string(charPtr);
}

/// Allocates a MOBIData structure and loads the given file into it.
///
/// @param filePath  UTF-8 encoded path of the MOBI/AZW3 file (the callers in
///                  this file produce it with wstring_to_utf8).
/// @return          A loaded MOBIData the caller must release with
///                  mobi_free(), or nullptr on any failure (an error is
///                  written to std::cerr).
MOBIData* initAndLoadMobi(const std::string& filePath) {
    MOBIData* mobiData = mobi_init();
    if (!mobiData) {
        std::cerr << "Memory allocation for MOBIData failed." << std::endl;
        return nullptr;
    }

    // The narrow-character CRT open functions interpret the path in the
    // process ANSI code page, so a UTF-8 path containing non-ASCII characters
    // would not be found. Convert back to UTF-16 and use the wide variant.
    std::wstring widePath;
    if (!filePath.empty()) {
        const int pathBytes = static_cast<int>(filePath.size());
        const int wideLength = MultiByteToWideChar(CP_UTF8, 0, filePath.data(), pathBytes, nullptr, 0);
        if (wideLength > 0) {
            widePath.assign(static_cast<size_t>(wideLength), L'\0');
            MultiByteToWideChar(CP_UTF8, 0, filePath.data(), pathBytes, &widePath[0], wideLength);
        }
    }
    if (widePath.empty()) {
        std::cerr << "Failed to open file: " << filePath << " (empty or unconvertible path)" << std::endl;
        mobi_free(mobiData);
        return nullptr;
    }

    FILE* file = nullptr;
    const errno_t err = _wfopen_s(&file, widePath.c_str(), L"rb");
    if (err != 0 || file == nullptr) {
        std::cerr << "Failed to open file: " << filePath << " with error code: " << err << std::endl;
        mobi_free(mobiData);
        return nullptr;
    }

    const bool loaded = (mobi_load_file(mobiData, file) == MOBI_SUCCESS);

    // The FILE handle is only needed while loading; close it on both paths.
    fclose(file);

    if (!loaded) {
        std::cerr << "Failed to load MOBI file: " << filePath << std::endl;
        mobi_free(mobiData);
        return nullptr;
    }

    return mobiData;
}

/// Reads the metadata of an AZW3 file and returns it as a book record.
///
/// @param path  Wide-character path of the file to read.
/// @return      A book whose metadata fields are filled from the file and
///              whose format is "AZW3"; a default-constructed book when the
///              path is null or the file cannot be loaded.
book azw3::add(PWSTR path) {
    // std::wstring cannot be constructed from a null pointer.
    if (path == nullptr) {
        std::cerr << "Unable to initialize and load MOBI data" << std::endl;
        return book();
    }

    // Convert the wide path to UTF-8 for initAndLoadMobi.
    const std::string filePath = wstring_to_utf8(std::wstring(path));

    MOBIData* mobiData = initAndLoadMobi(filePath);
    if (!mobiData) {
        std::cerr << "Unable to initialize and load MOBI data" << std::endl;
        return book(); // Return an empty book object
    }

    // Per the libmobi API documentation the mobi_meta_get_* functions return
    // a heap-allocated string (or NULL) that the caller must deallocate.
    // Copy it into a std::string, tolerating NULL, then release the original.
    const auto takeMeta = [](char* raw) {
        std::string value = safeCharPtrToString(raw);
        std::free(raw); // free(nullptr) is a no-op
        return value;
    };

    book curr;
    curr.title = takeMeta(mobi_meta_get_title(mobiData));
    curr.author = takeMeta(mobi_meta_get_author(mobiData));
    curr.publisher = takeMeta(mobi_meta_get_publisher(mobiData));
    curr.contributor = takeMeta(mobi_meta_get_contributor(mobiData));
    curr.rights = takeMeta(mobi_meta_get_copyright(mobiData));
    curr.format = "AZW3";
    curr.date = takeMeta(mobi_meta_get_publishdate(mobiData));
    curr.language = takeMeta(mobi_meta_get_language(mobiData));
    curr.description = takeMeta(mobi_meta_get_description(mobiData));

    mobi_free(mobiData);

    return curr;
}

/// Reads the metadata of a MOBI file and returns it as a book record.
///
/// @param path  Wide-character path of the file to read.
/// @return      A book whose metadata fields are filled from the file and
///              whose format is "MOBI"; a default-constructed book when the
///              path is null or the file cannot be loaded.
book mobi::add(PWSTR path) {
    // std::wstring cannot be constructed from a null pointer.
    if (path == nullptr) {
        std::cerr << "Unable to initialize and load MOBI data" << std::endl;
        return book();
    }

    // Convert the wide path to UTF-8 for initAndLoadMobi.
    const std::string filePath = wstring_to_utf8(std::wstring(path));

    MOBIData* mobiData = initAndLoadMobi(filePath);
    if (!mobiData) {
        std::cerr << "Unable to initialize and load MOBI data" << std::endl;
        return book(); // Return an empty book object
    }

    // Per the libmobi API documentation the mobi_meta_get_* functions return
    // a heap-allocated string (or NULL) that the caller must deallocate.
    // Copy it into a std::string, tolerating NULL, then release the original.
    const auto takeMeta = [](char* raw) {
        std::string value = safeCharPtrToString(raw);
        std::free(raw); // free(nullptr) is a no-op
        return value;
    };

    book curr;
    curr.title = takeMeta(mobi_meta_get_title(mobiData));
    curr.author = takeMeta(mobi_meta_get_author(mobiData));
    curr.publisher = takeMeta(mobi_meta_get_publisher(mobiData));
    curr.contributor = takeMeta(mobi_meta_get_contributor(mobiData));
    curr.rights = takeMeta(mobi_meta_get_copyright(mobiData));
    curr.format = "MOBI";
    curr.date = takeMeta(mobi_meta_get_publishdate(mobiData));
    curr.language = takeMeta(mobi_meta_get_language(mobiData));
    curr.description = takeMeta(mobi_meta_get_description(mobiData));

    mobi_free(mobiData);

    return curr;
}

void extractTextAndOtherContent(const MOBIRawml* rawml, const std::string& outputDir) {
for (MOBIPart* part = rawml->markup; part != NULL; part = part->next) {
if (part->type == T_HTML) {
std::string filename = outputDir + "/markup_" + std::to_string(part->uid) + ".html";
std::ofstream outFile(filename, std::ios::binary);
outFile.write(reinterpret_cast<char*>(part->data), part->size);
outFile.close();
}
}
}

void extractImages(const MOBIRawml* rawml, const std::string& outputDir, int scaleFactor) {
for (MOBIPart* part = rawml->resources; part != NULL; part = part->next) {
if (part->type == T_JPG || part->type == T_PNG || part->type == T_GIF || part->type == T_BMP) {
std::string extension;
switch (part->type) {
case T_JPG: extension = ".jpg"; break;
case T_PNG: extension = ".png"; break;
case T_GIF: extension = ".gif"; break;
case T_BMP: extension = ".bmp"; break;
default: continue;
}

int width, height, channels;
unsigned char* imgData = stbi_load_from_memory(part->data, part->size, &width, &height, &channels, 0);
if (imgData) {
// Convert the image to grayscale using OpenCV
cv::Mat colorImg(height, width, channels == 4 ? CV_8UC4 : channels == 3 ? CV_8UC3 : CV_8UC1, imgData);
cv::Mat grayImg;
cv::cvtColor(colorImg, grayImg, cv::COLOR_BGR2GRAY);

// Optionally resize the image according to scaleFactor
if (scaleFactor != 1) {
cv::resize(grayImg, grayImg, cv::Size(), scaleFactor, scaleFactor, cv::INTER_LINEAR);
}

// Save the grayscale image
std::string filename = outputDir + "/image_" + std::to_string(part->uid) + extension;
cv::imwrite(filename, grayImg);

// Free the image data allocated by stb_image
stbi_image_free(imgData);
}
else {
std::cerr << "Failed to load image for grayscaling from memory." << std::endl;
}
}
}
}

bool extractContentFromMobi(const std::string& filePath, const std::string& outputDir, int scaleFactor) {
MOBIData* mobiData = initAndLoadMobi(filePath);
if (!mobiData) return false;

if (!fs::create_directories(outputDir) && !fs::exists(outputDir)) {
std::cerr << "Failed to create output directory: " << outputDir << std::endl;
mobi_free(mobiData);
return false;
}

MOBIRawml* rawml = mobi_init_rawml(mobiData);
if (!rawml || mobi_parse_rawml(rawml, mobiData) != MOBI_SUCCESS) {
std::cerr << "Failed to initialize or parse MOBI rawml" << std::endl;
mobi_free(mobiData); // Clean up mobiData
return false;
}

// Clean up
mobi_free_rawml(rawml);
mobi_free(mobiData);

return true;
}
25 changes: 24 additions & 1 deletion Charcoal/Charcoal/AZW3.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
#pragma once
#include "Library.h"
class AZW3 : public Library
#include "mobi.h"
#include <Windows.h>
#include <string>

std::string wstring_to_utf8(const std::wstring& wstr);

// Library subclass that handles MOBI files.
// Its add() is defined in AZW3.cpp, where it loads the file through libmobi,
// copies the metadata into a book and sets the book's format to "MOBI".
class mobi : public Library
{
public:
// Reads the metadata of the MOBI file at `path` and returns it as a book.
// Returns a default-constructed book when the file cannot be loaded.
// NOTE(review): Library::add(PWSTR) returns std::string, so this overload
// appears to hide the base-class function rather than override it - confirm.
book add(PWSTR path);
};

// Library subclass that handles AZW3 files.
// Its add() is defined in AZW3.cpp, where it loads the file through libmobi,
// copies the metadata into a book and sets the book's format to "AZW3".
class azw3 : public Library
{
public:
// Reads the metadata of the AZW3 file at `path` and returns it as a book.
// Returns a default-constructed book when the file cannot be loaded.
// NOTE(review): Library::add(PWSTR) returns std::string, so this overload
// appears to hide the base-class function rather than override it - confirm.
book add(PWSTR path);
};

//Utility functions that are used across different files
// Converts a wide string to UTF-8 (implemented with WideCharToMultiByte/CP_UTF8).
// NOTE(review): this function is also declared near the top of this header;
// the repeated declaration is harmless but redundant.
std::string wstring_to_utf8(const std::wstring& wstr);
// Copies a C string into a std::string; a null pointer yields an empty string.
std::string safeCharPtrToString(const char* charPtr);

//Function declarations related to MOBI library handling
// Allocates a MOBIData and loads filePath into it. Returns nullptr on failure;
// on success the caller releases the result with mobi_free().
MOBIData* initAndLoadMobi(const std::string& filePath);
// Writes each HTML markup part of rawml to "<outputDir>/markup_<uid>.html".
void extractTextAndOtherContent(const MOBIRawml* rawml, const std::string& outputDir);
// Decodes each JPG/PNG/GIF/BMP resource of rawml, converts it to grayscale,
// resizes it by scaleFactor when that is not 1, and writes it to
// "<outputDir>/image_<uid><ext>".
void extractImages(const MOBIRawml* rawml, const std::string& outputDir, int scaleFactor);
// Loads the MOBI file at filePath, makes sure outputDir exists and parses the
// document's rawml; returns false if any of those steps fails.
bool extractContentFromMobi(const std::string& filePath, const std::string& outputDir, int scaleFactor);
6 changes: 3 additions & 3 deletions Charcoal/Charcoal/Charcoal.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,14 @@
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>$(SolutionDir)Charcoal\openvc-4.9.0\include;$(SolutionDir)Charcoal\ultralight-sdk\include;$(SolutionDir)Charcoal\libepub-master\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Charcoal\libmobi-public\src;$(SolutionDir)Charcoal\packages\libxml2-vc140-static-32_64.2.9.4.1\lib\native\include\libxml;$(SolutionDir)Charcoal\openvc-4.9.0\include;$(SolutionDir)Charcoal\ultralight-sdk\include;$(SolutionDir)Charcoal\libepub-master\include</AdditionalIncludeDirectories>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>AppCore.lib;Ultralight.lib;UltralightCore.lib;WebCore.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(SolutionDir)Charcoal\ultralight-sdk\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>AppCore.lib;Ultralight.lib;UltralightCore.lib;WebCore.lib;libmobi.lib;libxml2.lib;Ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(SolutionDir)Charcoal\libmobi-public\msvc\x64\Debug;$(SolutionDir)Charcoal\packages\libxml2-vc140-static-32_64.2.9.4.1\lib\native\libs\x64\static\Debug;$(SolutionDir)Charcoal\ultralight-sdk\lib</AdditionalLibraryDirectories>
<EntryPointSymbol>mainCRTStartup</EntryPointSymbol>
</Link>
</ItemDefinitionGroup>
Expand Down
11 changes: 9 additions & 2 deletions Charcoal/Charcoal/Library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,18 @@ std::string Library::add(PWSTR path)
}
else if (f == "AZW3" || f == "azw3")
{
/* AZW3 a;
azw3 a;
book curr = a.add(path);
collection.push_back(curr);
return curr.title;*/
return curr.title;
}
else if (f == "MOBI" || f == "mobi")
{
mobi m;
book curr = m.add(path);
collection.push_back(curr);
return curr.title;
}
else if (f == "pdf" || f == "PDF")
{
/*PDF p;
Expand Down
101 changes: 101 additions & 0 deletions Charcoal/Charcoal/libmobi-public/.github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
name: Build

on:
push:
branches: [ public ]
pull_request:
branches: [ public ]

jobs:
unix-build:

runs-on: ubuntu-latest
defaults:
run:
shell: bash

strategy:
fail-fast: false
matrix:
config:
- name: default build with debug
options: --enable-debug
- name: build with internal libs
options: --with-zlib=no --with-libxml2=no
- name: build without encryption
options: --disable-encryption

steps:
- uses: actions/checkout@v2
- name: install dependencies
run: |
if [ "${{ runner.os }}" = "Linux" ]; then
sudo apt-get update -qq;
sudo apt-get install -y autotools-dev pkg-config automake autoconf libtool;
sudo apt-get install -y zlib1g-dev libxml2-dev;
elif [ "${{ runner.os }}" = "macOS" ]; then
brew update > /dev/null;
brew outdated autoconf || brew upgrade autoconf;
brew outdated automake || brew upgrade automake;
brew outdated libtool || brew upgrade libtool;
fi
- name: autogen
run: ./autogen.sh
- name: configure
run: ./configure ${{ matrix.config.options }}
- name: make
run: make -j `nproc`
- name: make check
run: make -j `nproc` check
- name: make distcheck
run: make -j `nproc` distcheck
- name: upload debug artifacts
uses: actions/upload-artifact@v2
if: ${{ failure() }}
with:
name: test-logs
path: |
**/tests/test-suite.log
**/tests/samples/*.log

win64-build:

runs-on: windows-latest
defaults:
run:
shell: msys2 {0}
steps:
- name: setup-msys2
uses: msys2/setup-msys2@v2
with:
msystem: MINGW64
path-type: minimal
update: true
install: >-
git
autotools
base-devel
mingw-w64-x86_64-toolchain
mingw-w64-x86_64-libtool
mingw-w64-x86_64-libxml2
mingw-w64-x86_64-zlib
- name: checkout
uses: actions/checkout@v2
- name: autogen
run: sh ./autogen.sh
- name: configure
run: ./configure --enable-debug
- name: make
run: make -j$(nproc)
- name: make check
run: make -j$(nproc) check
- name: make distcheck
run: make -j$(nproc) distcheck
- name: upload debug artifacts
uses: actions/upload-artifact@v2
if: ${{ failure() }}
with:
name: test-logs
path: |
**/tests/test-suite.log
**/tests/samples/*.log
Loading
Loading