Skip to content

Commit

Permalink
Merge pull request #63 from ChicoState/mobi_meta
Browse files Browse the repository at this point in the history
Mobi/AZW3 meta
  • Loading branch information
infinimineralex authored May 11, 2024
2 parents 63e00aa + 2127501 commit 9aadb36
Show file tree
Hide file tree
Showing 246 changed files with 53,512 additions and 6 deletions.
197 changes: 197 additions & 0 deletions Charcoal/Charcoal/AZW3.cpp
Original file line number Diff line number Diff line change
@@ -1 +1,198 @@
#include "AZW3.h"
#include <mobi.h>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "stb_image.h"
#include "stb_image_write.h"
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>

#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
#include <experimental/filesystem>

using namespace std;
namespace fs = std::experimental::filesystem;

#pragma once

/// Converts a wide string to a UTF-8 encoded std::string using the Win32
/// WideCharToMultiByte API. An empty input yields an empty result.
std::string wstring_to_utf8(const std::wstring& wstr) {
    if (wstr.empty()) {
        return std::string();
    }

    const wchar_t* wideChars = &wstr[0];
    const int wideLength = (int)wstr.size();

    // First call passes no output buffer, so the API only reports how many
    // bytes the converted text needs.
    const int byteCount = WideCharToMultiByte(CP_UTF8, 0, wideChars, wideLength, nullptr, 0, nullptr, nullptr);

    // Second call performs the conversion into a buffer of exactly that size.
    std::string utf8(byteCount, 0);
    WideCharToMultiByte(CP_UTF8, 0, wideChars, wideLength, &utf8[0], byteCount, nullptr, nullptr);
    return utf8;
}

/// Builds a std::string from a C string, mapping a null pointer to an empty
/// string instead of invoking undefined behaviour.
std::string safeCharPtrToString(const char* charPtr) {
    return charPtr == nullptr ? std::string() : std::string(charPtr);
}

/// Allocates a MOBIData structure and fills it from the file at `filePath`.
///
/// @param filePath Path of the MOBI/AZW3 file, opened in binary read mode.
/// @return The loaded MOBIData (caller releases it with mobi_free), or nullptr
///         when allocation, opening, or loading fails; the reason is written
///         to std::cerr.
///
/// NOTE(review): callers in this file pass a UTF-8 path, while fopen_s takes a
/// narrow path that is presumably interpreted in the active code page -
/// confirm that non-ASCII paths open correctly.
MOBIData* initAndLoadMobi(const std::string& filePath) {
    MOBIData* mobiData = mobi_init();
    if (mobiData == nullptr) {
        std::cerr << "Memory allocation for MOBIData failed." << std::endl;
        return nullptr;
    }

    FILE* file;
    const errno_t err = fopen_s(&file, filePath.c_str(), "rb");
    const bool opened = (err == 0) && (file != nullptr);
    if (!opened) {
        std::cerr << "Failed to open file: " << filePath << " with error code: " << err << std::endl;
        mobi_free(mobiData);
        return nullptr;
    }

    // The stream is only needed while libmobi reads it, so close it right
    // after the load attempt regardless of the outcome.
    const bool loaded = (mobi_load_file(mobiData, file) == MOBI_SUCCESS);
    fclose(file);

    if (!loaded) {
        std::cerr << "Failed to load MOBI file: " << filePath << std::endl;
        mobi_free(mobiData);
        return nullptr;
    }

    return mobiData;
}

book azw3::add(PWSTR path) {
// Convert PWSTR to std::wstring
std::wstring ws(path);
// Now convert std::wstring to std::string
std::string filePath = wstring_to_utf8(ws);

// Initialize and load MOBI file
MOBIData* mobiData = initAndLoadMobi(filePath);
if (!mobiData) {
// Handle error: unable to initialize and load MOBI
std::cerr << "Unable to initialize and load MOBI data" << std::endl;
return book(); // Return an empty book object
}

book curr;
// Extract metadata
curr.title = mobi_meta_get_title(mobiData);
curr.author = safeCharPtrToString(mobi_meta_get_author(mobiData));
curr.publisher = safeCharPtrToString(mobi_meta_get_publisher(mobiData));
curr.contributor = safeCharPtrToString(mobi_meta_get_contributor(mobiData));
curr.rights = safeCharPtrToString(mobi_meta_get_copyright(mobiData));
curr.format = "AZW3";
curr.date = safeCharPtrToString(mobi_meta_get_publishdate(mobiData));
curr.language = safeCharPtrToString(mobi_meta_get_language(mobiData));
curr.description = safeCharPtrToString(mobi_meta_get_description(mobiData));

mobi_free(mobiData);

return curr;
}

book mobi::add(PWSTR path) {
std::wstring ws(path);
std::string filePath = wstring_to_utf8(ws);

// Initialize and load MOBI file
MOBIData* mobiData = initAndLoadMobi(filePath);
if (!mobiData) {
// Handle error: unable to initialize and load MOBI
std::cerr << "Unable to initialize and load MOBI data" << std::endl;
return book(); // Return an empty book object
}

book curr;
// Extract metadata
curr.title = mobi_meta_get_title(mobiData);
curr.author = safeCharPtrToString(mobi_meta_get_author(mobiData));
curr.publisher = safeCharPtrToString(mobi_meta_get_publisher(mobiData));
curr.contributor = safeCharPtrToString(mobi_meta_get_contributor(mobiData));
curr.rights = safeCharPtrToString(mobi_meta_get_copyright(mobiData));
curr.format = "MOBI";
curr.date = safeCharPtrToString(mobi_meta_get_publishdate(mobiData));
curr.language = safeCharPtrToString(mobi_meta_get_language(mobiData));
curr.description = safeCharPtrToString(mobi_meta_get_description(mobiData));

mobi_free(mobiData);

return curr;
}

void extractTextAndOtherContent(const MOBIRawml* rawml, const std::string& outputDir) {
for (MOBIPart* part = rawml->markup; part != NULL; part = part->next) {
if (part->type == T_HTML) {
std::string filename = outputDir + "/markup_" + std::to_string(part->uid) + ".html";
std::ofstream outFile(filename, std::ios::binary);
outFile.write(reinterpret_cast<char*>(part->data), part->size);
outFile.close();
}
}
}

void extractImages(const MOBIRawml* rawml, const std::string& outputDir, int scaleFactor) {
for (MOBIPart* part = rawml->resources; part != NULL; part = part->next) {
if (part->type == T_JPG || part->type == T_PNG || part->type == T_GIF || part->type == T_BMP) {
std::string extension;
switch (part->type) {
case T_JPG: extension = ".jpg"; break;
case T_PNG: extension = ".png"; break;
case T_GIF: extension = ".gif"; break;
case T_BMP: extension = ".bmp"; break;
default: continue;
}

int width, height, channels;
unsigned char* imgData = stbi_load_from_memory(part->data, part->size, &width, &height, &channels, 0);
if (imgData) {
// Convert the image to grayscale using OpenCV
cv::Mat colorImg(height, width, channels == 4 ? CV_8UC4 : channels == 3 ? CV_8UC3 : CV_8UC1, imgData);
cv::Mat grayImg;
cv::cvtColor(colorImg, grayImg, cv::COLOR_BGR2GRAY);

// Optionally resize the image according to scaleFactor
if (scaleFactor != 1) {
cv::resize(grayImg, grayImg, cv::Size(), scaleFactor, scaleFactor, cv::INTER_LINEAR);
}

// Save the grayscale image
std::string filename = outputDir + "/image_" + std::to_string(part->uid) + extension;
cv::imwrite(filename, grayImg);

// Free the image data allocated by stb_image
stbi_image_free(imgData);
}
else {
std::cerr << "Failed to load image for grayscaling from memory." << std::endl;
}
}
}
}

bool extractContentFromMobi(const std::string& filePath, const std::string& outputDir, int scaleFactor) {
MOBIData* mobiData = initAndLoadMobi(filePath);
if (!mobiData) return false;

if (!fs::create_directories(outputDir) && !fs::exists(outputDir)) {
std::cerr << "Failed to create output directory: " << outputDir << std::endl;
mobi_free(mobiData);
return false;
}

MOBIRawml* rawml = mobi_init_rawml(mobiData);
if (!rawml || mobi_parse_rawml(rawml, mobiData) != MOBI_SUCCESS) {
std::cerr << "Failed to initialize or parse MOBI rawml" << std::endl;
mobi_free(mobiData); // Clean up mobiData
return false;
}

// Clean up
mobi_free_rawml(rawml);
mobi_free(mobiData);

return true;
}
25 changes: 24 additions & 1 deletion Charcoal/Charcoal/AZW3.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
#pragma once
#include "Library.h"
class AZW3 : public Library
#include "mobi.h"
#include <Windows.h>
#include <string>

std::string wstring_to_utf8(const std::wstring& wstr);

// Importer for MOBI e-books, derived from Library.
class mobi : public Library
{
public:
// Loads the file at `path` through libmobi and returns its metadata as a
// `book` whose format is "MOBI"; returns a default-constructed book when the
// file cannot be loaded (implemented in AZW3.cpp).
book add(PWSTR path);
};

// Importer for AZW3 e-books, derived from Library.
class azw3 : public Library
{
public:
// Loads the file at `path` through libmobi and returns its metadata as a
// `book` whose format is "AZW3"; returns a default-constructed book when the
// file cannot be loaded (implemented in AZW3.cpp).
book add(PWSTR path);
};

//Utility functions that are used across different files
std::string wstring_to_utf8(const std::wstring& wstr);
std::string safeCharPtrToString(const char* charPtr);

//Function declarations related to MOBI library handling
MOBIData* initAndLoadMobi(const std::string& filePath);
void extractTextAndOtherContent(const MOBIRawml* rawml, const std::string& outputDir);
void extractImages(const MOBIRawml* rawml, const std::string& outputDir, int scaleFactor);
bool extractContentFromMobi(const std::string& filePath, const std::string& outputDir, int scaleFactor);
6 changes: 3 additions & 3 deletions Charcoal/Charcoal/Charcoal.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,14 @@
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>$(SolutionDir)Charcoal\openvc-4.9.0\include;$(SolutionDir)Charcoal\ultralight-sdk\include;$(SolutionDir)Charcoal\libepub-master\include</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Charcoal\libmobi-public\src;$(SolutionDir)Charcoal\packages\libxml2-vc140-static-32_64.2.9.4.1\lib\native\include\libxml;$(SolutionDir)Charcoal\openvc-4.9.0\include;$(SolutionDir)Charcoal\ultralight-sdk\include;$(SolutionDir)Charcoal\libepub-master\include</AdditionalIncludeDirectories>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>AppCore.lib;Ultralight.lib;UltralightCore.lib;WebCore.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(SolutionDir)Charcoal\ultralight-sdk\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>AppCore.lib;Ultralight.lib;UltralightCore.lib;WebCore.lib;libmobi.lib;libxml2.lib;Ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(SolutionDir)Charcoal\libmobi-public\msvc\x64\Debug;$(SolutionDir)Charcoal\packages\libxml2-vc140-static-32_64.2.9.4.1\lib\native\libs\x64\static\Debug;$(SolutionDir)Charcoal\ultralight-sdk\lib</AdditionalLibraryDirectories>
<EntryPointSymbol>mainCRTStartup</EntryPointSymbol>
</Link>
</ItemDefinitionGroup>
Expand Down
11 changes: 9 additions & 2 deletions Charcoal/Charcoal/Library.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,18 @@ std::string Library::add(PWSTR path)
}
else if (f == "AZW3" || f == "azw3")
{
/* AZW3 a;
azw3 a;
book curr = a.add(path);
collection.push_back(curr);
return curr.title;*/
return curr.title;
}
else if (f == "MOBI" || f == "mobi")
{
mobi m;
book curr = m.add(path);
collection.push_back(curr);
return curr.title;
}
else if (f == "pdf" || f == "PDF")
{
/*PDF p;
Expand Down
101 changes: 101 additions & 0 deletions Charcoal/Charcoal/libmobi-public/.github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
name: Build

on:
push:
branches: [ public ]
pull_request:
branches: [ public ]

jobs:
unix-build:

runs-on: ubuntu-latest
defaults:
run:
shell: bash

strategy:
fail-fast: false
matrix:
config:
- name: default build with debug
options: --enable-debug
- name: build with internal libs
options: --with-zlib=no --with-libxml2=no
- name: build without encryption
options: --disable-encryption

steps:
- uses: actions/checkout@v2
- name: install dependencies
run: |
if [ "${{ runner.os }}" = "Linux" ]; then
sudo apt-get update -qq;
sudo apt-get install -y autotools-dev pkg-config automake autoconf libtool;
sudo apt-get install -y zlib1g-dev libxml2-dev;
elif [ "${{ runner.os }}" = "macOS" ]; then
brew update > /dev/null;
brew outdated autoconf || brew upgrade autoconf;
brew outdated automake || brew upgrade automake;
brew outdated libtool || brew upgrade libtool;
fi
- name: autogen
run: ./autogen.sh
- name: configure
run: ./configure ${{ matrix.config.options }}
- name: make
run: make -j `nproc`
- name: make check
run: make -j `nproc` check
- name: make distcheck
run: make -j `nproc` distcheck
- name: upload debug artifacts
uses: actions/upload-artifact@v2
if: ${{ failure() }}
with:
name: test-logs
path: |
**/tests/test-suite.log
**/tests/samples/*.log
win64-build:

runs-on: windows-latest
defaults:
run:
shell: msys2 {0}
steps:
- name: setup-msys2
uses: msys2/setup-msys2@v2
with:
msystem: MINGW64
path-type: minimal
update: true
install: >-
git
autotools
base-devel
mingw-w64-x86_64-toolchain
mingw-w64-x86_64-libtool
mingw-w64-x86_64-libxml2
mingw-w64-x86_64-zlib
- name: checkout
uses: actions/checkout@v2
- name: autogen
run: sh ./autogen.sh
- name: configure
run: ./configure --enable-debug
- name: make
run: make -j$(nproc)
- name: make check
run: make -j$(nproc) check
- name: make distcheck
run: make -j$(nproc) distcheck
- name: upload debug artifacts
uses: actions/upload-artifact@v2
if: ${{ failure() }}
with:
name: test-logs
path: |
**/tests/test-suite.log
**/tests/samples/*.log
Loading

0 comments on commit 9aadb36

Please sign in to comment.