-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimise manifest database #7
Comments
look at https://meekro.com/ for database abstraction |
meekro doesnt perform cacheing by itself, but can be used alongside stash Sure! To achieve this, you can modify the code to ensure that read operations fetch data from the cache, and only write operations interact with the database. Here’s how you can adjust the code: Install Stash (if not already done): Configure Stash:
AI-generated code. Review and use carefully. More info on FAQ.
In this setup: getLookupTable: Reads from the cache if available, otherwise fetches from the database and caches the result. |
ive added meekro to the startup. |
Drop meekro, try propel or redbean which has table relationships |
also consider using an ETL library such as flow which has support for Json and xml ### Updated Schema Definition ( <database name="default" defaultIdMethod="native">
<table name="instrument">
<column name="id" type="INTEGER" primaryKey="true" autoIncrement="true" />
<column name="name" type="VARCHAR" size="255" required="true" />
<behavior name="query_cache" />
</table>
<table name="images">
<column name="id" type="INTEGER" primaryKey="true" autoIncrement="true" />
<column name="bucket" type="VARCHAR" size="255" />
<column name="camera_model_component_list" type="VARCHAR" size="255" />
<column name="date_added" type="TIMESTAMP" />
<column name="filter_name" type="VARCHAR" size="255" />
<column name="pds_label_url" type="VARCHAR" size="255" />
<column name="scale_factor" type="VARCHAR" size="255" />
<column name="url_list" type="VARCHAR" size="255" />
<column name="sclk" type="VARCHAR" size="255" />
<column name="attitude" type="VARCHAR" size="255" />
<column name="camera_position" type="VARCHAR" size="255" />
<column name="drive" type="VARCHAR" size="255" />
<column name="camera_model_type" type="VARCHAR" size="255" />
<column name="contributor" type="VARCHAR" size="255" />
<column name="mast_az" type="VARCHAR" size="255" />
<column name="site" type="VARCHAR" size="255" />
<column name="camera_vector" type="VARCHAR" size="255" />
<column name="item_name" type="VARCHAR" size="255" />
<column name="subframe_rect" type="VARCHAR" size="255" />
<column name="utc" type="TIMESTAMP" />
<column name="sol" type="INTEGER" />
<column name="mast_el" type="VARCHAR" size="255" />
<column name="instrument_id" type="INTEGER" required="true" />
<foreign-key foreignTable="instrument">
<reference local="instrument_id" foreign="id" />
</foreign-key>
</table>
</database> ### Updated Ingestion Script with Caching: <?php
require_once 'propel.php';
use YourNamespace\InstrumentQuery;
use YourNamespace\Instrument;
use YourNamespace\Images;
// Load and decode JSON file
$json = file_get_contents('path/to/your/json/file.json');
$data = json_decode($json, true);
// Prepare a list of instruments to be inserted
$instrumentQuery = InstrumentQuery::create();
$instruments = [];
foreach ($data['images'] as $imageData) {
$instrumentName = $imageData['instrument'];
if (!isset($instruments[$instrumentName])) {
$instrument = $instrumentQuery
->setQueryKey('find_instrument_by_name')
->findOneByName($instrumentName);
if (!$instrument) {
$instrument = new Instrument();
$instrument->setName($instrumentName);
$instrument->save();
}
$instruments[$instrumentName] = $instrument->getId();
}
}
// Insert images data
foreach ($data['images'] as $imageData) {
$image = new Images();
$image->setBucket($imageData['bucket']);
$image->setCameraModelComponentList($imageData['cameraModelComponentList']);
$image->setDateAdded(new \DateTime($imageData['dateAdded']));
$image->setFilterName($imageData['filterName']);
$image->setPdsLabelUrl($imageData['pdsLabelUrl']);
$image->setScaleFactor($imageData['scaleFactor']);
$image->setUrlList($imageData['urlList']);
$image->setSclk($imageData['sclk']);
$image->setAttitude($imageData['attitude']);
$image->setCameraPosition($imageData['cameraPosition']);
$image->setDrive($imageData['drive']);
$image->setCameraModelType($imageData['cameraModelType']);
$image->setContributor($imageData['contributor']);
$image->setMastAz($imageData['mastAz']);
$image->setSite($imageData['site']);
$image->setCameraVector($imageData['cameraVector']);
$image->setItemName($imageData['itemName']);
$image->setSubframeRect($imageData['subframeRect']);
$image->setUtc(new \DateTime($imageData['utc']));
$image->setSol($imageData['sol']);
$image->setMastEl($imageData['mastEl']);
$image->setInstrumentId($instruments[$imageData['instrument']]);
$image->save();
}
echo "Data ingestion complete!"; |
theres a lot of code needed to manage the instrument table. copilot suggests using eloquent: Here’s a simplified example using Eloquent:
Key Advantages:
Using Eloquent should reduce the amount of code required and make it easier to manage your data. If you have any other questions or need further assistance, feel free to ask! 🌟🚀 |
The database contains thousands of duplicate strings for mission, sol, instrument, type.
The database would be a lot smaller if integer indexes were used
The text was updated successfully, but these errors were encountered: