Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: interface #29

Merged
merged 28 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
77f5821
refactor: renamed model to rimo
mathisdrn Aug 28, 2023
92c28cc
feat: defined interface
mathisdrn Aug 28, 2023
f714bff
refactor: .gitignore
mathisdrn Aug 28, 2023
6f6bd24
feat: added FileWriter interface
mathisdrn Aug 29, 2023
bfc6fba
feat: fileWriter interface with test
mathisdrn Aug 30, 2023
c23f3ad
feat: loader for JSONL
mathisdrn Aug 30, 2023
4b19cca
test: rimo interface test
mathisdrn Aug 30, 2023
00e39ab
feat: added filesReader interface
mathisdrn Aug 30, 2023
82f1dbb
test: FilesReader with 2 files
mathisdrn Aug 30, 2023
b4c6b28
refactor: re adding to model package to avoid circular dependency
mathisdrn Aug 31, 2023
257d7ef
feat(rimo): driven_test.go
mathisdrn Aug 31, 2023
978c352
refactor(rimo): improve Writer name
mathisdrn Aug 31, 2023
816fdda
refactor: renamed testWriter() and added GetBase() method
mathisdrn Aug 31, 2023
4d9af3c
feat: TestWriter improv (similar to prev commit)
mathisdrn Aug 31, 2023
88e9b64
refactor: minor typo
mathisdrn Aug 31, 2023
5ab51c0
test: RIMO pipeline infra_test.go
mathisdrn Aug 31, 2023
adaa585
refactor: added cobra command using interface
mathisdrn Aug 31, 2023
f6e473c
refactor: more explicit variable naming
mathisdrn Aug 31, 2023
275d66e
refactor: removed unusued function
mathisdrn Aug 31, 2023
9105812
refactor: added test to compare pipeline with expected output
mathisdrn Aug 31, 2023
979b9df
refactor: fix : giving filesReader proper filepath
mathisdrn Aug 31, 2023
297b18e
refactor: almost work as expected
mathisdrn Sep 15, 2023
1480c83
refactor: updated schema from rimo pkg to model pkg
mathisdrn Sep 15, 2023
5e562b2
refactor: work as expected
mathisdrn Sep 15, 2023
d89b38a
fix: remove old analyse command
Sep 27, 2023
7a66d45
chore: remove dead code
Sep 27, 2023
6b4220f
docs: add GPLv3 license header in new files
Sep 27, 2023
a3f1e1f
fix: remove output test from git
Sep 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/rimo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (

"github.com/cgi-fr/rimo/pkg/analyse"
"github.com/cgi-fr/rimo/pkg/io"
"github.com/cgi-fr/rimo/pkg/model"
"github.com/cgi-fr/rimo/pkg/rimo"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -59,7 +59,7 @@ func main() { //nolint:funlen
Short: "Return rimo jsonschema",
Args: cobra.NoArgs,
Run: func(cmd *cobra.Command, args []string) {
jsonschema, err := model.GetJSONSchema()
jsonschema, err := rimo.GetJSONSchema()
if err != nil {
os.Exit(1)
}
Expand Down
64 changes: 64 additions & 0 deletions internal/infra/fileWriter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package infra
youen marked this conversation as resolved.
Show resolved Hide resolved

import (
"fmt"
"os"

"github.com/cgi-fr/rimo/pkg/model"
"gopkg.in/yaml.v3"
)

// Terminal writer.

// StdoutWriter exports a model.Base by printing it on the standard output.
type StdoutWriter struct{}

// StdoutWriterFactory returns a ready-to-use StdoutWriter.
func StdoutWriterFactory() *StdoutWriter {
	return &StdoutWriter{}
}

// Export prints base on the standard output using the default %v format,
// followed by a newline.
//
// It returns an error when the write to the standard output fails (for
// instance when the output is a closed pipe) instead of reporting success.
func (w *StdoutWriter) Export(base *model.Base) error {
	if _, err := fmt.Printf("%v\n", base); err != nil {
		return fmt.Errorf("failed to write Base to stdout: %w", err)
	}

	return nil
}

// YAML writer.

// YAMLWriter exports a model.Base as a YAML file located at outputPath.
type YAMLWriter struct {
	outputPath string
}

// YAMLWriterFactory returns a YAMLWriter whose Export method writes to filepath.
func YAMLWriterFactory(filepath string) *YAMLWriter {
	return &YAMLWriter{outputPath: filepath}
}

// Export writes base as a YAML document to the file at w.outputPath.
//
// The path is first checked with ValidateOutputPath, then the file is created
// (truncating any existing file) and base is encoded into it.
//
// An error is returned when the path is rejected, when the file cannot be
// created, when encoding fails, or when flushing the encoder or closing the
// file fails. The last two matter: data may only reach the disk on
// flush/close, so ignoring them could report success for a truncated file.
func (w *YAMLWriter) Export(base *model.Base) (err error) {
	if err = ValidateOutputPath(w.outputPath); err != nil {
		return fmt.Errorf("failed to validate file path: %w", err)
	}

	outputFile, err := os.Create(w.outputPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %w", err)
	}

	// Runs last: report a close failure unless an earlier error is already set.
	defer func() {
		if closeErr := outputFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close output file: %w", closeErr)
		}
	}()

	// Encode Base to YAML.
	encoder := yaml.NewEncoder(outputFile)

	// Runs before the file is closed: Close writes any data still buffered by
	// the encoder.
	defer func() {
		if closeErr := encoder.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to flush YAML encoder: %w", closeErr)
		}
	}()

	if err = encoder.Encode(base); err != nil {
		return fmt.Errorf("failed to encode Base to YAML: %w", err)
	}

	return nil
}
83 changes: 83 additions & 0 deletions internal/infra/fileWriter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (C) 2023 CGI France
//
// This file is part of RIMO.
//
// RIMO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// RIMO is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with RIMO. If not, see <http://www.gnu.org/licenses/>.

package infra_test

import (
"os"
"path/filepath"
"testing"

"github.com/cgi-fr/rimo/internal/infra"
"github.com/cgi-fr/rimo/pkg/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// dataDir is the path, relative to this package, of the directory the tests
// use both to read input files and to create temporary output directories.
const (
dataDir = "../../testdata/"
)

// TestWriterYAML checks that YAMLWriter.Export serialises a minimal base (one
// table without columns) to the expected YAML document.
func TestWriterYAML(t *testing.T) {
	t.Parallel()

	base := model.Base{
		Name: "databaseName",
		Tables: []model.Table{
			{
				Name:    "tableName",
				Columns: []model.Column{},
			},
		},
	}

	// Create a temporary directory for the test.
	tempDir, err := os.MkdirTemp(dataDir, "export_test")
	require.NoError(t, err)

	defer os.RemoveAll(tempDir)

	// Path of the file the writer has to produce.
	outputFile := filepath.Join(tempDir, "output.yaml")

	// Create the writer and export the base.
	writer := infra.YAMLWriterFactory(outputFile)

	err = writer.Export(&base)
	require.NoError(t, err)

	// Read the whole output file. os.ReadFile reads until EOF, whereas a single
	// Read call into a Stat-sized buffer is allowed to return fewer bytes.
	outputData, err := os.ReadFile(outputFile)
	require.NoError(t, err)

	expectedData := `database: databaseName
tables:
- name: tableName
columns: []
`

	assert.Equal(t, expectedData, string(outputData))
}
156 changes: 156 additions & 0 deletions internal/infra/filesReader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package infra
youen marked this conversation as resolved.
Show resolved Hide resolved

import (
"errors"
"fmt"
)

// Sentinel errors reported by FilesReader and its factory.
var (
// ErrInvalidFilePath is reported by FilesReaderFactory when ValidateFilePath rejects one of the paths.
ErrInvalidFilePath = errors.New("failed to validate path")
// ErrNoFilePath is returned by FilesReaderFactory when the list of paths is empty.
ErrNoFilePath = errors.New("no file path provided")
// ErrNonUniqueBase is wrapped by isBaseUnique when two paths yield different base names.
ErrNonUniqueBase = errors.New("base name is not unique")
)

// FilesReader iterates, column by column, over the data of several files and
// feeds it to rimo. It exposes:
//   - BaseName(): the name of the base;
//   - Next(): whether there is another column to read;
//   - Value(): the values of the current column, the name of the column and the name of the table.
//
// Decoding a file is delegated to the loader field. FilesReaderFactory installs
// a JSONLinesLoader, which is the only loader type this struct can hold.
// Build a FilesReader with FilesReaderFactory.
type FilesReader struct {
filepathList []string
loader JSONLinesLoader // loads one file into a column name -> values map
baseName string
// Iteration state.
fileIndex int // index of the current file in filepathList; -1 until the first call to Next()
colNameMapIndex map[int]string // column name by position, rebuilt by Value() each time a file is loaded
colIndex int // position of the current column in the current file; 0 makes Value() load the file
// Data served by Value().
dataMap map[string][]interface{}
tableName string // set by Value() from the current file path through ExtractName
}

// FilesReaderFactory builds a FilesReader over filepathList.
//
// It returns:
//   - ErrNoFilePath when filepathList is empty;
//   - an error wrapping ErrInvalidFilePath (match it with errors.Is) when
//     ValidateFilePath rejects one of the paths; the message names the
//     offending path and the reason given by ValidateFilePath;
//   - an error wrapping the one returned by isBaseUnique when the base name
//     cannot be extracted or differs between files.
//
// The returned reader uses a JSONLinesLoader and is positioned before the
// first column: call Next before Value.
func FilesReaderFactory(filepathList []string) (*FilesReader, error) {
	if len(filepathList) == 0 {
		return nil, ErrNoFilePath
	}

	for _, path := range filepathList {
		if err := ValidateFilePath(path); err != nil {
			// Keep the sentinel matchable while reporting which path failed
			// and why, in the same way ErrNonUniqueBase is wrapped.
			return nil, fmt.Errorf("%w: %s: %s", ErrInvalidFilePath, path, err.Error())
		}
	}

	filesReader := FilesReader{
		filepathList: filepathList,
		fileIndex:    -1, // no file selected until the first call to Next
		loader:       JSONLinesLoader{},
	}

	baseName, err := filesReader.isBaseUnique()
	if err != nil {
		return nil, fmt.Errorf("base is not unique: %w", err)
	}

	filesReader.baseName = baseName

	return &filesReader, nil
}

// Reader interface implementation

// BaseName returns the base name stored on the reader by FilesReaderFactory.
func (r *FilesReader) BaseName() string {
return r.baseName
}

// Next moves the reader to the next column and reports whether there is one.
//
// The first call selects the first column of the first file. Later calls step
// through the columns of the file currently held in dataMap; once they are
// exhausted the reader moves to the first column of the following file, or
// returns false when the current file is the last one.
func (r *FilesReader) Next() bool {
	// First call: select the first file and its first column.
	if r.fileIndex == -1 {
		r.fileIndex, r.colIndex = 0, 0

		return true
	}

	columnCount := len(r.dataMap)

	// Step to the following column of the current file, if any.
	if r.colIndex < columnCount {
		r.colIndex++
	}

	// The current file still has a column to serve.
	if r.colIndex != columnCount {
		return true
	}

	// The current file is exhausted and it was the last one.
	lastFile := len(r.filepathList) - 1
	if r.fileIndex == lastFile {
		return false
	}

	// Move to the first column of the next file.
	r.fileIndex++
	r.colIndex = 0

	return true
}

// Value returns the values of the current column, the name of that column and
// the name of the table it belongs to.
//
// Files are loaded one at a time into dataMap: when the current column is the
// first of a file (colIndex == 0) the file at fileIndex is loaded, its table
// name is extracted from its path and the position -> column name index is
// rebuilt. Column positions follow Go's map iteration order, so the order in
// which columns are served is not specified.
//
// An error is returned when the table name cannot be extracted or when the
// file cannot be loaded; in both cases the reader state is left untouched.
// Call Next before Value: fileIndex must designate an entry of filepathList.
func (r *FilesReader) Value() ([]interface{}, string, string, error) {
	// colIndex == 0: a new file has to be loaded.
	if r.colIndex == 0 {
		path := r.filepathList[r.fileIndex]

		// Extract the table name from the file name.
		_, tableName, err := ExtractName(path)
		if err != nil {
			return nil, "", "", fmt.Errorf("failed to extract table name: %w", err)
		}

		// Load the file; report a failure to the caller instead of panicking.
		dataMap, err := r.loader.Load(path)
		if err != nil {
			return nil, "", "", fmt.Errorf("failed to load file %s: %w", path, err)
		}

		r.tableName = tableName
		r.dataMap = dataMap

		// Index the column names by position so that colIndex can address them.
		r.colNameMapIndex = make(map[int]string, len(dataMap))
		position := 0

		for colName := range dataMap {
			r.colNameMapIndex[position] = colName
			position++
		}
	}

	// colIndex == n: the current file has been partially processed.
	colName := r.colNameMapIndex[r.colIndex]

	// Return values, column name, table name.
	return r.dataMap[colName], colName, r.tableName, nil
}

// isBaseUnique returns the base name shared by every path of filepathList.
//
// The base name of each path is obtained with ExtractName and compared with
// the one of the first path. The first extraction error is returned as is; a
// mismatch yields an error wrapping ErrNonUniqueBase that names both bases.
// filepathList must not be empty.
func (r *FilesReader) isBaseUnique() (string, error) {
	reference, _, err := ExtractName(r.filepathList[0])
	if err != nil {
		return "", err
	}

	for idx := range r.filepathList {
		candidate, _, extractErr := ExtractName(r.filepathList[idx])

		switch {
		case extractErr != nil:
			return "", extractErr
		case candidate != reference:
			return "", fmt.Errorf("%w : %s and %s", ErrNonUniqueBase, reference, candidate)
		}
	}

	return reference, nil
}
66 changes: 66 additions & 0 deletions internal/infra/filesReader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package infra_test

import (
"fmt"
"path/filepath"
"testing"

"github.com/cgi-fr/rimo/internal/infra"
"github.com/stretchr/testify/assert"
)

// TestReader reads a single JSON Lines file through FilesReader and checks the
// base name, the table name and the values of every column it serves.
func TestReader(t *testing.T) {
	t.Parallel()

	inputFile := filepath.Join(dataDir, "data0/data_input.jsonl")

	reader, err := infra.FilesReaderFactory([]string{inputFile})
	if !assert.NoError(t, err) {
		// reader is nil when the factory fails: using it would panic.
		return
	}

	// Assertions.

	actualBaseName := reader.BaseName()
	expectedBaseName := "data"
	assert.Equal(t, expectedBaseName, actualBaseName)

	expectedTableName := "input"
	expectedDataMap := map[string][]interface{}{
		"address": {"PSC", "095", "06210"},
		"age":     {nil, nil, float64(61)},
		"major":   {true, false, true},
		"empty":   {nil, nil, nil},
	}

	for reader.Next() {
		values, colName, tableName, err := reader.Value()
		assert.NoError(t, err)

		// Columns are looked up by name because the reader does not serve
		// them in a fixed order.
		expectedColData, ok := expectedDataMap[colName]
		if !ok {
			// Failf formats its message; Fail would print the verb verbatim.
			assert.Failf(t, "column name not found", "unexpected column: %s", colName)

			continue
		}

		assert.Equal(t, expectedColData, values)
		assert.Equal(t, expectedTableName, tableName)
	}
}

func TestReaderMultipleFiles(t *testing.T) {
t.Parallel()

inputFile := filepath.Join(dataDir, "data0/data_input.jsonl")
inputFile2 := filepath.Join(dataDir, "data0/data_input2.jsonl")
reader, err := infra.FilesReaderFactory([]string{inputFile, inputFile2})
assert.NoError(t, err)

for reader.Next() {
youen marked this conversation as resolved.
Show resolved Hide resolved
values, colName, tableName, err := reader.Value()
if err != nil {
assert.NoError(t, err)
}

fmt.Printf("%s.%s: %v\n", tableName, colName, values)
}
}
Loading