-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
performance monitor package #2046
Changes from 2 commits
1e99a71
7bd548d
51fc43d
1483702
91ce438
9369efe
21b359f
a79f0c7
fef5daf
b17cb6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ import ( | |
"github.com/threefoldtech/zos/pkg/environment" | ||
"github.com/threefoldtech/zos/pkg/events" | ||
"github.com/threefoldtech/zos/pkg/monitord" | ||
"github.com/threefoldtech/zos/pkg/perf" | ||
"github.com/threefoldtech/zos/pkg/registrar" | ||
"github.com/threefoldtech/zos/pkg/stubs" | ||
"github.com/threefoldtech/zos/pkg/utils" | ||
|
@@ -193,6 +194,17 @@ func action(cli *cli.Context) error { | |
return hypervisor, nil | ||
}) | ||
|
||
log.Info().Msg("Start Perf scheduler") | ||
performanceMonitor := perf.NewPerformanceMonitor("/var/run/redis.sock") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prefer to use shorter var names e.g perfMon |
||
performanceMonitor.InitScheduler() | ||
err = performanceMonitor.RunScheduler(ctx) | ||
if err != nil { | ||
log.Error().Err(err).Msg("fails in scheduler") | ||
} | ||
bus.WithHandler("zos.perf.get", func(ctx context.Context, payload []byte) (interface{}, error) { | ||
return performanceMonitor.Get(ctx, payload) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. handling of user input is part of the This breaks |
||
}) | ||
|
||
// answer calls for dmi | ||
go func() { | ||
if err := bus.Run(ctx); err != nil { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Perf | ||
|
||
Perf is a performance test module than is scheduled to run and cache those tests results in redis which can be retrieved later over RMB call. | ||
|
||
Perf tests are monitored by `noded` service from zos modules. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package perf | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"time" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
// TestResultData the result test schema | ||
type TestResultData struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now after checking the rest of the files, this should be TaskResult struct only task name and the result |
||
TestName string | ||
TestNumber uint64 | ||
Result interface{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fields missing json tags |
||
} | ||
|
||
// GetRequest the get request struct | ||
type GetRequest struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this struct can be removed completely and use the task name always to get the task result from the cache |
||
TestName string `json:"test_name"` | ||
TestNumber uint64 `json:"test_number"` | ||
} | ||
|
||
// CacheResult set result in redis | ||
func (pm *PerformanceMonitor) CacheResult(ctx context.Context, resultKey string, resultData TestResultData) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this a |
||
data, err := json.Marshal(resultData) | ||
if err != nil { | ||
return errors.Wrap(err, "Error marshaling data to JSON") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. errors should be lowercased |
||
} | ||
|
||
return pm.RedisClient.Set(resultKey, data, 10*time.Second).Err() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extract the expiration into an unexported constant There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was gonna say that data should never expire, but instead have the |
||
} | ||
|
||
// GetCachedResult get data from redis | ||
func (pm *PerformanceMonitor) GetCachedResult(resultKey string) (TestResultData, error) { | ||
var res TestResultData | ||
|
||
data, err := pm.RedisClient.Get(resultKey).Result() | ||
if err != nil { | ||
return res, errors.Wrap(err, "Failed getting the cached result") | ||
} | ||
|
||
err = json.Unmarshal([]byte(data), &res) | ||
if err != nil { | ||
return res, errors.Wrap(err, "Failed unmarshal data from json") | ||
} | ||
|
||
return res, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
package perf | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"time" | ||
|
||
"github.com/go-redis/redis" | ||
"github.com/jasonlvhit/gocron" | ||
"github.com/pkg/errors" | ||
) | ||
|
||
// TaskMethod is the task method signature | ||
type TaskMethod func(ctx context.Context) (interface{}, error) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TaskAction or TaskFunc |
||
|
||
type Task struct { | ||
Key string | ||
Method TaskMethod | ||
Interval time.Duration // interval in seconds | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IntervalSeconds There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's wrong to use then a duration of 5 seconds is If you want a type that represents time in Seconds then use a Another way to do this is make duration a private field, then the New function will then take type Test {
dur Duration
}
func NewTest(seconds uint32) Test {
return Test {
dur: seconds * time.Seconds
}
}
|
||
ExecutionCount uint64 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does this mean? |
||
} | ||
|
||
// PerformanceMonitor holds the module data | ||
type PerformanceMonitor struct { | ||
Scheduler *gocron.Scheduler | ||
RedisClient *redis.Client | ||
Tasks []Task | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why these fields are public? they open the door to bad design since you will end up accessing an Some fields can become public of course, but usually in Data only types. Not operational types. For example there is noway someone will ever need to access the redis client on the Monitor structure because that is implementation details of how the Monitor caches the result! |
||
} | ||
|
||
// NewPerformanceMonitor returns PerformanceMonitor instance | ||
func NewPerformanceMonitor(redisAddr string) *PerformanceMonitor { | ||
redisClient := redis.NewClient(&redis.Options{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already using a where the url is just |
||
Network: "unix", | ||
Addr: redisAddr, | ||
}) | ||
|
||
scheduler := gocron.NewScheduler() | ||
|
||
return &PerformanceMonitor{ | ||
Scheduler: scheduler, | ||
RedisClient: redisClient, | ||
Tasks: []Task{}, | ||
} | ||
} | ||
|
||
// AddTask a simple helper method to add new tasks | ||
func (pm *PerformanceMonitor) AddTask(taskName string, interval time.Duration, task TaskMethod) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The point behind using A task can be secduled to run every sunday at 12pm, or 3 times per day on 0 8 and 16, etc.. So specifying an interval is not the right way. I also don't like that the task is
|
||
pm.Tasks = append(pm.Tasks, Task{ | ||
Key: taskName, | ||
Method: task, | ||
Interval: interval, | ||
ExecutionCount: 0, | ||
}) | ||
} | ||
|
||
// InitScheduler adds all the test to the scheduler queue | ||
func (pm *PerformanceMonitor) InitScheduler() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove this |
||
pm.AddTask("TestLogging", 5, TestLogging) | ||
} | ||
|
||
// RunScheduler adds the tasks to the corn queue and start the scheduler | ||
func (pm *PerformanceMonitor) RunScheduler(ctx context.Context) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the name RunScheduler shouldn't be introduced, a simple Run or Start should have been enough, now whoever is using this API will need to know it's using go-cron scheduler |
||
for _, task := range pm.Tasks { | ||
err := pm.Scheduler.Every(uint64(task.Interval)).Seconds().Do(func() error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see the added value of using go-cron, simple goroutines and error groups should have been more than enough? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I already commented on the AddTask method regarding exactly that |
||
testResult, err := task.Method(ctx) | ||
if err != nil { | ||
return errors.Wrapf(err, "failed running test: %s", task.Key) | ||
} | ||
|
||
task.ExecutionCount++ | ||
err = pm.CacheResult(ctx, task.Key, TestResultData{ | ||
TestName: task.Key, | ||
TestNumber: task.ExecutionCount, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. okay now I understand the point of TestNumber, Please remove it from the test data completely, we only care about the latest test results. it doesn't mean much given it gets invalidated every 10 seconds according to the current code, so there's no point in storing that. |
||
Result: testResult, | ||
}) | ||
if err != nil { | ||
return errors.Wrap(err, "failed setting cache") | ||
} | ||
|
||
return nil | ||
}) | ||
if err != nil { | ||
return errors.Wrapf(err, "failed scheduling the job: %s", task.Key) | ||
} | ||
} | ||
|
||
pm.Scheduler.Start() | ||
return nil | ||
} | ||
|
||
// Get handles the request to get data from cache | ||
func (pm *PerformanceMonitor) Get(ctx context.Context, payload []byte) (interface{}, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is so wrong! Why a function in a library knows about an API request format ? A library should only expose a library interface (Say a Get(test)) and that' it An public API interface (http, rmb, or whatever) then can handle user requests and then just make a proper call to the library |
||
var req GetRequest | ||
err := json.Unmarshal([]byte(payload), &req) | ||
if err != nil { | ||
return nil, errors.Wrapf(err, "failed to unmarshal payload: %v", payload) | ||
} | ||
|
||
return pm.GetCachedResult(req.TestName) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package perf | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/rs/zerolog/log" | ||
) | ||
|
||
// TestLogging simple helper method that ensure scheduler is working | ||
func TestLogging(ctx context.Context) (interface{}, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's remove this one, there're other logs we can see. |
||
log.Info().Msgf("TestLogging: time is: %v", time.Now()) | ||
return fmt.Sprintf("time is: %v", time.Now()), nil | ||
} | ||
|
||
// should run every 5 min | ||
// ping 12 point, each 3 times and make average result | ||
// iperf 12 grid node with ipv4 | ||
// iperf 12 grid node with ipv6 | ||
// uses the iperf binary | ||
func TestNetworkPerformance(ctx context.Context) (interface{}, error) { | ||
return nil, nil | ||
} | ||
|
||
// should run every 6min | ||
// upload/download a 1 MB file to any point | ||
func TestNetworkLoading(ctx context.Context) (interface{}, error) { | ||
return nil, nil | ||
} | ||
|
||
// should run every 6 hours | ||
// measure cpu, mem, disk performance and usage | ||
// uses some tool that do the mentoring | ||
func TestResourcesPerformance(ctx context.Context) (interface{}, error) { | ||
return nil, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,16 @@ start: | |
bash vm.sh -n node1 -c "runmode=dev farmer_id=$(FARMERID)" | ||
test: | ||
bash vm.sh -n node1 -c "runmode=test farmer_id=$(FARMERID)" | ||
|
||
auth: | ||
@echo "Copying your public ssh to machine rootfs" | ||
mkdir -p overlay/root/.ssh | ||
cp ~/.ssh/id_rsa.pub overlay/root/.ssh/authorized_keys | ||
|
||
net: | ||
@echo "Creating a virtual natted network" | ||
bash ../docs/development/net.sh | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can the script be instead moved here, and docs then refers to it in this location instead of the other way around? |
||
|
||
run: | ||
@echo "Running your node" | ||
sudo ./vm.sh -g -n myzos-01 -c "farmer_id=$(id) version=v3 printk.devmsg=on runmode=dev nomodeset" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use lowercase in the message