forked from Dirbaio/gominer
-
Notifications
You must be signed in to change notification settings - Fork 79
/
calibrate.go
126 lines (106 loc) · 3.41 KB
/
calibrate.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Copyright (c) 2016-2023 The Decred developers.
//go:build !cuda
// +build !cuda
package main
import (
"math"
"time"
"unsafe"
"github.com/decred/gominer/cl"
"github.com/decred/gominer/work"
)
// getKernelExecutionTime runs the device kernel once over globalWorksize work
// items and returns the wall-clock time that elapsed between enqueueing the
// kernel and finishing the read of the output buffer.
//
// Before the run, d.work is reset to a zero-value work.Work and the kernel
// arguments are populated from d.outputBuffer, d.midstate and d.lastBlock.
// If any OpenCL call reports a status other than cl.CL_SUCCESS, the
// measurement is abandoned and a zero duration is returned together with the
// error produced by clError for that call.
func (d *Device) getKernelExecutionTime(globalWorksize uint32) (time.Duration,
	error) {
	d.work = work.Work{}

	minrLog.Tracef("Started DEV #%d: %s for kernel execution time fetch",
		d.index, d.deviceName)
	outputData := make([]uint32, outputBufferSize)

	var status cl.CL_int

	// arg 0: pointer to the buffer
	obuf := d.outputBuffer
	status = cl.CLSetKernelArg(d.kernel, 0,
		cl.CL_size_t(unsafe.Sizeof(obuf)),
		unsafe.Pointer(&obuf))
	if status != cl.CL_SUCCESS {
		return 0, clError(status, "CLSetKernelArg")
	}

	// args 1..8: midstate
	for i := 0; i < 8; i++ {
		ms := d.midstate[i]
		status = cl.CLSetKernelArg(d.kernel, cl.CL_uint(i+1),
			uint32Size, unsafe.Pointer(&ms))
		if status != cl.CL_SUCCESS {
			return 0, clError(status, "CLSetKernelArg")
		}
	}

	// args 9..20: lastBlock except nonce.  i2 walks d.lastBlock and hops
	// over the word at index work.Nonce0Word, so twelve arguments are taken
	// from thirteen consecutive words.
	i2 := 0
	for i := 0; i < 12; i++ {
		if i2 == work.Nonce0Word {
			i2++
		}
		lb := d.lastBlock[i2]
		status = cl.CLSetKernelArg(d.kernel, cl.CL_uint(i+9),
			uint32Size, unsafe.Pointer(&lb))
		if status != cl.CL_SUCCESS {
			return 0, clError(status, "CLSetKernelArg")
		}
		i2++
	}

	// Clear the found count from the buffer by overwriting its first
	// uint32 with the first element of zeroSlice.
	status = cl.CLEnqueueWriteBuffer(d.queue, d.outputBuffer,
		cl.CL_FALSE, 0, uint32Size, unsafe.Pointer(&zeroSlice[0]),
		0, nil, nil)
	if status != cl.CL_SUCCESS {
		return 0, clError(status, "CLEnqueueWriteBuffer")
	}

	// Execute the kernel and follow its execution time.  The clock starts
	// before the kernel is enqueued and stops after the output buffer has
	// been read back, so the result covers both steps.
	currentTime := time.Now()
	var globalWorkSize [1]cl.CL_size_t
	globalWorkSize[0] = cl.CL_size_t(globalWorksize)
	var localWorkSize [1]cl.CL_size_t
	localWorkSize[0] = localWorksize
	status = cl.CLEnqueueNDRangeKernel(d.queue, d.kernel, 1, nil,
		globalWorkSize[:], localWorkSize[:], 0, nil, nil)
	if status != cl.CL_SUCCESS {
		return 0, clError(status, "CLEnqueueNDRangeKernel")
	}

	// Read the output buffer.  The status must be captured here; without
	// the assignment the check below would re-test the kernel enqueue
	// status and a failed read would be reported as a valid measurement.
	status = cl.CLEnqueueReadBuffer(d.queue, d.outputBuffer, cl.CL_TRUE, 0,
		uint32Size*outputBufferSize, unsafe.Pointer(&outputData[0]), 0,
		nil, nil)
	if status != cl.CL_SUCCESS {
		return 0, clError(status, "CLEnqueueReadBuffer")
	}

	elapsedTime := time.Since(currentTime)
	minrLog.Tracef("DEV #%d: Kernel execution to read time for work "+
		"size calibration: %v", d.index, elapsedTime)

	return elapsedTime, nil
}
// calcWorkSizeForMilliseconds calculates the work size needed for a single
// device execution cycle to take approximately the passed duration in
// milliseconds.
//
// Starting at 1<<10, the work size is doubled until a measured run takes at
// least the requested time or the size reaches 1<<30.  The last measurement
// is then scaled linearly to the requested time and rounded up to a multiple
// of 256.  The scaled value is clamped so that it always fits in a uint32,
// and a non-positive measurement (which cannot be scaled) leaves the last
// measured work size unchanged.
//
// Any error from getKernelExecutionTime is returned as is, together with a
// work size of 0.
func (d *Device) calcWorkSizeForMilliseconds(ms int) (uint32, error) {
	// Largest number of 256-item groups whose total still fits in a uint32.
	const maxMultiple256 = math.MaxUint32 / 256

	workSize := uint32(1 << 10)
	timeToAchieve := time.Duration(ms) * time.Millisecond
	for {
		execTime, err := d.getKernelExecutionTime(workSize)
		if err != nil {
			return 0, err
		}

		// If we fail to go above the desired execution time, double
		// the work size and try again.
		if execTime < timeToAchieve && workSize < 1<<30 {
			workSize <<= 1
			continue
		}

		// A zero or negative measurement would turn the ratio below into
		// Inf or NaN, so there is nothing meaningful to scale by.
		if execTime <= 0 {
			break
		}

		// We're past the desired execution time (or at the size cap), so
		// now calculate what the ideal work size should be.
		adj := float64(workSize) * (float64(timeToAchieve) / float64(execTime))
		adjMultiple256 := math.Ceil(adj / 256.0)

		// Converting an out-of-range float to uint32 yields an
		// implementation-dependent value, and the multiplication by 256
		// could wrap, so bound the group count before converting.
		if adjMultiple256 > maxMultiple256 {
			adjMultiple256 = maxMultiple256
		}
		if adjMultiple256 < 0 {
			adjMultiple256 = 0
		}
		workSize = uint32(adjMultiple256) * 256
		break
	}

	return workSize, nil
}