-
Notifications
You must be signed in to change notification settings - Fork 5
/
CIC4H64.v
416 lines (416 loc) · 12.5 KB
/
CIC4H64.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
//
// CIC4H64.v - 4-stage Cascaded Integrator Comb Filter
//
// (C) Copyright 2007-2011 John B. Stephensen
//
// This Verilog source file and all its derivatives are licensed only for
// personal non-profit educational use in the Amateur Radio Service and
// the license is not transferrable. The information is provided as-is for
// experimental purposes and the author does not warranty its freedom
// from defects or its suitability for any specific application.
//
// This module implements a switchable CIC interpolator or CIC decimator. Interpolation
// and decimation by 8-2896 is possible. RDI and TDO are the input from the mixer and
// output to the mixer for receiving and transmitting, respectively. Data is sampled
// synchronous to SCLK and multiplexed at the DCLK rate. RDO and TDI are decimated data
// for receiving and input data for interpolation for transmitting, respectively. TIE
// requests new data on TDI. One X or Y sample is present on each DCLK cycle for TDO.
// SCLK high indicates X and SCLK low indicates Y.
//
// One 64-bit integrator, three 56-bit integrators and a 4-stage 24-bit comb filter are used
// in the receive path. Filter gain is the decimation factor to the fourth power, so it is
// 4.1 x 10^3 to 3.9 x 10^13 for decimation factors from 8-2580. 46 extra bits are provided
// and full-scale output can be reached by adjusting the input gain to 2^46/(filter gain).
//
// A 4-stage 24-bit comb filter, one 64-bit integrator and three 52-bit integrators are used
// in the transmit path. Filter gain is the interpolation factor to the third power, so filter
// gain is 5.1 x 10^2 to 1.6 x 10^10 for interpolation factors from 8-2580. 42 extra bits are
// provided so full-scale output can be reached by adjusting the input gain to 2^42/(filter
// gain). Note that the multiplication factor cannot exceed 4 to prevent clipping in the
// comb filter.
//
// One configuration register controls gain with 5 bits of binary exponent, 4 bits of
// integer gain and 6 bits of fractional gain. The other contains the 12-bit decimation
// or interpolation factor. The multiplier gain should be between 1 and 16 for receiving
// and be between 1 and 4 for transmitting. The shift value is 0-30 for reception and
// 8-31 for transmission. Thus, an 18-bit input can be adjusted to use the lower 18 to 51
// bits of the 64-bit accumulator input.
//
// Note that several components must be instantiated instead of inferred to prevent
// optimization that can slow operation and/or disrupt I/O timing. The upper bit of the
// receiver output is dropped as the image output of the mixer is removed by the CIC filter.
// 16 DCLK delay. SCLK is 1/2 DCLK frequency with positive-going edges aligned.
//
// 600 LUTs, 1044 registers and 1 DSP48 are used. The maximum DCLK frequency is 241 MHz.
//
// History:
// 9-16-11 create from CIC4H72
// 11-17-11 reduce counter delay by using qi <= xmt^iq
// 12-9-11 separate configuration clock (was 600 LUT, 1034 reg.)
//
// CIC4H64 - switchable 4-stage CIC decimator (xmt=0) / interpolator (xmt=1).
//
// Data flow as wired in this module:
//   receive : rdi -> mmux -> DSP48A1 gain multiply -> imux -> 3-stage shifter
//             -> 4 integrators -> dmux -> 4-stage comb -> round -> clip -> rdo
//   transmit: tdix/tdiy -> mmux -> DSP48A1 gain multiply -> dmux -> 4-stage comb
//             -> imux -> 3-stage shifter -> 4 integrators -> tdo
// X and Y samples are interleaved on alternate DCLK cycles; every accumulator and
// comb delay line therefore holds two values (one per channel).
//
// MUX4X18S, MUX2X24S, add32s and add24s are defined outside this file; statements
// about their latency below come from the original author's comments, not from
// code visible here.
module CIC4H64(
input [17:0] rdi, // receiver data input - serial X and Y
input [17:0] tdix,tdiy, // transmitter data input - parallel X and Y
output tie, // get new transmitter input (driven by e2)
output [17:0] rdo,tdo, // data outputs - serial X and Y
output rov, // valid receiver output (driven by e8)
output ovf, // receiver output overflow (driven by ovff)
// input sclk, // sample clock (not a port; IQ carries the X/Y phase instead)
input dclk, // data clock
input iq, // data tag (I=1, Q=0)
input rst, // master reset (sync. to SCLK)
input xmt, // 0=receive (RDI->RDO), 1=transmit (TDI->TDO)
input [15:0] cin, // gain and decimation set (registered on CCLK; bit 15 is unused)
input [1:0] cwr, // write configuration word (bit 0=gain, bit 1=dec/int)
input cclk // config. clock
);
// internal signals
reg [11:0] n; // decimation/interpolation factor (counter reload value)
wire [17:0] mux0; // multiplier input multiplexer
wire [23:0] mux1; // integrator input multiplexer
reg rstimux; // reset signal for mux1
wire [47:0] prod; // DSP48 output
reg [4:0] ge; // gain exponent
reg [9:0] gm; // gain mantissa
reg [25:0] shift1; // shift 0,1,2,3 bits (24 in -> 26 bits + sign s1)
reg [37:0] shift4; // shift 0,4,8,12 bits (-> 38 bits + sign s4)
reg [53:0] shift16; // shift 0,16 bits (-> 54 bits + sign s16)
reg s1,s4,s16; // sign bits, pipelined alongside each shifter stage
wire [31:0] acc0l; // LS 32 bits from shifter
reg [22:0] acc0h; // delayed MS 23 bits from shifter (s16 + shift16[53:32])
wire zero; // carry-in for the low-half adders (tied to rst)
wire s0; // sign bit of acc0h, replicated 9 times at add1h input
reg [23:0] rnd; // rounded result
reg [17:0] clp; // clipped result
wire sr,sc; // rounder and clipper input sign bits
wire [31:0] acc1ah,acc2ah,acc3ah,acc4ah,acc1al; // integrator adders
wire [23:0] acc2al,acc3al,acc4al;
reg [31:0] acc1bh,acc2bh,acc3bh,acc4bh,acc1bl; // chan. 2 storage
reg [23:0] acc2bl,acc3bl,acc4bl;
wire c1,c2,c3,c4; // carries from low-half to high-half adder of each integrator
wire [23:0] dif0; // comb filter input
reg [23:0] dly1a,dly1b,dly2a,dly2b,dly3a,dly3b,dly4a,dly4b; // comb filter storage
reg [23:0] dif1,dif2,dif3,dif4; // comb filter subtractors
reg [11:0] i; // sample counter
wire tc; // terminal count
reg e1,e2,e3,e4,e5,e6,e7,e8; // differentiator data valid delay
wire ovflo; // receiver overflow detection logic
reg ovff; // overflow register (reloaded on every e7, see below)
// gain control and decimation/interpolation configuration registers
// cwr[0] loads the gain word: cin[14:10] -> ge, cin[9:0] -> gm.
// cwr[1] loads the rate word: cin[11:0] -> n.
// NOTE(review): ge, gm and n are written on CCLK but consumed by DCLK logic and
// no synchronizer is visible in this module - presumably they are only changed
// while the data path is idle or in reset; confirm with the caller.
always @ (posedge cclk)
begin
if (cwr[0]) ge <= cin[14:10]; // gain binary exponent
if (cwr[0]) gm <= cin[9:0]; // gain mantissa
if (cwr[1]) n <= cin[11:0]; // decimation/interpolation factor
end
// Multiplier input multiplexer - multiplex transmitter input
// Select is {xmt,iq}: 0x -> rdi, 10 -> tdiy, 11 -> tdix.
MUX4X18S mmux (
.D0(rdi), // already multiplexed with X/Y while SCLK high/low
.D1(rdi),
.D2(tdiy), // data following negative-going edge of SCLK
.D3(tdix), // data following positive-going edge of SCLK
.S({xmt,iq}), // IQ same phase as SCLK
.Q(mux0),
.CE(1'b1),
.CLK(dclk),
.RST(rst)
);
// Gain control - 3 clock delay
// input synchronized to negative-going edges of SCLK
// output synchronized to positive-going edges of SCLK
// The three register stages enabled below are A1REG, MREG and PREG (B1REG is in
// parallel with A1REG). B is the 10-bit mantissa gm zero-extended to 18 bits.
// OPMODE 8'b00000001: per the Xilinx DSP48A1 documentation OPMODE[1:0]=01 selects
// the multiplier product and OPMODE[3:2]=00 adds zero, so P = A * B - TODO confirm
// against the user guide if this is ever changed.
DSP48A1 #(
.A0REG(0),
.A1REG(1),
.B0REG(0),
.B1REG(1),
.CREG(0),
.DREG(0),
.MREG(1),
.PREG(1),
.CARRYINSEL("OPMODE5"),
.CARRYINREG(0),
.CARRYOUTREG(0),
.OPMODEREG(0),
.RSTTYPE("SYNC")
) mult (
.OPMODE(8'b00000001),
.A(mux0),
.B({8'h00,gm}),
.BCOUT(),
.C(48'h000000),
.D(18'b000000000000000000),
.M(),
.CARRYIN(1'b0),
.CARRYOUT(),
.CARRYOUTF(),
.PCIN(48'h000000),
.PCOUT(),
.P(prod),
.CLK(dclk),
.CEA(1'b1),
.CEB(1'b1),
.CEC(1'b1),
.CED(1'b1),
.CEM(1'b1),
.CEP(1'b1),
.CECARRYIN(1'b1),
.CEOPMODE(1'b1),
.RSTA(1'b0),
.RSTB(1'b0),
.RSTC(1'b0),
.RSTD(1'b0),
.RSTM(1'b0),
.RSTP(1'b0),
.RSTCARRYIN(1'b0),
.RSTOPMODE(1'b0)
);
// Integrator input multiplexer - 1 clock delay
// zero output between samples on transmit
// select differentiator output (xmt) or multiplier output (rcv)
// output synchronized to negative-going edges of SCLK due to odd number of registers
// Instantiate instead of inferring to prevent optimization
// prod[29:6] discards the low 6 product bits and keeps the next 24.
MUX2X24S imux (
.D0(prod[29:6]), // receive mult = 0-16
.D1(dif4[23:0]), // transmit
.S(xmt),
.Q(mux1),
.CLK(dclk),
.CE(1'b1),
.RST(rstimux)
// .RST(xmt & ~e6)
);
// Integrator input shifter: 24-bit input and 48-bit output with 3 clock delay
// The first multiplexer shifts in 1 bit increments, the second in 4 bit increments
// and the third in 16-bit increments. The most significant bit is not shifted.
// Each stage left-shifts by the amount selected from ge, fills vacated low bits
// with zeros and fills unused high bits with the sign carried in s1/s4/s16.
// A fourth register stage (acc0h) delays the upper bits by one more clock than
// acc0l, which is taken directly from shift16.
// The default branches are unreachable for 0/1 selector values; they only
// propagate X in simulation when ge is unknown.
always @ (posedge dclk)
begin
// stage 1: shift left by ge[1:0] = 0..3 bits
case (ge[1:0])
2'b00: shift1 <= {mux1[23],mux1[23],mux1[23],mux1[22:0]};
2'b01: shift1 <= {mux1[23],mux1[23],mux1[22:0],1'b0};
2'b10: shift1 <= {mux1[23],mux1[22:0],2'b00};
2'b11: shift1 <= {mux1[22:0],3'b00}; // 3'b00 is a 3-bit zero
default: shift1 <= 26'hxxxxxxx;
endcase
s1 <= mux1[23];
// stage 2: shift left by 4*ge[3:2] = 0,4,8,12 bits
case (ge[3:2])
2'b00: shift4 <= {s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,shift1};
2'b01: shift4 <= {s1,s1,s1,s1,s1,s1,s1,s1,shift1,4'h0};
2'b10: shift4 <= {s1,s1,s1,s1,shift1,8'h00};
2'b11: shift4 <= {shift1,12'h000};
default: shift4 <= 38'hxxxxxxxxxx;
endcase
s4 <= s1;
// stage 3: shift left by 16*ge[4] = 0 or 16 bits
case (ge[4])
1'b0: shift16 <= {s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,s4,shift4};
1'b1: shift16 <= {shift4,16'h0000};
default: shift16 <= 54'hxxxxxxxxxxxxxx;
endcase
s16 <= s4;
acc0h <= {s16,shift16[53:32]}; // sign + upper 22 shifter bits (23 bits total)
end
assign acc0l = shift16[31:0]; // LS 32 bits
assign s0 = acc0h[22]; // sign of acc0h; 9 copies extend it to 32 bits at add1h
// 4 pipelined integrators with dual accumulators
// Each integrator is split into a low-half and a high-half adder joined by a
// carry (c1..c4). Integrator 1 is 32+32 bits; integrators 2-4 are 24+32 bits.
// The low-half carry-in is tied to rst through 'zero', so it is 0 whenever the
// module is out of reset.
// NOTE(review): presumably wired to rst rather than a constant so the tools do
// not optimize the carry input away - confirm.
assign zero = rst;
// Integrator 1
add32s add1l (
.A(acc0l), // input
.B(acc1bl),
.CI(zero),
.CO(c1),
.Q(acc1al),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
add32s add1h (
.A({s0,s0,s0,s0,s0,s0,s0,s0,s0,acc0h}),
.B(acc1bh),
.CI(c1),
.CO(),
.Q(acc1ah),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
// Integrator 2 - trim bottom 8 bits
add24s add2l (
.A(acc1al[31:8]),
//add32s add2l (
// .A(acc1al),
.B(acc2bl),
.CI(zero),
.CO(c2),
.Q(acc2al),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
add32s add2h (
.A(acc1ah),
.B(acc2bh),
.CI(c2),
.CO(),
.Q(acc2ah),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
// Integrator 3
add24s add3l (
//add32s add3l (
.A(acc2al),
.B(acc3bl),
.CI(zero),
.CO(c3),
.Q(acc3al),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
add32s add3h (
.A(acc2ah),
.B(acc3bh),
.CI(c3),
.CO(),
.Q(acc3ah),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
// Integrator 4
add24s add4l (
//add32s add4l (
.A(acc3al),
.B(acc4bl),
.CI(zero),
.CO(c4),
.Q(acc4al),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
add32s add4h (
.A(acc3ah),
.B(acc4bh),
.CI(c4),
.CO(),
.Q(acc4ah),
.CLK(dclk),
.CE(1'b1),
.SCLR(rst)
);
// delay provides 2nd channel in each accumulator
// Each adder output is registered once more here before being fed back to the
// adder's B input, so the feedback path holds the other channel's running sum.
// These registers have no reset of their own; only the adders see SCLR.
always @ (posedge dclk)
begin
acc1bl <= acc1al;
acc2bl <= acc2al;
acc3bl <= acc3al;
acc4bl <= acc4al;
acc1bh <= acc1ah;
acc2bh <= acc2ah;
acc3bh <= acc3ah;
acc4bh <= acc4ah;
end
// sample counter and output valid delay
// odd number of delays for transmit (IQ=1, use negedge sclk)
// even number of delays for receive (IQ=0, use posedge sclk)
// qi is a registered copy of (xmt ^ iq) and is the counter's step enable.
// The counter reloads n on reset or when it sits at 0 with qi high, otherwise
// it decrements on qi. It therefore visits n, n-1, ..., 0: one period is n+1
// qi-enabled steps.
reg qi;
always @ (posedge dclk)
begin
qi <= (xmt ^ iq);
if (rst|(tc & qi)) i <= n; // interpolation/decimation counter
else if (qi) i <= i - 1'b1;
end
assign tc = (i == 0); // i is zero
//assign tc = ~|i; // i is zero
//always @ (posedge dclk)
//begin
// if (rst|(tc & (xmt ^ iq))) i <= 0; // interpolation/decimation counter
// else if (xmt ^ iq) i <= i + 1'b1;
//end
//assign tc = (i == n);
// delay terminal count to enable differentiators
// even-numbered taps synchronized to SCLK positive-going edge
// odd-numbered taps synchronized to SCLK negative-going edge
// also generates reset signal for integrator input multiplexer
// rstimux is registered from e5, so it changes on the same clock edge as e6;
// in transmit mode it holds imux in reset except while that tap is high.
// This chain has no reset.
always @ (posedge dclk)
begin
e1 <= tc;
e2 <= e1;
e3 <= e2;
e4 <= e3;
e5 <= e4;
e6 <= e5;
rstimux <= xmt & ~e5;
e7 <= e6;
e8 <= e7;
end
assign tie = e2; // also use to request transmitter input samples
// Differentiator input multiplexer
// Instantiate instead of inferring to prevent delay-increasing optimization
// RX: output synchronized to SCLK due to even number of registers following input port
// TX: output synchronized to ~SCLK due to odd number of registers following input port
MUX2X24S dmux (
.D0(acc4ah[31:8]),// receive: integrator output (upper 24 bits of acc4ah)
.D1(prod[29:6]), // transmit: multiplier output
.S(xmt),
.Q(dif0),
.CLK(dclk),
.CE(1'b1),
.RST(1'b0)
);
// Comb Filter
// 4 differentiators with 2 clock delay on negative input to support 2 channels
// Stage k (k=1..4) is enabled by e(k+1). On an enabled clock it computes
// dif_k = input - dly_kb and advances the two-deep delay line
// (input -> dly_ka -> dly_kb). rst synchronously clears every register and
// takes priority over the enable.
always @ (posedge dclk)
begin
// stage 1 (enable e2): input dif0
if (rst) dif1 <= 0;
else if (e2) dif1 <= dif0 - dly1b;
if (rst) dly1a <= 0;
else if (e2) dly1a <= dif0;
if (rst) dly1b <= 0;
else if (e2) dly1b <= dly1a;
// stage 2 (enable e3): input dif1
if (rst) dif2 <= 0;
else if (e3) dif2 <= dif1 - dly2b;
if (rst) dly2a <= 0;
else if (e3) dly2a <= dif1;
if (rst) dly2b <= 0;
else if (e3) dly2b <= dly2a;
// stage 3 (enable e4): input dif2
if (rst) dif3 <= 0;
else if (e4) dif3 <= dif2 - dly3b;
if (rst) dly3a <= 0;
else if (e4) dly3a <= dif2;
if (rst) dly3b <= 0;
else if (e4) dly3b <= dly3a;
// stage 4 (enable e5): input dif3
if (rst) dif4 <= 0;
else if (e5) dif4 <= dif3 - dly4b;
if (rst) dly4a <= 0;
else if (e5) dly4a <= dif3;
if (rst) dly4b <= 0;
else if (e5) dly4b <= dly4a;
end
// truncate transmitter output
assign tdo = acc4ah[27:10]; // drop upper 4 and lower 10 bits
// round receiver output to 19 bits by adding 0.100000000 for positive numbers
// or 0.011111111 for negative numbers and dropping lower 5 bits
// The constant added is 5'b10000 when dif4 is non-negative and 5'b01111 when it
// is negative; the low 5 bits are discarded later, when clp takes rnd[22:5].
assign sr = dif4[23];
always @ (posedge dclk)
rnd <= dif4 + {19'b0000000000000000000,~sr,sr,sr,sr,sr};
// drop MSB, detect positive or negative overflow beyond 18 bits and saturate output
// Overflow is flagged when the top two bits of rnd differ. The saturated value
// is 18'h1FFFF for a positive rnd and 18'h20001 for a negative rnd.
assign sc = rnd[23]; // most significant or sign bit
assign ovflo = (rnd[23] ^ rnd[22]); // overflow
always @ (posedge dclk)
clp <= ovflo ?
{sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,~sc,1'b1}
: rnd[22:5]; // drop MSB as image frequency has been filtered out
// check for overflow
// ovff is reloaded with ovflo on every clock where e7 is high, so it reports the
// overflow state of the most recently checked sample; it is not sticky.
always @ (posedge dclk)
begin
if (rst) ovff <= 0;
else if (e7) ovff <= ovflo;
end
// connect receiver output
assign rdo = clp;
assign rov = e8;
assign ovf = ovff;
endmodule