//////////////////////////////////////////////////////////////////
//                                                              //
//  Barrel Shifter for Amber 2 Core                             //
//                                                              //
//  The design is optimized for Altera family of FPGAs,         //
//  and it can be used directly or adapted to other N-to-1 LUT  //
//  FPGA platforms.                                             //
//                                                              //
//  This file is part of the Amber project                      //
//  http://www.opencores.org/project,amber                      //
//                                                              //
//  Description                                                 //
//  Provides 32-bit shifts LSL, LSR, ASR and ROR                //
//                                                              //
//  Author(s):                                                  //
//    - Dmitry Tarnyagin, dmitry.tarnyagin@lockless.no          //
//                                                              //
//////////////////////////////////////////////////////////////////
//                                                              //
// Copyright (C) 2010-2013 Authors and OPENCORES.ORG            //
//                                                              //
// This source file may be used and distributed without         //
// restriction provided that this copyright statement is not    //
// removed from the file and that any derivative work contains  //
// the original copyright notice and the associated disclaimer. //
//                                                              //
// This source file is free software; you can redistribute it   //
// and/or modify it under the terms of the GNU Lesser General   //
// Public License as published by the Free Software Foundation; //
// either version 2.1 of the License, or (at your option) any   //
// later version.                                               //
//                                                              //
// This source is distributed in the hope that it will be       //
// useful, but WITHOUT ANY WARRANTY; without even the implied   //
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU Lesser General Public License for more  //
// details.                                                     //
//                                                              //
// You should have received a copy of the GNU Lesser General    //
// Public License along with this source; if not, download it   //
// from http://www.opencores.org/lgpl.shtml                     //
//                                                              //
//////////////////////////////////////////////////////////////////


// a23_barrel_shift_fpga
//
// Purely combinational 32-bit barrel shifter. The input word is rotated by a
// 5-stage rotate network and the rotated word is then AND-ed with a left-hand
// and a right-hand mask to turn the rotation into LSL / LSR / ASR / ROR.
// Bit 31 of the result and the carry output are produced by dedicated
// per-function logic and selected by i_function.
//
// Ports:
//   i_in             32-bit value to be shifted
//   i_carry_in       carry passed through when the shift amount is zero;
//                    also shifted into bit 31 for ROR with i_shift_imm_zero
//   i_shift_amount   8-bit shift amount
//   i_shift_imm_zero selects the special encodings handled below
//                    (LSR/ASR treated as shift by 32, ROR as rotate by one
//                    through carry)
//   i_function       shift type; compared against LSL, LSR, ASR and ROR,
//                    which are expected to come from the included
//                    localparams file (not visible here)
//   o_out            32-bit shifted result
//   o_carry_out      carry out of the shifter
module a23_barrel_shift_fpga (

input       [31:0]          i_in,
input                       i_carry_in,
input       [7:0]           i_shift_amount,     // uses 8 LSBs of Rs, or a 5 bit immediate constant
input                       i_shift_imm_zero,   // high when immediate shift value of zero selected
input       [1:0]           i_function,

output      [31:0]          o_out,
output                      o_carry_out

);

// NOTE(review): the two copies of this line in the damaged source disagreed
// ("a23_localparams.v" vs "a23_localparams.vh"); the .vh name is kept here.
// Confirm it matches the file name used by the rest of the project.
`include "a23_localparams.vh"

wire [31:0] rot_prod;       // Input rotated by the shift amount

wire [1:0]  lsl_out;        // LSL: {carry, bit_31}
wire [1:0]  lsr_out;        // LSR: {carry, bit_31}
wire [1:0]  asr_out;        // ASR: {carry, bit_31}
wire [1:0]  ror_out;        // ROR: {carry, bit_31}

reg [32:0]  lsl_mask;       // Left-hand mask  (bit 32 gates the carry)
reg [32:0]  lsr_mask;       // Right-hand mask (bit 32 gates the carry)
reg [15:0]  low_mask;       // Mask calculation helper

reg [4:0]   shift_amount;   // Shift amount for the low-level shifter

reg [2:0]   lsl_selector;   // Left shift  {shift_32, shift_over, shift_amount[4]}
reg [2:0]   lsr_selector;   // Right shift {shift_32, shift_over, shift_amount[4]}
reg [3:0]   low_selector;   // {shift_amount[3:0]}

reg         shift_nzero;    // Amount is not zero
reg         shift_over;     // Amount is 32 or higher
reg         shift_32;       // Amount is exactly 32
reg         asr_sign;       // Sign for ASR shift
reg         direction;      // Shift direction

wire [31:0] p_r;            // rot_prod rotated left by one more bit
wire [31:0] p_l;            // Alias for the rot_prod


// Implementation details:
// Design is based on masking of rotated input by a left- and right- hand masks.
// Rotated product calculation requires 5 levels of combinational logic, and masks
// must be ready before the product is ready. In fact masks require just 3 to 4 levels
// of logic cells using 4-to-1/2x3-to-1 Altera.

always @*
begin
    shift_32     = i_shift_amount == 32;
    shift_over   = |i_shift_amount[7:5];
    shift_nzero  = |i_shift_amount[7:0];
    shift_amount = i_shift_amount[4:0];

    if (i_shift_imm_zero) begin
        if (i_function == LSR || i_function == ASR) begin
            // The form of the shift field which might be
            // expected to correspond to LSR #0 is used
            // to encode LSR #32, which has a zero result
            // with bit 31 of Rm as the carry output.
            shift_nzero = 1'b1;
            shift_over  = 1'b1;
            // Redundant and can be optimized out
            // shift_32 = 1'b1;
        end else if (i_function == ROR) begin
            // RXR, (ROR w/ imm 0): rotate by one, carry handled in ror_out
            shift_amount[0] = 1'b1;
            shift_nzero     = 1'b1;
        end
    end

    // LSB sub-selector calculation. Usually it is taken
    // directly from the shift_amount, but ROR requires
    // no masking at all.
    case (i_function)
        LSL: low_selector = shift_amount[3:0];
        LSR: low_selector = shift_amount[3:0];
        ASR: low_selector = shift_amount[3:0];
        ROR: low_selector = 4'b0000;
    endcase

    // Left-hand MSB sub-selector calculation. Opaque for every function but LSL.
    case (i_function)
        LSL: lsl_selector = {shift_32, shift_over, shift_amount[4]};
        LSR: lsl_selector = 3'b0_1_0; // Opaque mask selector
        ASR: lsl_selector = 3'b0_1_0; // Opaque mask selector
        ROR: lsl_selector = 3'b0_1_0; // Opaque mask selector
    endcase

    // Right-hand MSB sub-selector calculation. Opaque for LSL, transparent for ROR.
    case (i_function)
        LSL: lsr_selector = 3'b0_1_0; // Opaque mask selector
        LSR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
        ASR: lsr_selector = {shift_32, shift_over, shift_amount[4]};
        ROR: lsr_selector = 3'b0_0_0; // Transparent mask selector
    endcase

    // Direction
    case (i_function)
        LSL: direction = 1'b0; // Left shift
        LSR: direction = 1'b1; // Right shift
        ASR: direction = 1'b1; // Right shift
        ROR: direction = 1'b1; // Right shift
    endcase

    // Sign for ASR shift
    asr_sign = 1'b0;
    if (i_function == ASR && i_in[31])
        asr_sign = 1'b1;
end

// Generic rotate. Theoretical cost: 32x5 4-to-1 LUTs.
// Practically a bit higher due to high fanout of "direction".
//
// Stage i either passes its input through or rotates it left by 2**i
// (bit j is taken from bit wrap(j, i) = j - 2**i, modulo 32).
// With direction == 0 a stage rotates when shift_amount[i] is set, so the
// net rotates left by shift_amount. With direction == 1 the selection is
// inverted, so the net rotates left by (31 - shift_amount); the extra one-bit
// left rotation applied in p_r below completes a rotate right by shift_amount.
generate
    genvar i, j;
    for (i = 0; i < 5; i = i + 1)
    begin : netgen
        wire [31:0] in;
        reg  [31:0] out;
        for (j = 0; j < 32; j = j + 1)
        begin : net
            always @*
                out[j] = in[j]          & (~shift_amount[i] ^ direction) |
                         in[wrap(j, i)] & ( shift_amount[i] ^ direction);
        end
    end

    // Order is reversed with respect to volatile shift_amount[0]:
    // stage 4 is fed first and stage 0 produces the final product.
    assign netgen[4].in = i_in;
    for (i = 1; i < 5; i = i + 1)
    begin : router
        assign netgen[i-1].in = netgen[i].out;
    end
endgenerate

// Aliasing
assign rot_prod = netgen[0].out;

// Submask calculated from LSB sub-selector:
// the low_selector least significant bits are cleared.
// Cost: 16 4-to-1 LUTs.
always @*
    case (low_selector) // synthesis full_case parallel_case
        4'b0000: low_mask = 16'hffff;
        4'b0001: low_mask = 16'hfffe;
        4'b0010: low_mask = 16'hfffc;
        4'b0011: low_mask = 16'hfff8;
        4'b0100: low_mask = 16'hfff0;
        4'b0101: low_mask = 16'hffe0;
        4'b0110: low_mask = 16'hffc0;
        4'b0111: low_mask = 16'hff80;
        4'b1000: low_mask = 16'hff00;
        4'b1001: low_mask = 16'hfe00;
        4'b1010: low_mask = 16'hfc00;
        4'b1011: low_mask = 16'hf800;
        4'b1100: low_mask = 16'hf000;
        4'b1101: low_mask = 16'he000;
        4'b1110: low_mask = 16'hc000;
        4'b1111: low_mask = 16'h8000;
    endcase

// Left-hand mask calculation.
// Selector is {shift_32, shift_over, shift_amount[4]}:
//   exactly 32 -> only the carry bit (bit 32) passes
//   over 32    -> everything is masked
//   16..31     -> low_mask in the upper half, lower half cleared
//   0..15      -> upper half open, low_mask in the lower half
// Cost: 33 4-to-1 LUTs.
always @*
    casez (lsl_selector) // synthesis full_case parallel_case
        3'b1??: lsl_mask = 33'h1_0000_0000;
        3'b01?: lsl_mask = 33'h0_0000_0000;
        3'b001: lsl_mask = { 1'h1, low_mask, 16'h0000};
        3'b000: lsl_mask = {17'h1_ffff, low_mask};
    endcase

// Right-hand mask calculation (mirror image of the left-hand mask,
// built from the bit-reversed low_mask).
// Cost: 33 4-to-1 LUTs.
always @*
    casez (lsr_selector) // synthesis full_case parallel_case
        3'b1??: lsr_mask = 33'h1_0000_0000;
        3'b01?: lsr_mask = 33'h0_0000_0000;
        3'b000: lsr_mask = { 1'h1, bit_swap(low_mask), 16'hffff};
        3'b001: lsr_mask = {17'h1_0000, bit_swap(low_mask)};
    endcase

// Alias: rot_prod rotated left by one bit, used by the right shifts
assign p_r = {rot_prod[30:0], rot_prod[31]};

// Alias: rot_prod as is, used by the left shift
assign p_l = rot_prod[31:0];

// ROR MSB, handling special cases
assign ror_out[0] = i_shift_imm_zero ? i_carry_in :
                                       p_r[31];

// ROR carry, handling special cases
assign ror_out[1] = i_shift_imm_zero ? i_in[0] :
                    shift_nzero      ? p_r[31] :
                                       i_carry_in;

// LSL MSB
assign lsl_out[0] = p_l[31] & lsl_mask[31];

// LSL carry, handling special cases
assign lsl_out[1] = shift_nzero ? p_l[0] & lsl_mask[32] :
                                  i_carry_in;

// LSR MSB
assign lsr_out[0] = p_r[31] & lsr_mask[31];

// LSR carry, handling special cases
assign lsr_out[1] = i_shift_imm_zero ? i_in[31] :
                    shift_nzero      ? p_r[31] & lsr_mask[32] :
                                       i_carry_in;

// ASR MSB
assign asr_out[0] = i_in[31] ? i_in[31] :
                               p_r[31] & lsr_mask[31];

// ASR carry, handling special cases
assign asr_out[1] = shift_over  ? i_in[31] :
                    shift_nzero ? p_r[31] :
                                  i_carry_in;

// Carry and MSB are calculated as above
assign {o_carry_out, o_out[31]} = i_function == LSL ? lsl_out :
                                  i_function == LSR ? lsr_out :
                                  i_function == ASR ? asr_out :
                                                      ror_out ;

// And the rest of result is the masked rotated input.
// Bits masked off by the right-hand mask are filled with the ASR sign.
assign o_out[30:0] = (p_l[30:0] & lsl_mask[30:0]) |
                     (p_r[30:0] & lsr_mask[30:0]) |
                     (~lsr_mask[30:0] & {31{asr_sign}});

// Rotate: calculate bit pos for level "level" and offset "pos".
// Returns (pos - 2**level) truncated to 5 bits, i.e. modulo 32.
function [4:0] wrap;
input integer pos;
input integer level;
integer out;
begin
    out  = pos - (1 << level);
    wrap = out[4:0];
end
endfunction

// Swap bits in the input 16-bit value (bit i <-> bit 15 - i)
function [15:0] bit_swap;
input [15:0] value;
integer i;
begin
    for (i = 0; i < 16; i = i + 1)
        bit_swap[i] = value[15 - i];
end
endfunction

endmodule