OpenCores
URL https://opencores.org/ocsvn/highload/highload/trunk

Subversion Repositories highload

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /highload/trunk
    from Rev 1 to Rev 2
    Reverse comparison

Rev 1 → Rev 2

/dsp_use.vhd
0,0 → 1,61
-- High load test project.
-- Alexey Fedorov, 2014
-- email: FPGA@nerudo.com
--
-- It implements 7 multipliers
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
-- dsp_use: multiplier-load core.
--   DATA_WIDTH : bit width of the sample input, of every internal word and
--                of the result output.
--   clk        : clock; all internal registers update on its rising edge.
--   datain     : input sample, interpreted as a signed number.
--   dataout    : registered low DATA_WIDTH bits of the final product.
entity dsp_use is
  generic (
    DATA_WIDTH : positive := 16
  );
  port (
    clk     : in  std_logic;
    datain  : in  std_logic_vector(DATA_WIDTH-1 downto 0);
    dataout : out std_logic_vector(DATA_WIDTH-1 downto 0)
  );
end dsp_use;
 
architecture rtl of dsp_use is

  -- Bank of eight signed DATA_WIDTH-bit words.  Every pipeline stage uses
  -- this same type, even the stages that only ever write their lowest entries.
  type TShReg is array (0 to 7) of signed(DATA_WIDTH-1 downto 0);
  constant BANK_CLEAR : TShReg := (others => (others => '0'));

  -- Input delay line and its registered copy.
  signal taps, taps_r          : TShReg := BANK_CLEAR;
  -- First multiplier level (entries 0..3 written) plus two register stages.
  signal lvl1, lvl1_r, lvl1_rr : TShReg := BANK_CLEAR;
  -- Second multiplier level (entries 0..1 written) plus two register stages.
  signal lvl2, lvl2_r, lvl2_rr : TShReg := BANK_CLEAR;
  -- Final multiplier level (entry 0 written).
  signal lvl3                  : TShReg := BANK_CLEAR;

  -- Full-width signed product of a and b, cut down to its low a'length bits.
  function mul_low(a, b : signed) return signed is
    variable wide : signed(a'length + b'length - 1 downto 0);
  begin
    wide := a * b;
    return wide(a'length - 1 downto 0);
  end function;

begin

  -- 4 + 2 + 1 = 7 multipliers arranged as a binary tree over the eight taps.
  -- All assignments are signal assignments inside one clocked process, so
  -- every right-hand side sees the values from before the clock edge.
  mul_tree_p : process(clk)
  begin
    if rising_edge(clk) then

      -- Eight-deep delay line fed by the input sample.
      taps(0) <= signed(datain);
      for k in 1 to 7 loop
        taps(k) <= taps(k-1);
      end loop;
      taps_r <= taps;

      -- Level 1: multiply neighbouring pairs of the registered taps.
      for k in 0 to 3 loop
        lvl1(k) <= mul_low(taps_r(2*k), taps_r(2*k+1));
      end loop;
      lvl1_r  <= lvl1;
      lvl1_rr <= lvl1_r;

      -- Level 2: multiply neighbouring pairs of the level-1 results.
      for k in 0 to 1 loop
        lvl2(k) <= mul_low(lvl1_rr(2*k), lvl1_rr(2*k+1));
      end loop;
      lvl2_r  <= lvl2;
      lvl2_rr <= lvl2_r;

      -- Level 3: single multiplier, then the output register.
      lvl3(0) <= mul_low(lvl2_rr(0), lvl2_rr(1));
      dataout <= std_logic_vector(lvl3(0));

    end if;
  end process mul_tree_p;

end rtl;
/lc_use.vhd
0,0 → 1,55
-- High load test project.
-- Alexey Fedorov, 2014
-- email: FPGA@nerudo.com
--
-- It implements 256 LUT/DFFs per one row (NUM_ROWS parameter)
-- with default other parameters
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
 
-- lc_use: LUT/flip-flop load core.
--   DATA_WIDTH : bit width of the inputs and dataout ports.
--   ARITH_SIZE : width of each independent adder slice; must divide DATA_WIDTH.
--   NUM_ROWS   : number of XOR-row / adder-row pairs in the register pipeline.
--   clk        : clock; all registers update on its rising edge.
--   inputs     : data word fed into the first pipeline register.
--   dataout    : registered copy of the last pipeline word.
entity lc_use is
  generic (
    DATA_WIDTH : positive := 128;
    ARITH_SIZE : positive := 16;
    NUM_ROWS   : positive := 6
  );
  port (
    clk     : in  std_logic;
    inputs  : in  std_logic_vector(DATA_WIDTH-1 downto 0);
    dataout : out std_logic_vector(DATA_WIDTH-1 downto 0)
  );
end lc_use;
 
 
architecture rtl of lc_use is

  -- One pipeline word.  Its width follows the DATA_WIDTH generic; it used to
  -- be fixed at 128 bits, which only matched the ports for DATA_WIDTH = 128
  -- and made every other setting fail with a width mismatch on dataout.
  subtype TWord is unsigned(DATA_WIDTH-1 downto 0);
  type TArr is array (natural range <>) of TWord;

  -- arr(0) holds the registered input; each row i then owns two entries:
  --   arr(2*i+1) : XOR row
  --   arr(2*i+2) : adder row
  signal arr : TArr(0 to 2*NUM_ROWS) := (others => (others => '0'));

  -- Number of independent ARITH_SIZE-bit adder slices per word.
  constant NUM_SLICES : natural := DATA_WIDTH / ARITH_SIZE;

begin

  assert DATA_WIDTH mod ARITH_SIZE = 0 report "ARITH_SIZE should be divider of DATA_WIDTH" severity error;

  -- Every assignment below is a signal assignment inside one clocked
  -- process, so each right-hand side reads the values from before the edge:
  -- each arr entry is a register.
  process(clk)
  begin
    if rising_edge(clk) then

      arr(0) <= unsigned(inputs);

      for i in 0 to NUM_ROWS-1 loop

        -- XOR row: the word combined with its rotations by 1, 2 and 3 bits.
        arr(2*i+1) <= arr(2*i) xor (arr(2*i) rol 1) xor (arr(2*i) rol 2) xor (arr(2*i) rol 3);

        -- Adder row: slice-wise sum of the row input and the XOR row.
        -- Carries do not cross ARITH_SIZE-bit slice boundaries.
        for j in 0 to NUM_SLICES-1 loop
          arr(2*i+2)((j+1)*ARITH_SIZE - 1 downto j*ARITH_SIZE) <=
            arr(2*i+0)((j+1)*ARITH_SIZE - 1 downto j*ARITH_SIZE) +
            arr(2*i+1)((j+1)*ARITH_SIZE - 1 downto j*ARITH_SIZE);
        end loop;

      end loop;

      dataout <= std_logic_vector(arr(2*NUM_ROWS));

    end if;
  end process;

end rtl;
/ram_buf.vhd
0,0 → 1,56
-- High load test project.
-- Alexey Fedorov, 2014
-- email: FPGA@nerudo.com
--
-- It implements a number of RAM bits that depends on the given parameters.
 
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;
 
 
-- ram_buf: RAM-based delay buffer.
--   DATA_WIDTH : width of each stored word.
--   DEPTH_LOG2 : log2 of the number of words; also the width of the delay port.
--   clk        : clock for both the write side and the read side.
--   din        : word written on every rising clock edge.
--   delay      : offset subtracted from the write address to form the read
--                address.
--   dout       : registered read data.
-- An enable input (ena) exists only as a commented-out port in this revision.
entity ram_buf is
  generic (
    DATA_WIDTH : positive := 12;
    DEPTH_LOG2 : positive := 10
  );
  port (
    clk   : in  std_logic;
    -- ena : in std_logic; -- input data enable
    din   : in  std_logic_vector(DATA_WIDTH-1 downto 0);
    delay : in  std_logic_vector(DEPTH_LOG2-1 downto 0);
    dout  : out std_logic_vector(DATA_WIDTH-1 downto 0)
  );
end entity ram_buf;
 
 
architecture rtl of ram_buf is

  -- Storage: 2**DEPTH_LOG2 words of DATA_WIDTH bits, zero-initialised.
  constant DEPTH : positive := 2**DEPTH_LOG2;
  subtype TWord is std_logic_vector(DATA_WIDTH-1 downto 0);
  type TDelayRam is array (0 to DEPTH-1) of TWord;
  signal mem : TDelayRam := (others => (others => '0'));

  -- Write and read addresses; both wrap modulo the RAM depth.
  signal wr_ptr, rd_ptr : unsigned(DEPTH_LOG2-1 downto 0) := (others => '0');

begin

  -- The optional write enable (ena) and the reset of the write address are
  -- not implemented in this revision; the buffer writes on every clock edge.
  delay_p : process(clk)
  begin
    if rising_edge(clk) then

      -- Read side: the read address trails the write address by `delay`, and
      -- the output register is loaded from the current read address.  When
      -- the same address is read and written on one edge, the read returns
      -- the OLD contents, because `mem` is a signal.
      rd_ptr <= wr_ptr - unsigned(delay);
      dout   <= mem(to_integer(rd_ptr));

      -- Write side: store the incoming word and advance the write address.
      mem(to_integer(wr_ptr)) <= din;
      wr_ptr <= wr_ptr + 1;

    end if;
  end process delay_p;

end rtl;
/high_load.vhd
0,0 → 1,251
-- High load test project. ***** TOP level file *****
-- Alexey Fedorov, 2014
-- email: FPGA@nerudo.com
--
-- It is intended for checking a device
-- under high power consumption.
-- A number of parameters make it possible
-- to change the number of used LC/DFF, DSP, RAM and I/O.
--
-- It can operate at 200 MHz in Cyclone 5E FPGA
--
-- 1 LC core is about 1500 LUT4/FF (with default parameters)
-- 1 DSP core is 7 DSP 18*18.
-- Each LC core also demands 4*N RAM block (32 bits width)
 
--To maximize power consumption:
--1) Find parameters for maximum FPGA resource usage
--2) Feed maximum frequency clock to CLK input (directly or via PLL instantiated in top level)
--3) Feed random data to inputs (lower ADC bits or data from PRBS generator)
--4) Connect maximal count of outputs. Be careful: They are switching simultaneously.
--
-- **** USE HIGH LOAD PROJECT AT YOUR OWN RISK ****
--
 
 
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
 
 
-- high_load: top level of the high-load test design.
--   NUM_IN         : width of the inputs port (number of input pins).
--   NUM_OUT        : width of the dataout port (number of output pins).
--   NUM_LC         : number of lc_use cores; the same number of ram_buf
--                    instances is generated.
--   NUM_DSP        : number of dsp_use cores.
--   RAM_DEPTH_LOG2 : log2 of the depth of every ram_buf instance.
entity high_load is
  generic (
    NUM_IN         : positive := 3*14;
    NUM_OUT        : positive := 1;
    NUM_LC         : positive := 16;
    NUM_DSP        : positive := 9;
    RAM_DEPTH_LOG2 : integer range 4 to 30 := 10
  );
  port (
    -- Input ports
    clk     : in  std_logic;
    inputs  : in  std_logic_vector(NUM_IN-1 downto 0);

    -- Output ports: every bit is driven with the same registered value.
    dataout : out std_logic_vector(NUM_OUT-1 downto 0)
  );
end high_load;
 
 
 
architecture rtl of high_load is

  --component aes_test_wrap is
  --port(
  --  clk : in std_logic;
  --  datain: in std_logic_vector(127 downto 0);
  --  key : in std_logic_vector(127 downto 0);
  --  dataout: out std_logic_vector(127 downto 0)
  --  );
  --end component;

  component lc_use is
    generic (
      DATA_WIDTH : positive := 128;
      ARITH_SIZE : positive := 16;  -- must divide DATA_WIDTH
      NUM_ROWS   : positive := 6
    );
    port (
      clk     : in  std_logic;
      inputs  : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      dataout : out std_logic_vector(DATA_WIDTH-1 downto 0)
    );
  end component;

  component dsp_use is
    generic (
      DATA_WIDTH : positive := 16
    );
    port (
      clk     : in  std_logic;
      datain  : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      dataout : out std_logic_vector(DATA_WIDTH-1 downto 0)
    );
  end component;

  component ram_buf is
    generic (
      DATA_WIDTH : positive := 12;
      DEPTH_LOG2 : positive := 10
    );
    port (
      clk   : in  std_logic;  -- input data clock
      -- ena : in std_logic;  -- input data enable
      din   : in  std_logic_vector(DATA_WIDTH-1 downto 0);
      delay : in  std_logic_vector(DEPTH_LOG2-1 downto 0);
      dout  : out std_logic_vector(DATA_WIDTH-1 downto 0)
    );
  end component;

  -- Per-core data widths.  LC_WIDTH replaces the literal 128 that used to be
  -- repeated in every LC and RAM slice expression and generic map.
  constant LC_WIDTH  : positive := 128;  -- Data width of one LC core / RAM buffer
  constant DSP_WIDTH : integer  := 15;   -- Data width of DSP multipliers

  -- Total bus widths over all cores of one kind.
  constant LC_W  : integer := LC_WIDTH*NUM_LC;
  constant DSP_W : integer := DSP_WIDTH*NUM_DSP;

  -- Processing order, fixed at elaboration time:
  --   true  : Input => LC  => RAM => DSP => Output
  --   false : Input => DSP => RAM => LC  => Output
  constant LC_FIRST : boolean := LC_W < DSP_W;

  -- Fixed read offset handed to every RAM buffer.
  constant RAM_DELAY : std_logic_vector(RAM_DEPTH_LOG2-1 downto 0) :=
    std_logic_vector(to_unsigned(2**RAM_DEPTH_LOG2 - 10, RAM_DEPTH_LOG2));

  --constant key : bit_vector(127 downto 0) := X"2BAC93F18E4797830BD476554BBE27A5";

  signal lc_in, lc_out, ram_in, ram_out : std_logic_vector(LC_W-1 downto 0);
  signal dsp_in, dsp_out                : std_logic_vector(DSP_W-1 downto 0);

  signal xor_result : std_logic;

  -- Spreads inp over outp in chunks of inp'length bits.
  --   * chunk 1 is a plain copy of inp;
  --   * chunk k (k >= 2) is inp xor (inp rotated left by k-1 bits);
  --   * if outp'length is not a multiple of inp'length, the remaining upper
  --     bits of outp receive the low bits of inp.
  -- The index arithmetic addresses both vectors from bit 0 upwards.
  -- to_bitvector is called without an explicit xmap argument, so
  -- non-'0'/'1' values take its default mapping before the rotation.
  procedure assign_bus(
    signal inp  : in  std_logic_vector;
    signal outp : out std_logic_vector) is

    constant IN_W  : integer := inp'length;
    constant OUT_W : integer := outp'length;

  begin
    for i in 1 to OUT_W/IN_W loop
      if i = 1 then
        outp((i-1)*IN_W+IN_W-1 downto (i-1)*IN_W) <= inp;
      else
        outp((i-1)*IN_W+IN_W-1 downto (i-1)*IN_W) <= inp xor to_stdlogicvector(to_bitvector(inp) rol (i-1));
      end if;
    end loop;
    if OUT_W mod IN_W > 0 then
      outp(OUT_W-1 downto (OUT_W/IN_W)*IN_W) <= inp(OUT_W mod IN_W - 1 downto 0);
    end if;
  end procedure;

  -- Drives outp with the XOR of all bits of inp.
  procedure xorbus(
    signal inp  : in  std_logic_vector;
    signal outp : out std_logic
  ) is
    variable tmp : std_logic := '0';
  begin
    for i in inp'range loop
      tmp := tmp xor inp(i);
    end loop;
    outp <= tmp;
  end procedure;

  -- Drives outp with '1' only when every bit of inp is '1', else '0'.
  -- Alternative to xorbus; its calls in the process below are commented out.
  procedure resultbus(
    signal inp  : in  std_logic_vector;
    signal outp : out std_logic
  ) is
    variable tmp : integer := 0;
  begin
    for i in inp'range loop
      if inp(i) = '1' then
        tmp := tmp + 1;
      end if;
    end loop;
    if tmp >= inp'length then
      outp <= '1';
    else
      outp <= '0';
    end if;
  end procedure;

begin

  -- Informational messages: an assert reports when its condition is FALSE,
  -- so exactly one of these two fires and names the order that was built.
  assert LC_FIRST     report "Implementing Input => DSP => RAM => LC => Output" severity warning;
  assert not LC_FIRST report "Implementing Input => LC => RAM => DSP => Output" severity warning;

  -- Registered glue between the core groups.  Each assign_bus call is a
  -- register stage, and xor_result is registered once more into dataout.
  glue_p : process(clk)
  begin
    if rising_edge(clk) then
      if LC_FIRST then
        assign_bus(inputs, lc_in);    -- Input => LC => RAM => DSP => Output
        assign_bus(lc_out, ram_in);
        assign_bus(ram_out, dsp_in);
        -- resultbus(dsp_out, xor_result);
        xorbus(dsp_out, xor_result);
      else
        assign_bus(inputs, dsp_in);   -- Input => DSP => RAM => LC => Output
        assign_bus(dsp_out, ram_in);
        assign_bus(ram_out, lc_in);
        -- resultbus(lc_out, xor_result);
        xorbus(lc_out, xor_result);
      end if;
      -- Same in both orders: fan the single result bit out to every output.
      dataout <= (others => xor_result);
    end if;
  end process glue_p;

  LC_GEN: for i in 0 to NUM_LC-1 generate
    -- aes_i : aes_test_wrap
    -- port map(
    --   clk => clk,
    --   datain => aes_in(128*i+127 downto 128*i),
    --   key => to_stdlogicvector(key rol i),
    --   dataout=> aes_out(128*i+127 downto 128*i)
    -- );
    lc_i: lc_use
      generic map (
        DATA_WIDTH => LC_WIDTH,
        ARITH_SIZE => 16,  -- must divide DATA_WIDTH
        NUM_ROWS   => 6
      )
      port map (
        clk     => clk,
        inputs  => lc_in(LC_WIDTH*i+LC_WIDTH-1 downto LC_WIDTH*i),
        dataout => lc_out(LC_WIDTH*i+LC_WIDTH-1 downto LC_WIDTH*i)
      );
  end generate;

  DSP_GEN: for i in 0 to NUM_DSP-1 generate
    dsp_i : dsp_use
      generic map (
        DATA_WIDTH => DSP_WIDTH
      )
      port map (
        clk     => clk,
        datain  => dsp_in(DSP_WIDTH*i+DSP_WIDTH-1 downto DSP_WIDTH*i),
        dataout => dsp_out(DSP_WIDTH*i+DSP_WIDTH-1 downto DSP_WIDTH*i)
      );
  end generate;

  -- One RAM buffer per LC core, each LC_WIDTH bits wide.
  RAM_GEN: for i in 0 to NUM_LC-1 generate
    ram_i: ram_buf
      generic map (
        DATA_WIDTH => LC_WIDTH,
        DEPTH_LOG2 => RAM_DEPTH_LOG2
      )
      port map (
        clk   => clk,
        din   => ram_in(LC_WIDTH*i+LC_WIDTH-1 downto LC_WIDTH*i),
        delay => RAM_DELAY,
        dout  => ram_out(LC_WIDTH*i+LC_WIDTH-1 downto LC_WIDTH*i)
      );
  end generate;

end rtl;

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.