--
-- Algorithm taken from Yalu: https://www.mikrocontroller.net/topic/244674#new*/
--
-- Pipelined integer cube root.
--
-- The input is treated as an unsigned integer.  It is scaled by 8, the floor
-- of the cube root of the scaled value is computed one result bit per
-- pipeline stage, and the result is finally halved with round-to-nearest.
--
-- Generics:
--   NUM_BITS : width of data_in and data_out.
--
-- Ports:
--   clk      : clock, all registers update on the rising edge.
--   rst      : asynchronous reset, active high; clears every pipeline register
--              and data_out.
--   data_in  : radicand (unsigned).
--   data_out : rounded cube root of the data_in value sampled
--              NUM_STAGES + 1 rising clock edges earlier
--              (NUM_STAGES = ceil((NUM_BITS + 3) / 3)).
--
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.ceil;

entity cube_root is
  generic (
    NUM_BITS : positive := 8
  );
  port (
    clk      : in  std_logic := 'X';
    rst      : in  std_logic := 'X';
    data_in  : in  std_logic_vector(NUM_BITS-1 downto 0) := (others => 'X');
    data_out : out std_logic_vector(NUM_BITS-1 downto 0) := (others => 'X')
  );
end entity;

architecture rtl of cube_root is

  -- Three extra low-order bits are carried internally so that the final
  -- result can be rounded instead of truncated.
  constant BITS_INTERN   : positive := NUM_BITS + 3;
  -- Every stage resolves one bit of the root, i.e. consumes three radicand bits.
  constant NUM_STAGES    : positive := integer(ceil(real(BITS_INTERN)/3.0));
  -- Common width of all values travelling through the pipeline.
  constant NUM_DATA_BITS : positive := 3*NUM_STAGES + 1;

  -- State handed from one stage to the next.  With y the partial root found
  -- so far and k the shift of the receiving stage:
  --   root      = y   * 2**(k+3)
  --   y2        = y*y * 2**(k+3)
  --   remainder = what is left of the scaled radicand
  type pipeline_data_t is record
    y2        : unsigned(NUM_DATA_BITS-1 downto 0);
    root      : unsigned(NUM_DATA_BITS-1 downto 0);
    remainder : unsigned(NUM_DATA_BITS-1 downto 0);
  end record;

  -- All-zero record used as reset and initial value.
  constant pipeline_data_t_rst : pipeline_data_t := (
    y2        => (others => '0'),
    root      => (others => '0'),
    remainder => (others => '0')
  );

  type DATA_ARRAY_T is array (0 to NUM_STAGES-1) of pipeline_data_t;

  signal store_in  : DATA_ARRAY_T := (others => pipeline_data_t_rst);  -- stage inputs (combinational)
  signal store_out : DATA_ARRAY_T := (others => pipeline_data_t_rst);  -- stage output registers

begin

  -- First stage input: the radicand multiplied by 8 (three zeros appended at
  -- the bottom, needed for rounding later) and zero-extended to the pipeline
  -- width; the partial results start at zero.
  store_in(0).remainder <= resize(unsigned(data_in) & "000", NUM_DATA_BITS);
  store_in(0).y2        <= (others => '0');
  store_in(0).root      <= (others => '0');

  -- Each following stage is fed by the register of its predecessor.
  conn_stages : for i in 1 to NUM_STAGES-1 generate
    store_in(i) <= store_out(i-1);
  end generate;

  -- One registered trial-subtraction step per root bit, most significant first.
  gen_stages : for i in 0 to NUM_STAGES-1 generate
    -- A single '1' at the position of this stage's cubed root bit.
    constant cubedbit : unsigned(NUM_DATA_BITS-1 downto 0) :=
      shift_left(to_unsigned(1, NUM_DATA_BITS), (NUM_STAGES-i-1)*3);
  begin
    proc : process(clk, rst)
      -- The trial value needs one extra bit because it is a sum of five terms.
      variable trial        : unsigned(NUM_DATA_BITS downto 0) := (others => '0');
      variable y2_half      : unsigned(NUM_DATA_BITS-1 downto 0);
      variable root_half    : unsigned(NUM_DATA_BITS-1 downto 0);
      variable root_quarter : unsigned(NUM_DATA_BITS-1 downto 0);
    begin
      if rst = '1' then
        trial        := (others => '0');
        store_out(i) <= pipeline_data_t_rst;
      elsif rising_edge(clk) then
        y2_half      := shift_right(store_in(i).y2, 1);
        root_half    := shift_right(store_in(i).root, 1);
        root_quarter := shift_right(store_in(i).root, 2);

        -- Amount by which the remainder shrinks if this root bit is set:
        -- 1.5*y2 + 0.75*root + cubedbit.
        trial := resize(store_in(i).y2, NUM_DATA_BITS+1)
               + resize(y2_half,        NUM_DATA_BITS+1)
               + resize(root_half,      NUM_DATA_BITS+1)
               + resize(root_quarter,   NUM_DATA_BITS+1)
               + resize(cubedbit,       NUM_DATA_BITS+1);

        -- Default: root bit is '0', state is only rescaled for the next stage.
        store_out(i).remainder <= store_in(i).remainder;
        store_out(i).y2        <= y2_half;
        store_out(i).root      <= root_quarter;

        -- Root bit is '1': subtract and fold the new bit into y2 and root.
        if store_in(i).remainder >= trial then
          store_out(i).remainder <= store_in(i).remainder - trial(BITS_INTERN-1 downto 0);
          store_out(i).y2        <= y2_half + cubedbit + root_half;
          store_out(i).root      <= root_quarter + cubedbit;
        end if;
      end if;
    end process;
  end generate;

  -- Output register with rounding.
  proc_round : process(clk, rst)
    variable rounded : unsigned(BITS_INTERN-1 downto 0) := (others => '0');
  begin
    if rst = '1' then
      rounded  := (others => '0');
      data_out <= (others => '0');
    elsif rising_edge(clk) then
      -- Input was multiplied by 8, so output must be divided by cuberoot(8)=2.
      -- Adding 1 before the right shift rounds to nearest.
      rounded  := shift_right(store_out(NUM_STAGES-1).root(BITS_INTERN-1 downto 0) + 1, 1);
      data_out <= std_logic_vector(rounded(NUM_BITS-1 downto 0));
    end if;
  end process;

end architecture;