// /**
// *
// *  This Verilog HDL File is used for simulation and synthesis in the simple DMA design example.  
// *  This contains the DMA engine and memory buffer.
// *
// */
// synthesis verilog_input_version verilog_2001
`timescale 1 ps / 1 ps 
//-----------------------------------------------------------------------------
// Title         : PCI Express Reference Design Master Entity
// Project       : PCI Express MegaCore function
//-----------------------------------------------------------------------------
// File          : altpcierd_master.v
// Author        : Altera Corporation
//-----------------------------------------------------------------------------
// Description :
// This is an entity that initiates transactions to the PCI Express interface
//-----------------------------------------------------------------------------
// Copyright (c) 2005-2006 Altera Corporation. All rights reserved.  Altera products are
// protected under numerous U.S. and foreign patents, maskwork rights, copyrights and
// other intellectual property laws.  
//
// This reference design file, and your use thereof, is subject to and governed by
// the terms and conditions of the applicable Altera Reference Design License Agreement.
// By using this reference design file, you indicate your acceptance of such terms and
// conditions between you and Altera Corporation.  In the event that you do not agree with
// such terms and conditions, you may not use the reference design file. Please promptly
// destroy any copies you have made.
//
// This reference design file being provided on an "as-is" basis and as an accommodation 
// and therefore all warranties, representations or guarantees of any kind 
// (whether express, implied or statutory) including, without limitation, warranties of 
// merchantability, non-infringement, or fitness for a particular purpose, are 
// specifically disclaimed.  By making this reference design file available, Altera
// expressly does not recommend, suggest or require that this reference design file be
// used in combination with any other product not provided by Altera. 
//-----------------------------------------------------------------------------
module altpcierd_master (hclk, hrstn, testtim, ko_cpl_spc_vc0, 
ko_cpl_spc_vc1, cfg_busdev, cfg_maxpload, cfg_maxrdreq, cfg_rcb, 
cfg_tcvcmap, trans_pending, err_cpltim, err_cplunexp, m_req, m_size, 
m_addr, m_ctrl, m_tgt_adr, m_ack, m_pending, rx_busy, tx_busy, vc0_out, 
vc1_out, rx_req, rx_ack, rx_desc, rx_ws, rx_data, rx_be, rx_dv, rx_dfr, 
tx_req, tx_desc, tx_ack, tx_dfr, tx_data, tx_dv, tx_err, tx_ws, fetch_tx_data, nextcyc_is_last, last_datacyc,
tx_avalon_data, rx_st_eop);

   parameter RAM_SIZE  = 10; 
   
   input hclk; 
   input hrstn; 
   input testtim; 
   input[19:0] ko_cpl_spc_vc0; 
   input[19:0] ko_cpl_spc_vc1; 
   input[12:0] cfg_busdev; 
   input[2:0] cfg_maxpload; 
   input[2:0] cfg_maxrdreq; 
   input cfg_rcb; 
   input[23:0] cfg_tcvcmap; 
   output trans_pending; 
   reg trans_pending;
   output err_cpltim; 
   reg err_cpltim;
   output err_cplunexp; 
   reg err_cplunexp;
   input m_req; 
   input[31:0] m_size; 
   input[63:0] m_addr; 
   input[31:0] m_ctrl; 
   input[12:0] m_tgt_adr; 
   output m_ack; 
   wire m_ack;
   output m_pending; 
   wire m_pending;
   output rx_busy; 
   wire rx_busy;
   output tx_busy; 
   wire tx_busy;
   output vc0_out; 
   reg vc0_out;
   output vc1_out; 
   reg vc1_out;
   input rx_req; 
   output rx_ack; 
   wire rx_ack;
   input[135:0] rx_desc; 
   output rx_ws; 
   reg rx_ws;
   input[63:0] rx_data; 
   input[7:0] rx_be; 
   input rx_dv; 
   input rx_dfr; 
   output tx_req; 
   wire tx_req;
   output[127:0] tx_desc; 
   wire[127:0] tx_desc;
   input tx_ack; 
   output tx_dfr; 
   wire tx_dfr;
   output[63:0] tx_data; 
   reg[63:0] tx_data;
   output tx_dv; 
   wire tx_dv;
   output tx_err; 
   wire tx_err;
   input tx_ws;  
   
   input fetch_tx_data;        // external control to force a data read for tx
   
   output nextcyc_is_last;     // indicates that next data cycle is last for tx pkt (last dfr cycle)
   output last_datacyc;        // indicates that this data cycle is last for tx pkt (last dv cycle)
   reg    nextcyc_is_last;
   reg    last_datacyc; 
   
   // streaming interface
   output[63:0] tx_avalon_data; 
   wire[63:0]   tx_avalon_data;
   input        rx_st_eop;
   
   parameter[1:0] cpl_idl = 0; 
   parameter[1:0] cpl_rxdesc = 1; 
   parameter[1:0] cpl_rxdata = 2; 
   reg[1:0] cpl_sm; 
   parameter[1:0] req_idle = 0; 
   parameter[1:0] req_txdesc = 1; 
   parameter[1:0] req_txdata = 2; 
   reg[1:0] req_sm; 
   wire is_rxmst; 
   wire is_rxwr; 
   wire is_rxdval; 
   reg[RAM_SIZE-1:0] rx_addr; 
   wire[12:0] rx_bytecnt; 
   reg rx_wr; 
   wire wr_allow; 
   wire[63:0] datain; 
   wire[63:0] dataout; 
   wire[RAM_SIZE-1:0] rx_adwr;   //xhdl
   wire[12:0] tx_adrd; 
   wire[7:0] wren; 
   reg[9:0] phasecnt; 
   reg[12:0] txaddr; 
   reg[1:0] fetch_cnt; 
   reg[2:0] desc_setup_cnt; 
   reg need_to_fetch; 
   reg pipe_fill; 
   wire rdclken; 
   wire[12:0] txaddr_inc; 
   reg fetch_first; 
   reg mem_first; 
   reg mem_first_del;
   reg tx_dv_r; 
   wire rd_allow; 
   wire rd_out; 
   reg tx_dfr_r; 
   reg[63:0] tx_data_r; 
   // master signals
   reg mem_desc; 
   reg mem_new; 
   reg m_ack_r; 
   reg[31:0] mem_addr64; 
   reg[15:0] mem_ctrl; 
   reg[12:0] mem_tgt_adr; 
   reg[3:0] mem_tgt_adr_lsb; 
   reg[2:0] mem_pload; 
   reg[31:0] mem_addr32; 
   reg[31:0] mem_size; 
   wire[2:0] maxreq_cfg;
   reg [2:0] cfg_maxrdreq_i;
   reg[2:0] kod_cfg_maxrdreq;
   reg[2:0] koh_cfg_maxrdreq;
   reg[2:0] ko_cfg_maxrdreq;
   reg[2:0] maxreq; 
   wire[12:0] addr_13lsb; 
   wire[12:0] bytecnt_max; 
   reg[12:0] bytecnt_sub_r;
   wire[12:0] bytecnt_sub; 
   wire[12:0] bytecnt_sat; 
   reg[12:0] tx_bytecnt; 
   wire[12:0] tx_lastoffset; 
   reg ack_req; 
   wire[0:0] tx_remain; 
   reg[10:0] tx_dwcnt_r; 
   reg[10:0] tx_dwcnt_aft64B; 
   wire[4:0] addr_aft64B; 
   wire[3:0] tx_lbe_n; 
   wire[3:0] tx_fbe_n;
   wire[3:0] tx_lbe_d; 
   wire[3:0] tx_fbe_d; 
   reg[3:0] tx_lbe; 
   reg[3:0] tx_fbe; 
   wire rcb; 
   wire[0:0] rem_cpld; 
   wire[0:0] rem_cplh; 
   wire[0:0] xtr_cplh; 
   wire[6:0] bas_cplh; 
   wire[6:0] cred_cplh_n; 
   wire[10:0] cred_cpld_n; 
   reg[6:0] cred_cplh; 
   reg[10:0] cred_cpld; 
   reg[2:0] vc_sel; 
   wire[7:0] cur_cplh; 
   wire[13:0] cur_cpld; 
   reg[7:0] cur_cplh0; 
   reg[13:0] cur_cpld0; 
   reg[7:0] cur_cplh1; 
   reg[13:0] cur_cpld1; 
   wire en_rdreq; 
   wire en_npwr;
   reg  en_rdreq_r; 
   reg en_npwr_r; 
   wire en_req;
   reg en_req_r; 
   reg mem_req;
   reg mem_req_r;
reg    tx_ack_r;
reg    mst_tag_vld;
   reg[127:0] tx_desc_mem; 
   wire cpl_busy_n;
   reg cpl_busy; 
   reg[1:0] mst_tag; 
   reg[2:0] vc_r; 
   reg[6:0] cplh_r; 
   reg[10:0] cpld_r; 
   reg init; 
   reg cpl_ack; 
   reg cpl_ack_r; 
   reg cpl_hdrcred_rls; 
   reg dwcnt_msb; 
   reg[2:0] cpl_vc; 
   wire[2:0] cpl_vc_n; 
   wire[6:0] cpl_hdrcred; 
   wire[10:0] cpl_datacred; 
   reg req_cpl_chnl; 
   reg[16:0] tim1ms; 
   reg ev1ms;
   reg[RAM_SIZE-1:0] cpl_addr_n; 
   reg cpl_busy0; 
   reg[RAM_SIZE-1:0] cpl_addr0; 
   reg[3:0] timer0; 
   reg[10:0] cpl_dwcnt0; 
   reg[6:0] hdrcred0; 
   reg[10:0] datacred0; 
   reg cpl_busy1; 
   reg[3:0] timer1; 
   reg[RAM_SIZE-1:0] cpl_addr1; 
   reg[10:0] cpl_dwcnt1; 
   reg[6:0] hdrcred1; 
   reg[10:0] datacred1; 
   reg cpl_busy2; 
   reg[3:0] timer2; 
   reg[RAM_SIZE-1:0] cpl_addr2; 
   reg[10:0] cpl_dwcnt2; 
   reg[6:0] hdrcred2; 
   reg[10:0] datacred2; 
   reg cpl_busy3; 
   reg[3:0] timer3; 
   reg[RAM_SIZE-1:0] cpl_addr3; 
   reg[10:0] cpl_dwcnt3; 
   reg[6:0] hdrcred3; 
   reg[10:0] datacred3; 
   reg rx_ack_r;
   reg rx_ack_rr; 
   wire[7:0] rx_tag;
reg 	     rx_tag0_hit;
reg 	     rx_tag1_hit;
reg 	     rx_tag2_hit;
reg 	     rx_tag3_hit; 
   wire[6:0] rx_addr7; 
   wire[10:0] rx_dwcnt; 
   wire[0:0] rem_rxcpld; 
   wire[0:0] rem_rxcplh; 
   wire[6:0] bas_rxcplh; 
   wire[6:0] rx_cplh_n; 
   wire[8:0] rx_cpld_n; 
   reg[6:0] rx_cplh; 
   reg[10:0] rx_cpld; 
   wire[2:0] rx_tc; 
   wire is_txwr; 
   reg[3:0] cpl_sel; 
   wire[2:0] vc_sel_n; 
   wire err_tim0; 
   wire err_tim1; 
   wire err_tim2; 
   wire err_tim3; 
   reg mem_zero; 
   reg mem_ramp1; 
   reg mem_ramp4; 
   wire[63:0] data_ramp; 
   reg[14:0] data_ramp15; 
   wire[2:0] maxreq_n; 
   wire[10:0] rx_addr_dw; 
   wire[10:0] rx_addrend_n; 
   reg[5:0] rx_addrend;
reg [135:0] rx_desc_r;
reg	    rx_req_r;
   
   // pipeline stages for dprambe 
   // write port input.   
   // for performance.
   reg [RAM_SIZE - 1:0] rx_adwr_del;
   reg [63:0]           datain_del;
   reg [7:0]            wren_del; 
   reg [RAM_SIZE - 1:0] rx_adwr_del2;
   reg [63:0]           datain_del2;
   reg [7:0]            wren_del2; 
   reg [RAM_SIZE - 1:0] rx_adwr_del3;
   reg [63:0]           datain_del3;
   reg [7:0]            wren_del3;
   
   // pipe stages
   reg [31:0]            memsize_minus_bytecnt;
   reg [31:0]            memaddr32_plus_bytecnt;
   

   // wires for concatenations
   // xhdl
   
   wire[12:0] mem_addr32_7lsb;
   wire[12:0] mem_addr32_8lsb;
   wire[12:0] mem_addr32_9lsb;
   wire[12:0] mem_addr32_10lsb;
   wire[12:0] mem_addr32_11lsb;
   wire[12:0] mem_addr32_12lsb;
   wire[6:0]  mem_addr32_6to0;
   wire[7:0]  mem_addr32_7to0;
   wire[8:0]  mem_addr32_8to0;
   wire[9:0]  mem_addr32_9to0;
   wire[10:0] mem_addr32_10to0;
   wire[11:0] mem_addr32_11to0; 
   wire[4:0]  rx_desc_r_38to34; 
   wire[4:0]  data_ramp15_4to0; 
   wire[63:0] data_ramp_memramp1; 
   wire[63:0] data_ramp_memramp2;   
   wire[11:0] ko_cpl_spc_vc0_19to8;
   wire[11:0] ko_cpl_spc_vc1_19to8;
   
  // wire       maxreqis_zero;   
   wire[RAM_SIZE-1:0] rx_dwcnt_div2;     
   
   // Received DW count shifted right by one bit (halved) and zero-extended
   // to RAM_SIZE bits; added to the per-channel completion addresses.  //xhdl
   assign rx_dwcnt_div2 = {1'b0, rx_dwcnt[RAM_SIZE-1:1]};

   // xhdl: each low-order slice of mem_addr32 is given its own name and is
   // then zero-extended to 13 bits (the candidates muxed into addr_13lsb).
   assign mem_addr32_6to0  = mem_addr32[6:0];
   assign mem_addr32_7lsb  = {{6{1'b0}}, mem_addr32_6to0};

   assign mem_addr32_7to0  = mem_addr32[7:0];
   assign mem_addr32_8lsb  = {{5{1'b0}}, mem_addr32_7to0};

   assign mem_addr32_8to0  = mem_addr32[8:0];
   assign mem_addr32_9lsb  = {{4{1'b0}}, mem_addr32_8to0};

   assign mem_addr32_9to0  = mem_addr32[9:0];
   assign mem_addr32_10lsb = {{3{1'b0}}, mem_addr32_9to0};

   assign mem_addr32_10to0 = mem_addr32[10:0];
   assign mem_addr32_11lsb = {{2{1'b0}}, mem_addr32_10to0};

   assign mem_addr32_11to0 = mem_addr32[11:0];
   assign mem_addr32_12lsb = {1'b0, mem_addr32_11to0};

   //----------------------------------
   // streaming interface: tx_avalon_data is a plain copy of tx_data
   //----------------------------------
   assign tx_avalon_data = tx_data;
   // assign    rx_desc_r = rx_desc;
  //  assign     rx_req_r = rx_req;


   // One-bit correction term added to cpl_addr_n when a request is issued:
   // bit 0 is set when mem_addr32[2] and tx_dwcnt_r[0] are both set, every
   // other bit is tied to zero through zero_qword.
   wire [63:0]         zero_qword;
   wire [RAM_SIZE-1:0] cpl_addr_n_add_mem;

   assign zero_qword = {64{1'b0}};

   assign cpl_addr_n_add_mem[RAM_SIZE-1:1] = zero_qword[RAM_SIZE-1:1];
   assign cpl_addr_n_add_mem[0]            = mem_addr32[2] & tx_dwcnt_r[0];
   
   // Master interface control.
   // Captures the request presented on m_req / m_addr / m_size / m_ctrl /
   // m_tgt_adr when m_ack_r pulses, then advances the remaining address,
   // size and buffer pointers each time a request is issued (ack_req pulse).
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         // descriptor state
         mem_desc               <= 1'b0;
         mem_new                <= 1'b0;
         m_ack_r                <= 1'b0;
         desc_setup_cnt         <= 3'b000;
         // latched request fields
         mem_addr64             <= 32'h0;
         mem_addr32             <= 32'h0;
         mem_size               <= 32'h0;
         mem_ctrl               <= 16'h0;
         mem_pload              <= 3'b000;
         mem_zero               <= 1'b0;
         mem_ramp1              <= 1'b0;
         mem_ramp4              <= 1'b0;
         mem_tgt_adr            <= 13'h0;
         mem_tgt_adr_lsb        <= 4'h0;
         cpl_addr_n             <= {RAM_SIZE{1'b0}};
         // pipeline registers
         rx_desc_r              <= 136'h0;
         rx_req_r               <= 1'b0;
         memsize_minus_bytecnt  <= 32'h0;
         memaddr32_plus_bytecnt <= 32'h0;
      end
      else
      begin
         // ---- registers updated on every clock ----
         rx_desc_r              <= rx_desc;
         rx_req_r               <= rx_req;
         mem_tgt_adr_lsb        <= {1'b0, tx_bytecnt[2:0]} + mem_addr32[2:0];
         memaddr32_plus_bytecnt <= mem_addr32 + tx_bytecnt;   // pipe for performance
         memsize_minus_bytecnt  <= mem_size - tx_bytecnt;     // pipe for performance

         // ---- descriptor valid / descriptor new flags ----
         // A new m_req is accepted only while no descriptor is loaded.
         if (mem_desc == 1'b0 & m_req == 1'b1)
         begin
            mem_desc <= 1'b1;
            mem_new  <= 1'b1;
         end
         else
         begin
            // descriptor is finished once the issued request covers
            // everything that is left of mem_size
            if (ack_req == 1'b1 & mem_size == tx_bytecnt)
            begin
               mem_desc <= 1'b0;
            end
            if (desc_setup_cnt == 3'b011)
            begin
               mem_new <= 1'b0;
            end
         end

         // ---- acknowledge to the requester ----
         // Gated by desc_setup_cnt == 4 to guarantee the timing between
         // m_ack_r and ack_req.
         if (mem_desc == 1'b0 & m_req == 1'b1 & (desc_setup_cnt == 3'h4))
         begin
            m_ack_r <= 1'b1;
         end
         else
         begin
            m_ack_r <= 1'b0;
         end

         // ---- setup counter: counts up to 4 and holds there until an
         //      m_ack_r or ack_req pulse restarts it ----
         if (desc_setup_cnt != 3'b100)
         begin
            desc_setup_cnt <= desc_setup_cnt + 3'h1;
         end
         else if ((m_ack_r == 1'b1) || (ack_req == 1'b1))
         begin
            desc_setup_cnt <= 3'h0;
         end

         // ---- request fields: load on m_ack_r, advance on ack_req ----
         if (m_ack_r == 1'b1)
         begin
            mem_addr64  <= m_addr[63:32];
            mem_addr32  <= m_addr[31:0];
            mem_size    <= m_size;
            mem_tgt_adr <= m_tgt_adr;
            cpl_addr_n  <= m_tgt_adr[RAM_SIZE-1:0];    // xhdl
            mem_ctrl    <= m_ctrl[15:0];
            mem_pload   <= m_ctrl[18:16];
            mem_zero    <= m_ctrl[20];
            mem_ramp1   <= m_ctrl[21];
            mem_ramp4   <= m_ctrl[22];
         end
         else if (ack_req == 1'b1)
         begin
            // the pre-computed (registered) sum / difference are used here
            // instead of adding tx_bytecnt in the same cycle
            mem_addr32  <= memaddr32_plus_bytecnt;
            mem_size    <= memsize_minus_bytecnt;
            mem_tgt_adr <= mem_tgt_adr + tx_bytecnt[12:3] +
                           mem_tgt_adr_lsb[3:3];
            cpl_addr_n  <= cpl_addr_n + tx_dwcnt_r[10:1] + cpl_addr_n_add_mem;
         end
      end
   end
   

   assign m_ack = m_ack_r ;

   // m_pending is high while a descriptor is loaded, the transmit side is
   // busy / still framing data, or any of the four completion channels is
   // still in use.
   assign m_pending = mem_desc | tx_busy | tx_dfr_r |
                      cpl_busy0 | cpl_busy1 | cpl_busy2 | cpl_busy3 ;

   // calculate parameters of next transfer according to PCI-Express rules
   // 1. the size limit depends on mem_ctrl[6]: cfg_maxpload when it is set,
   //    the buffer-limited max read request size otherwise
   assign maxreq_cfg = mem_ctrl[6] ? cfg_maxpload : cfg_maxrdreq_i ;

   // 2. saturate payload size if small transfer requested
   //    (maxreq_n is the smaller of mem_pload and maxreq_cfg)
   assign maxreq_n = (mem_pload <= maxreq_cfg) ? mem_pload : maxreq_cfg ;


   // 3. address used to calculate next transfer (force aligned on 128 bytes boundary)
   // maxreq 000 : 128 B
   // maxreq 001 : 256 B
   // maxreq 010 : 512 B
   // maxreq 011 : 1 KB
   // maxreq 100 : 2 KB   (any other value : 4 KB)
   // assign maxreqis_zero = (maxreq == 3'b000);  // unused.  xhdl
 
   // Offset of mem_addr32 inside the maxreq-sized window, zero-extended to 13 bits.
   assign addr_13lsb = (maxreq == 3'b000) ? mem_addr32_7lsb  :
                       (maxreq == 3'b001) ? mem_addr32_8lsb  :
                       (maxreq == 3'b010) ? mem_addr32_9lsb  :
                       (maxreq == 3'b011) ? mem_addr32_10lsb :
                       (maxreq == 3'b100) ? mem_addr32_11lsb :
                                            mem_addr32_12lsb ;
					   
   // 4. byte count max of the transfer
   // maxreq 000 : 128 B
   // maxreq 001 : 256 B
   // maxreq 010 : 512 B
   // maxreq 011 : 1 KB
   // maxreq 100 : 2 KB   (any other value : 4 KB)
   assign bytecnt_max = (maxreq == 3'b000) ? 13'd128  :
                        (maxreq == 3'b001) ? 13'd256  :
                        (maxreq == 3'b010) ? 13'd512  :
                        (maxreq == 3'b011) ? 13'd1024 :
                        (maxreq == 3'b100) ? 13'd2048 :
                                             13'd4096 ;

   // 5. subtract the offset for non-aligned addresses, so that the
   //    transfer ends on the maxreq boundary
   assign bytecnt_sub = bytecnt_max - addr_13lsb ;

   // 6. saturation of bytecount at the end of transfer: never request
   //    more than what is left in mem_size
   assign bytecnt_sat = (mem_size >= bytecnt_sub_r) ? bytecnt_sub_r :
                                                      mem_size[12:0] ;

   // 7. pipeline for performance (register without reset)
   always @(posedge hclk)
      bytecnt_sub_r <= bytecnt_sub ;
   
   
   // Registered transfer parameters: selected max request size, byte count
   // of the next request, its DW count, and the DW count that remains after
   // the first 64-byte boundary.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         maxreq          <= 3'b000 ;
         tx_bytecnt      <= 13'h0 ;
         tx_dwcnt_r      <= 11'h0 ;
         tx_dwcnt_aft64B <= 11'h0 ;
      end
      else
      begin
         maxreq     <= maxreq_n ;
         tx_bytecnt <= bytecnt_sat ;

         // DW count = byte span rounded up to a whole DW
         tx_dwcnt_r <= tx_lastoffset[12:2] + tx_remain ;

         if (mem_addr32[5:2] == 4'b0000)
         begin
            // start address already 64-byte aligned: nothing to remove
            tx_dwcnt_aft64B <= tx_dwcnt_r ;
         end
         else if ((addr_aft64B[4]) == 1'b1)
         begin
            // drop the DWs that sit in front of the first 64-byte boundary
            tx_dwcnt_aft64B <= tx_dwcnt_r + mem_addr32[5:2] - 5'b10000 ;
         end
         else
         begin
            tx_dwcnt_aft64B <= tx_dwcnt_r ;
         end
      end
   end
   // 8. calculate the DW count and the First BE and Last BE
   // byte offset just past the end of the transfer, relative to the first DW
   assign tx_lastoffset = tx_bytecnt + mem_addr32[1:0] ;
   // one extra DW is needed when the transfer does not end on a DW boundary
   assign tx_remain = (tx_lastoffset[1:0] != 2'b00) ? 1'b1 : 1'b0 ;
   // 5-bit sum; bit 4 tells whether the low DW count plus the DW offset
   // carries past the 16-DW (64-byte) boundary
   assign addr_aft64B = {1'b0, tx_dwcnt_r[3:0]} + {1'b0, mem_addr32[5:2]} ;

   // first-DW byte enables from the start address byte offset
   assign tx_fbe_n = (mem_addr32[1:0] == 2'b00) ? 4'b1111 :
                     (mem_addr32[1:0] == 2'b01) ? 4'b1110 :
                     (mem_addr32[1:0] == 2'b10) ? 4'b1100 :
                                                  4'b1000 ;
   // last-DW byte enables from the end offset
   assign tx_lbe_n = (tx_lastoffset[1:0] == 2'b00) ? 4'b1111 :
                     (tx_lastoffset[1:0] == 2'b01) ? 4'b0001 :
                     (tx_lastoffset[1:0] == 2'b10) ? 4'b0011 :
                                                     4'b0111 ;

   // single-DW request: both masks are merged into the first BE and the
   // last BE is forced to zero
   assign tx_fbe_d = (tx_dwcnt_r == 11'd1) ? (tx_fbe & tx_lbe) : tx_fbe ;
   assign tx_lbe_d = (tx_dwcnt_r == 11'd1) ? 4'b0000 : tx_lbe ;

   assign rcb = cfg_rcb ;
   // 9. check the completion buffer availability
   // Calculate the completion header credits and data credits needed for
   // the request.
   // The worst case for the header credit requirement is one big transfer
   // being broken into smaller completions at every Read Completion
   // Boundary (16 DW when rcb is 0, 32 DW when rcb is 1).
   //
   // An extra header is allocated in case the address and the length are
   // such that the request crosses a 64-byte cacheline boundary. When this
   // happens, the completion will also be broken up.

   // data credits: one per requested DW
   assign cred_cpld_n = tx_dwcnt_r[10:0] ;

   // extra header for the unaligned leading part
   assign xtr_cplh = (mem_addr32[5:2] == 4'b0000)   ? 1'b0 :
                     ((addr_aft64B[4]) == 1'b1)     ? 1'b1 :
                     (tx_dwcnt_r[10:4] != 7'b0000000) ? 1'b1 :
                                                        1'b0 ;

   // header for a partial trailing RCB-sized piece
   assign rem_cplh = (tx_dwcnt_aft64B[3:0] != 4'b0000 & rcb == 1'b0)  ? 1'b1 :
                     (tx_dwcnt_aft64B[4:0] != 5'b00000 & rcb == 1'b1) ? 1'b1 :
                                                                        1'b0 ;

   // headers for the whole RCB-sized pieces
   assign bas_cplh = (rcb == 1'b0) ? tx_dwcnt_aft64B[10:4] :
                                     {1'b0, tx_dwcnt_aft64B[10:5]} ;

   assign cred_cplh_n = bas_cplh + rem_cplh + xtr_cplh ;

   //--------------------------------------------------------------------
   // Pipeline stage (performance): registers the required completion
   // credits and the first / last byte enables.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         cred_cplh <= 7'h0 ;
         cred_cpld <= 11'h0 ;
         tx_fbe    <= 4'h0 ;
         tx_lbe    <= 4'h0 ;
      end
      else
      begin
         cred_cplh <= cred_cplh_n ;
         cred_cpld <= cred_cpld_n ;
         tx_fbe    <= tx_fbe_n ;
         tx_lbe    <= tx_lbe_n ;
      end
   end
   // 10. Select the VC associated to the TC and the associated CPL credits
   // mem_ctrl[10:8] picks one 3-bit field out of cfg_tcvcmap.
   assign vc_sel_n = (mem_ctrl[10:8] == 3'b000) ? cfg_tcvcmap[2:0]   :
                     (mem_ctrl[10:8] == 3'b001) ? cfg_tcvcmap[5:3]   :
                     (mem_ctrl[10:8] == 3'b010) ? cfg_tcvcmap[8:6]   :
                     (mem_ctrl[10:8] == 3'b011) ? cfg_tcvcmap[11:9]  :
                     (mem_ctrl[10:8] == 3'b100) ? cfg_tcvcmap[14:12] :
                     (mem_ctrl[10:8] == 3'b101) ? cfg_tcvcmap[17:15] :
                     (mem_ctrl[10:8] == 3'b110) ? cfg_tcvcmap[20:18] :
                                                  cfg_tcvcmap[23:21] ;

   // registered VC selection
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
         vc_sel <= 3'b000 ;
      else
         vc_sel <= vc_sel_n ;
   end
   // only two VC are implemented in the design: VC 0 uses the *0 credit
   // counters, any other selection uses the *1 counters
   assign cur_cplh = (vc_sel == 3'b000) ? cur_cplh0 : cur_cplh1 ;
   assign cur_cpld = (vc_sel == 3'b000) ? cur_cpld0 : cur_cpld1 ;

   // 11. Enable the read request only when enough completion buffer is
   //  available; required by PCI-Express for deadlock prevention in
   //  endpoint and root
   assign en_rdreq = (cur_cplh >= cred_cplh) & (cur_cpld >= cred_cpld) ;
   // only header credits are checked for this enable
   assign en_npwr  = (cur_cplh >= cred_cplh) ;

   // mem_ctrl[6] clear          -> gated by en_rdreq_r
   // mem_ctrl[6:0] == 7'b1000010
   //   or mem_ctrl[6:1] == 6'b100010 -> gated by en_npwr_r
   // anything else              -> always enabled
   assign en_req = ((mem_ctrl[6]) == 1'b0) ? en_rdreq_r :
                   (mem_ctrl[6:0] == 7'b1000010 |
                    mem_ctrl[6:1] == 6'b100010) ? en_npwr_r :
                                                  1'b1 ;


   // 12. generate the descriptor and the request

   // registered copies of the enables (no reset)
   always @(posedge hclk)
   begin
      en_rdreq_r <= en_rdreq ;
      en_npwr_r  <= en_npwr ;
   end
   
   // Request / descriptor generation.
   // Raises mem_req (tx_req) with a one-clock ack_req pulse when a request
   // may start, builds the 128-bit descriptor while no request is pending,
   // picks a free completion tag, and drives the vc0_out / vc1_out selects.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         mem_req      <= 1'b0 ;
         mem_req_r    <= 1'b0 ;
         ack_req      <= 1'b0 ;
         en_req_r     <= 1'b0 ;
         tx_ack_r     <= 1'b0 ;
         tx_desc_mem  <= 128'h0 ;
         mst_tag      <= 2'b00 ;
         mst_tag_vld  <= 1'b0 ;
         vc_r         <= 3'b000 ;
         cplh_r       <= 7'h0 ;
         cpld_r       <= 11'h0 ;
         dwcnt_msb    <= 1'b0 ;
         req_cpl_chnl <= 1'b0 ;
         vc0_out      <= 1'b0 ;
         vc1_out      <= 1'b0 ;
      end
      else
      begin
         // one-clock delayed copies
         en_req_r  <= en_req ;
         mem_req_r <= mem_req ;
         tx_ack_r  <= tx_ack ;

         //-------------------------------
         // request start: state machine idle, no request pending now or in
         // the previous clock, request enabled, descriptor loaded and
         // settled, at least one completion channel free
         if (req_sm == req_idle & mem_req == 1'b0 & mem_req_r == 1'b0 &
             en_req_r == 1'b1 & mem_desc == 1'b1 & mem_new == 1'b0 &
             cpl_busy == 1'b0 & desc_setup_cnt == 3'h4)
         begin
            mem_req <= 1'b1 ;
            ack_req <= 1'b1 ;
         end
         else
         begin
            ack_req <= 1'b0 ;
            // request is dropped once it has been acknowledged
            if (tx_ack == 1'b1)
            begin
               mem_req <= 1'b0 ;
            end
         end

         //-------------------------------
         // everything captured for the next request is frozen while a
         // request is pending
         if (mem_req == 1'b0)
         begin
            // descriptor bits 127:96
            tx_desc_mem[127:96] <= {mem_ctrl[7:0], 1'b0, mem_ctrl[10:8],
                                    4'b0000, mem_ctrl[15:12], 2'b00,
                                    tx_dwcnt_r[9:0]} ;
            // descriptor bits 95:64
            tx_desc_mem[95:64]  <= {cfg_busdev, 3'b000, 6'b000000,
                                    mst_tag, tx_lbe_d, tx_fbe_d} ;
            // descriptor bits 63:32: DW-aligned 32-bit address when
            // mem_ctrl[5] is clear, upper address word otherwise
            if ((mem_ctrl[5]) == 1'b0)
            begin
               tx_desc_mem[63:32] <= {mem_addr32[31:2], 2'b00} ;
            end
            else
            begin
               tx_desc_mem[63:32] <= mem_addr64 ;
            end
            // descriptor bits 31:0: DW-aligned low address
            tx_desc_mem[31:0] <= {mem_addr32[31:2], 2'b00} ;

            // credit bookkeeping snapshot for this request
            vc_r      <= vc_sel ;
            cplh_r    <= cred_cplh ;
            cpld_r    <= cred_cpld ;
            dwcnt_msb <= tx_dwcnt_r[10] ;

            // requests that occupy a completion channel
            if ((mem_ctrl[6]) == 1'b0 | mem_ctrl[6:0] == 7'b1000010 |
                mem_ctrl[6:1] == 6'b100010)
            begin
               req_cpl_chnl <= 1'b1 ;
            end
            else
            begin
               req_cpl_chnl <= 1'b0 ;
            end
         end

         //-------------------------------
         // tag selection: lowest-numbered free completion channel; only
         // re-evaluated in idle, right after an ack or while no valid tag
         // is held
         if (req_sm == req_idle & (tx_ack_r == 1'b1 | mst_tag_vld == 1'b0))
         begin
            if (cpl_busy0 == 1'b0)
               mst_tag <= 2'b00 ;
            else if (cpl_busy1 == 1'b0)
               mst_tag <= 2'b01 ;
            else if (cpl_busy2 == 1'b0)
               mst_tag <= 2'b10 ;
            else if (cpl_busy3 == 1'b0)
               mst_tag <= 2'b11 ;
         end

         if (tx_ack_r == 1'b1)
         begin
            mst_tag_vld <= ~cpl_busy_n ;
         end

         //-------------------------------
         // VC select outputs only change while nothing is being sent
         if (mem_req == 1'b0 & tx_dfr_r == 1'b0 & tx_dv_r == 1'b0)
         begin
            if (vc_sel == 3'b000)
            begin
               vc0_out <= 1'b1 ;
            end
            else
            begin
               vc0_out <= 1'b0 ;
            end

            if (vc_sel == 3'b001)
            begin
               vc1_out <= 1'b1 ;
            end
            else
            begin
               vc1_out <= 1'b0 ;
            end
         end
      end
   end

   // request and descriptor outputs
   assign tx_desc = tx_desc_mem ;
   assign tx_req  = mem_req ;

   // 13. Set the allocated completion credits in the RX buffer
   //---------------------------------------------------------------------------
   // Header credit is allocated based on the maximum number of completions
   // that the Completer can possibly break up the Read Request into. Once the
   // entire completion is done, all the credits are released. Each unit is 16 bytes
   // as in PCI Express spec
   //
   // Data credit is allocated based on the length in the Read request and
   // is calculated in units of DW. As completions are returned, regardless of how
   // the request is broken up, the data credits are released based on each completion's length
   //---------------------------------------------------------------------------
   
   // xhdl: named slices of the completion space inputs (bits 19:8)
   assign ko_cpl_spc_vc1_19to8 = ko_cpl_spc_vc1[19:8];  //xhdl
   assign ko_cpl_spc_vc0_19to8 = ko_cpl_spc_vc0[19:8];  //xhdl


   // Derives the largest usable read request size code from the VC0
   // completion space (ko_cpl_spc_vc0) and caps it with cfg_maxrdreq.
   // All registers here are clocked without reset, so the result settles
   // three clocks after the inputs.
   always @(posedge hclk)
   begin
      // limit implied by bits 7:0 (position of the leading one)
      casex (ko_cpl_spc_vc0[7:0])
         8'b1xxx_xxxx,
         8'b01xx_xxxx : koh_cfg_maxrdreq <= 3'h5;
         8'b001x_xxxx : koh_cfg_maxrdreq <= 3'h4;
         8'b0001_xxxx : koh_cfg_maxrdreq <= 3'h3;
         8'b0000_1xxx : koh_cfg_maxrdreq <= 3'h2;
         8'b0000_01xx : koh_cfg_maxrdreq <= 3'h1;
         default      : koh_cfg_maxrdreq <= 3'h0;
      endcase

      // limit implied by bits 19:12 (position of the leading one)
      casex (ko_cpl_spc_vc0[19:12])
         8'b1xxx_xxxx,
         8'b01xx_xxxx,
         8'b001x_xxxx,
         8'b0001_xxxx : kod_cfg_maxrdreq <= 3'h5;
         8'b0000_1xxx : kod_cfg_maxrdreq <= 3'h4;
         8'b0000_01xx : kod_cfg_maxrdreq <= 3'h3;
         8'b0000_001x : kod_cfg_maxrdreq <= 3'h2;
         8'b0000_0001 : kod_cfg_maxrdreq <= 3'h1;
         default      : kod_cfg_maxrdreq <= 3'h0;
      endcase

      // the tighter of the two buffer limits ...
      ko_cfg_maxrdreq <= (koh_cfg_maxrdreq > kod_cfg_maxrdreq) ?
                         kod_cfg_maxrdreq : koh_cfg_maxrdreq;

      // ... further capped by the configured max read request size
      cfg_maxrdreq_i <= (ko_cfg_maxrdreq > cfg_maxrdreq) ?
                        cfg_maxrdreq : ko_cfg_maxrdreq;
   end
   
   // Available completion credits per VC.
   // Loaded from ko_cpl_spc_vc0 / ko_cpl_spc_vc1 on the first clock after
   // reset, decremented when a completion-generating request is
   // acknowledged, and incremented when credits are released. An
   // allocation takes priority over a release in the same clock.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         init      <= 1'b0 ;
         cur_cplh0 <= 8'h0 ;
         cur_cplh1 <= 8'h0 ;
         cur_cpld0 <= 14'h0 ;
         cur_cpld1 <= 14'h0 ;
      end
      else
      begin
         init <= 1'b1 ;

         if (init == 1'b0)
         begin
            // initial load; bits 19:8 are scaled by four for the data counter
            cur_cplh0 <= ko_cpl_spc_vc0[7:0] ;
            cur_cpld0 <= {ko_cpl_spc_vc0_19to8, 2'b00} ;
            cur_cplh1 <= ko_cpl_spc_vc1[7:0] ;
            cur_cpld1 <= {ko_cpl_spc_vc1_19to8, 2'b00} ;
         end
         else
         begin
            // ---- VC 0 ----
            if (tx_ack == 1'b1 & vc_r == 3'b000 & req_cpl_chnl == 1'b1)
            begin
               cur_cplh0 <= cur_cplh0 - cplh_r ;
               cur_cpld0 <= cur_cpld0 - cpld_r ;
            end
            else if (cpl_hdrcred_rls == 1'b1 & cpl_vc == 3'b000)
            begin
               cur_cplh0 <= cur_cplh0 + cpl_hdrcred ;
               cur_cpld0 <= cur_cpld0 + cpl_datacred ;
            end

            // ---- VC 1 ----
            if (tx_ack == 1'b1 & vc_r == 3'b001 & req_cpl_chnl == 1'b1)
            begin
               cur_cplh1 <= cur_cplh1 - cplh_r ;
               cur_cpld1 <= cur_cpld1 - cpld_r ;
            end
            else if (cpl_hdrcred_rls == 1'b1 & cpl_vc == 3'b001)
            begin
               cur_cplh1 <= cur_cplh1 + cpl_hdrcred ;
               cur_cpld1 <= cur_cpld1 + cpl_datacred ;
            end
         end
      end
   end
   // 14. Monitor completion channel
   // cpl_busy_n is high only when all four completion channels are in use
   assign cpl_busy_n = cpl_busy0 & cpl_busy1 & cpl_busy2 & cpl_busy3 ;

   // registered copy used to gate new requests
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
         cpl_busy <= 1'b0 ;
      else
         cpl_busy <= cpl_busy_n ;
   end

   // Timebase for the completion channel timers.
   // tim1ms is a free-running counter that wraps after 125000 clocks
   // (count value 124999), or after 72 clocks (count value 71) when testtim
   // is high. ev1ms pulses for one clock at every wrap; the per-channel
   // timers decrement on that pulse.
   //
   // Fix: the test-mode branch used to assign tim1ms a second time and never
   // drove ev1ms. With testtim high the counter could therefore never reach
   // the normal terminal count, ev1ms stayed low, and the channel timers
   // never expired. Both wrap conditions now emit the tick.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         tim1ms <= {17{1'b0}} ; 
         ev1ms <= 1'b0 ; 
      end
      else
      begin
         if (testtim == 1'b1 & tim1ms == 17'b00000000001000111)
         begin
            // test mode: shortened period of 72 clock cycles
            tim1ms <= {17{1'b0}} ; 
            ev1ms <= 1'b1 ; 
         end
         else if (tim1ms == 17'b11110100001000111)
         begin
            // 125000 clock cycle at 8 ns clock period
            tim1ms <= {17{1'b0}} ; 
            ev1ms <= 1'b1 ; 
         end
         else
         begin
            tim1ms <= tim1ms + 1 ; 
            ev1ms <= 1'b0 ; 
         end 
      end 
   end 

   // 15. Completion logic per channel for pipelined request
   // Completion channel 0 (tag 2'b00).
   // The channel is allocated when a completion-generating request carrying
   // this tag is acknowledged, and is freed when its remaining DW count
   // reaches zero or its timer runs out.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         cpl_busy0   <= 1'b0 ;
         rx_tag0_hit <= 1'b0 ;
         timer0      <= 4'h0 ;
         cpl_dwcnt0  <= 11'h0 ;
         cpl_addr0   <= {RAM_SIZE{1'b0}} ;
         hdrcred0    <= 7'h0 ;
         datacred0   <= 11'h0 ;
      end
      else
      begin
         // registered tag compare of the received descriptor
         if (rx_tag[1:0] == 2'b00)
            rx_tag0_hit <= 1'b1 ;
         else
            rx_tag0_hit <= 1'b0 ;

         // allocation on request acknowledge, otherwise consume / release
         if (mst_tag == 2'b00 & tx_ack == 1'b1 & req_cpl_chnl == 1'b1)
         begin
            cpl_busy0  <= 1'b1 ;
            cpl_dwcnt0 <= ({dwcnt_msb, tx_desc_mem[105:96]}) ;
            hdrcred0   <= cplh_r ;
            datacred0  <= cpld_r ;
         end
         else
         begin
            if (cpl_dwcnt0 == 0 | timer0 == 0)
            begin
               cpl_busy0 <= 1'b0 ;
            end
            if (rx_ack_r == 1'b1 & rx_tag0_hit == 1'b1)
            begin
               cpl_dwcnt0 <= cpl_dwcnt0 - rx_dwcnt ;
            end
         end

         // timeout counter: held at 10 while idle, counts ev1ms ticks
         // while busy (10 ms in normal mode)
         if (cpl_busy0 == 1'b0)
         begin
            timer0 <= 4'b1010 ;
         end
         else if (ev1ms == 1'b1)
         begin
            timer0 <= timer0 - 1 ;
         end

         // buffer write address: loaded when the request is issued
         // (ack_req), advanced by half the received DW count per completion
         if (mst_tag == 2'b00 & ack_req == 1'b1 & req_cpl_chnl == 1'b1)
         begin
            cpl_addr0 <= cpl_addr_n ;
         end
         else if (rx_ack_r == 1'b1 & rx_tag0_hit == 1'b1)
         begin
            cpl_addr0 <= cpl_addr0 + rx_dwcnt_div2 ;                 //xhdl
         end
      end
   end

   // Completion channel 1 (tag 2'b01).
   // The channel is allocated when a completion-generating request carrying
   // this tag is acknowledged, and is freed when its remaining DW count
   // reaches zero or its timer runs out.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         cpl_busy1   <= 1'b0 ;
         rx_tag1_hit <= 1'b0 ;
         timer1      <= 4'h0 ;
         cpl_dwcnt1  <= 11'h0 ;
         cpl_addr1   <= {RAM_SIZE{1'b0}} ;
         hdrcred1    <= 7'h0 ;
         datacred1   <= 11'h0 ;
      end
      else
      begin
         // registered tag compare of the received descriptor
         if (rx_tag[1:0] == 2'b01)
            rx_tag1_hit <= 1'b1 ;
         else
            rx_tag1_hit <= 1'b0 ;

         // allocation on request acknowledge, otherwise consume / release
         if (mst_tag == 2'b01 & tx_ack == 1'b1 & req_cpl_chnl == 1'b1)
         begin
            cpl_busy1  <= 1'b1 ;
            cpl_dwcnt1 <= ({dwcnt_msb, tx_desc_mem[105:96]}) ;
            hdrcred1   <= cplh_r ;
            datacred1  <= cpld_r ;
         end
         else
         begin
            if (cpl_dwcnt1 == 0 | timer1 == 0)
            begin
               cpl_busy1 <= 1'b0 ;
            end
            if (rx_ack_r == 1'b1 & rx_tag1_hit == 1'b1)
            begin
               cpl_dwcnt1 <= cpl_dwcnt1 - rx_dwcnt ;
            end
         end

         // timeout counter: held at 10 while idle, counts ev1ms ticks
         // while busy (10 ms in normal mode)
         if (cpl_busy1 == 1'b0)
         begin
            timer1 <= 4'b1010 ;
         end
         else if (ev1ms == 1'b1)
         begin
            timer1 <= timer1 - 1 ;
         end

         // buffer write address: loaded when the request is issued
         // (ack_req), advanced by half the received DW count per completion
         if (mst_tag == 2'b01 & ack_req == 1'b1 & req_cpl_chnl == 1'b1)
         begin
            cpl_addr1 <= cpl_addr_n ;
         end
         else if (rx_ack_r == 1'b1 & rx_tag1_hit == 1'b1)
         begin
            cpl_addr1 <= cpl_addr1 + rx_dwcnt_div2 ;                 //xhdl
         end
      end
   end

   // Completion channel 2
   // Bookkeeping for the request issued with mst_tag == 2'b10: busy flag,
   // timeout counter, outstanding DW count, buffer write address and the
   // cplh_r / cpld_r values latched when the request is acknowledged.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         rx_tag2_hit <= 1'b0 ;
         cpl_busy2   <= 1'b0 ;
         timer2      <= 4'b0000 ;
         hdrcred2    <= 7'b0000000 ;
         datacred2   <= 11'b00000000000 ;
         cpl_dwcnt2  <= 11'b00000000000 ;
         cpl_addr2   <= {RAM_SIZE{1'b0}} ;
      end
      else
      begin
         // registered decode of the low two tag bits of the received descriptor
         if (rx_tag[1:0] == 2'b10)
         begin
            rx_tag2_hit <= 1'b1 ;
         end
         else
         begin
            rx_tag2_hit <= 1'b0 ;
         end

         // busy flag, DW counter and latched credits share the same load event
         if ((mst_tag == 2'b10) && (tx_ack == 1'b1) && (req_cpl_chnl == 1'b1))
         begin
            cpl_busy2  <= 1'b1 ;
            cpl_dwcnt2 <= {dwcnt_msb, tx_desc_mem[105:96]} ;
            hdrcred2   <= cplh_r ;
            datacred2  <= cpld_r ;
         end
         else
         begin
            // channel is released once all DWs arrived or the timer ran out
            if ((cpl_dwcnt2 == 0) || (timer2 == 0))
            begin
               cpl_busy2 <= 1'b0 ;
            end
            // account for the DWs of an acknowledged completion on this tag
            if ((rx_ack_r == 1'b1) && (rx_tag2_hit == 1'b1))
            begin
               cpl_dwcnt2 <= cpl_dwcnt2 - rx_dwcnt ;
            end
         end

         // timeout counter: reloaded while idle, counts ev1ms ticks while busy
         if (cpl_busy2 == 1'b0)
         begin
            timer2 <= 4'd10 ; // 10 ms
         end
         else if (ev1ms == 1'b1)
         begin
            timer2 <= timer2 - 1 ;
         end

         // buffer address: loaded on ack_req, advanced per received completion
         if ((mst_tag == 2'b10) && (ack_req == 1'b1) && (req_cpl_chnl == 1'b1))
         begin
            cpl_addr2 <= cpl_addr_n ;
         end
         else if ((rx_ack_r == 1'b1) && (rx_tag2_hit == 1'b1))
         begin
            cpl_addr2 <= cpl_addr2 + rx_dwcnt_div2 ;
         end
      end
   end

   // Completion channel 3
   // Bookkeeping for the request issued with mst_tag == 2'b11: busy flag,
   // timeout counter, outstanding DW count, buffer write address and the
   // cplh_r / cpld_r values latched when the request is acknowledged.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         rx_tag3_hit <= 1'b0 ;
         cpl_busy3   <= 1'b0 ;
         timer3      <= 4'b0000 ;
         hdrcred3    <= 7'b0000000 ;
         datacred3   <= 11'b00000000000 ;
         cpl_dwcnt3  <= 11'b00000000000 ;
         cpl_addr3   <= {RAM_SIZE{1'b0}} ;
      end
      else
      begin
         // registered decode of the low two tag bits of the received descriptor
         if (rx_tag[1:0] == 2'b11)
         begin
            rx_tag3_hit <= 1'b1 ;
         end
         else
         begin
            rx_tag3_hit <= 1'b0 ;
         end

         // busy flag, DW counter and latched credits share the same load event
         if ((mst_tag == 2'b11) && (tx_ack == 1'b1) && (req_cpl_chnl == 1'b1))
         begin
            cpl_busy3  <= 1'b1 ;
            cpl_dwcnt3 <= {dwcnt_msb, tx_desc_mem[105:96]} ;
            hdrcred3   <= cplh_r ;
            datacred3  <= cpld_r ;
         end
         else
         begin
            // channel is released once all DWs arrived or the timer ran out
            if ((cpl_dwcnt3 == 0) || (timer3 == 0))
            begin
               cpl_busy3 <= 1'b0 ;
            end
            // account for the DWs of an acknowledged completion on this tag
            if ((rx_ack_r == 1'b1) && (rx_tag3_hit == 1'b1))
            begin
               cpl_dwcnt3 <= cpl_dwcnt3 - rx_dwcnt ;
            end
         end

         // timeout counter: reloaded while idle, counts ev1ms ticks while busy
         if (cpl_busy3 == 1'b0)
         begin
            timer3 <= 4'd10 ; // 10 ms
         end
         else if (ev1ms == 1'b1)
         begin
            timer3 <= timer3 - 1 ;
         end

         // buffer address: loaded on ack_req, advanced per received completion
         if ((mst_tag == 2'b11) && (ack_req == 1'b1) && (req_cpl_chnl == 1'b1))
         begin
            cpl_addr3 <= cpl_addr_n ;
         end
         else if ((rx_ack_r == 1'b1) && (rx_tag3_hit == 1'b1))
         begin
            cpl_addr3 <= cpl_addr3 + rx_dwcnt_div2 ;
         end
      end
   end
   // 16. calculate the 'rx_cpld' and 'rx_cplh' of received completion

   // --- fields sliced out of the registered receive descriptor ---
   assign rx_tag   = rx_desc_r[47:40] ;
   assign rx_addr7 = rx_desc_r[38:32] ;
   assign rx_tc    = rx_desc_r[118:116] ;

   // DW count: the 10-bit length field, with bit 10 set when that field is
   // all zeros and descriptor bit 126 is set
   assign rx_dwcnt[9:0] = rx_desc_r[105:96] ;
   assign rx_dwcnt[10]  = (rx_desc_r[105:96] == 10'b0000000000) &
                          (rx_desc_r[126] == 1'b1) ;

   // byte count: the 12-bit field, with bit 12 set when that field is all zeros
   assign rx_bytecnt[11:0] = rx_desc_r[75:64] ;
   assign rx_bytecnt[12]   = (rx_desc_r[75:64] == 12'b000000000000) ;

   // --- data count: DW count divided by 4, rounded up ---
   assign rem_rxcpld = (rx_dwcnt[1:0] != 2'b00) ? 1 : 0 ;
   assign rx_cpld_n  = rx_dwcnt[10:2] + rem_rxcpld ;

   // --- header count: DW count divided by 16 (rcb = 0) or 32 (rcb = 1),
   //     rounded up ---
   assign bas_rxcplh = (rcb == 1'b0) ? rx_dwcnt[10:4]
                                     : {1'b0, rx_dwcnt[10:5]} ;
   assign rem_rxcplh = ((rx_dwcnt[3:0] != 4'b0000)  & (rcb == 1'b0)) ? 1 :
                       ((rx_dwcnt[4:0] != 5'b00000) & (rcb == 1'b1)) ? 1 :
                                                                       0 ;
   assign rx_cplh_n  = bas_rxcplh + rem_rxcplh ;

   // --- end address: DW count plus descriptor bits [38:34], zero extended ---
   assign rx_desc_r_38to34 = rx_desc_r[38:34] ;               // xhdl
   assign rx_addr_dw       = {6'b000000, rx_desc_r_38to34} ;  // xhdl
   assign rx_addrend_n     = rx_dwcnt + rx_addr_dw ;

   // pipelining stage
   // Registers the header count, the DW count, the selected virtual channel
   // and bits [10:5] of the computed end address.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         rx_cplh    <= 7'b0000000 ;
         rx_cpld    <= 11'b00000000000 ;
         rx_addrend <= 6'b000000 ;
         cpl_vc     <= 3'b000 ;
      end
      else
      begin
         rx_cplh    <= rx_cplh_n ;
         rx_cpld    <= rx_dwcnt[10:0] ;
         rx_addrend <= rx_addrend_n[10:5] ;
         cpl_vc     <= cpl_vc_n ;
      end
   end

   // traffic class -> 3-bit slice of cfg_tcvcmap (slice k for class k)
   assign cpl_vc_n = (rx_tc == 3'b000) ? cfg_tcvcmap[2:0]   :
                     (rx_tc == 3'b001) ? cfg_tcvcmap[5:3]   :
                     (rx_tc == 3'b010) ? cfg_tcvcmap[8:6]   :
                     (rx_tc == 3'b011) ? cfg_tcvcmap[11:9]  :
                     (rx_tc == 3'b100) ? cfg_tcvcmap[14:12] :
                     (rx_tc == 3'b101) ? cfg_tcvcmap[17:15] :
                     (rx_tc == 3'b110) ? cfg_tcvcmap[20:18] :
                                         cfg_tcvcmap[23:21] ;

   // Completion channel select, completion acknowledge and credit release.
   //  - cpl_sel         : one-hot channel select, captured in state cpl_rxdesc
   //  - cpl_ack         : set while rx_ack_r and is_rxmst are both high
   //  - cpl_ack_r       : copy of cpl_ack, updated only when cpl_ack is not
   //                      being set
   //  - cpl_hdrcred_rls : pulses when cpl_ack_r is high and a selected channel
   //                      is no longer busy
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         cpl_sel         <= 4'b0000 ;
         cpl_ack         <= 1'b0 ;
         cpl_ack_r       <= 1'b0 ;
         cpl_hdrcred_rls <= 1'b0 ;
      end
      else
      begin
         if (cpl_sm == cpl_rxdesc)
         begin
            // priority decode, channel 0 first
            case (1'b1)
               rx_tag0_hit : cpl_sel <= 4'b0001 ;
               rx_tag1_hit : cpl_sel <= 4'b0010 ;
               rx_tag2_hit : cpl_sel <= 4'b0100 ;
               rx_tag3_hit : cpl_sel <= 4'b1000 ;
               default     : cpl_sel <= 4'b0000 ;
            endcase
         end

         if ((rx_ack_r == 1'b1) && (is_rxmst == 1'b1))
         begin
            cpl_ack <= 1'b1 ;
         end
         else
         begin
            cpl_ack   <= 1'b0 ;
            cpl_ack_r <= cpl_ack ;
            // release header credit when all completions are received
            if (cpl_ack_r & |(cpl_sel & ~({cpl_busy3, cpl_busy2, cpl_busy1, cpl_busy0})))  // XHDL
            begin
               cpl_hdrcred_rls <= 1'b1 ;
            end
            else
            begin
               cpl_hdrcred_rls <= 1'b0 ;
            end
         end
      end
   end

   // latched credit values of the selected channel (lowest set bit of cpl_sel wins)
   assign cpl_hdrcred  = (cpl_sel[0] == 1'b1) ? hdrcred0 :
                         (cpl_sel[1] == 1'b1) ? hdrcred1 :
                         (cpl_sel[2] == 1'b1) ? hdrcred2 :
                         (cpl_sel[3] == 1'b1) ? hdrcred3 :
                                                {7{1'b0}} ;
   assign cpl_datacred = (cpl_sel[0] == 1'b1) ? datacred0 :
                         (cpl_sel[1] == 1'b1) ? datacred1 :
                         (cpl_sel[2] == 1'b1) ? datacred2 :
                         (cpl_sel[3] == 1'b1) ? datacred3 :
                                                {11{1'b0}} ;
   //---------------------------------------------------------------------
   // Transmit state machine
   // is_txwr follows bit 126 of the transmit descriptor.
   assign is_txwr = ((tx_desc_mem[126]) == 1'b1) ? 1'b1 : 1'b0 ;

   // Transmit request process. One clocked block maintains:
   //   req_sm         - request state (req_idle -> req_txdesc -> req_txdata)
   //   txaddr         - read address, loaded from mem_tgt_adr then incremented
   //   need_to_fetch / fetch_cnt - prefetch bookkeeping after a new setup
   //   fetch_first / mem_first / mem_first_del - start pulse and delayed copies
   //   phasecnt       - remaining data phases of the current request
   //   tx_dfr_r / tx_dv_r - registered tx_dfr / tx_dv
   //   data_ramp15    - counter used to build the ramp data patterns
   //   tx_data_r      - transmit data register (zero, ramp or RAM output)
   //   nextcyc_is_last / last_datacyc - flags derived from phasecnt
   // Each register is driven by its own if/else chain below; the chains are
   // independent of one another.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         req_sm <= req_idle ; 
         tx_dfr_r <= 1'b0 ; 
         tx_dv_r <= 1'b0 ; 
         txaddr <= {13{1'b0}} ; 
         need_to_fetch <= 1'b0 ; 
         pipe_fill <= 1'b0 ; 
         fetch_cnt <= {2{1'b0}} ; 
         phasecnt <= {10{1'b0}} ; 
         tx_data_r <= {64{1'b0}} ; 
         fetch_first <= 1'b0 ; 
         mem_first <= 1'b0 ; 
		 mem_first_del <= 1'b0;
         data_ramp15 <= {15{1'b0}} ;   
         nextcyc_is_last <= 1'b0;
         last_datacyc    <= 1'b0;
      end
      else
      begin
	     
		 // mem_first delayed by one clock
		 mem_first_del <= mem_first; 
		 
         case (req_sm)
            req_idle :
                     begin
                        // start a request only when no memory access is
                        // pending, requests are enabled, a descriptor is
                        // available, no completion is outstanding and the
                        // descriptor setup counter has reached 4
                        if (mem_req == 1'b0 & mem_req_r == 1'b0 & 
                        mem_new == 1'b0 & en_req_r == 1'b1 & mem_desc == 
                        1'b1 & cpl_busy == 1'b0 & desc_setup_cnt == 3'h4)
                        begin
                           req_sm <= req_txdesc ; 
                        end 
                     end
            req_txdesc :
                     begin
                        // wait for tx_ack; return to idle when the descriptor
                        // has bit 126 clear (is_txwr == 0) or the data phases
                        // are already finished, otherwise go on to req_txdata
                        if (tx_ack == 1'b1)
                        begin
                           if (is_txwr == 1'b0 | ((rd_out == 1'b1 & 
                           phasecnt == 0) ))
                           begin
                              req_sm <= req_idle ; 
                           end
                           else
                           begin
                              req_sm <= req_txdata ; 
                           end 
                        end 
                     end
            req_txdata :
                     begin
                        // leave once the phase counter is zero and tx_ws is low
                        if ((tx_ws==1'b0) & (phasecnt == 0))    // xhdl
                        begin
                           req_sm <= req_idle ; 
                        end 
                     end
         endcase 
         // Memorize read request parameter
         // When DMA is setup, target address is latch
         // The address is incremented for the rest of the DMA
         if (mem_new == 1'b1 & desc_setup_cnt == 3'h2)
         begin
            txaddr <= mem_tgt_adr ; 
            need_to_fetch <= 1'b1 ; 
            fetch_cnt <= 2'b00 ; 
         end 
	     // prefetch: two increments while fetch_cnt counts 0 -> 2 ...
	     else if (need_to_fetch == 1'b1 & fetch_cnt != 2'b10)  
         begin
            txaddr <= txaddr + 1 ; 
            fetch_cnt <= fetch_cnt + 1 ; 
         end
         // ... and a third increment that also ends the prefetch
         else if (need_to_fetch == 1'b1)
         begin
            // pipe is filled
            txaddr <= txaddr + 1 ; 
            need_to_fetch <= 1'b0 ; 
         end 
	      else if ((~tx_ws & tx_dv_r) | fetch_tx_data ) 
         begin
            // increment address when data is consumed
            txaddr <= txaddr + 1 ; 
         end 
         // fetch_first: asserted for the cycle in which the idle state sees
         // the start condition with is_txwr set, low otherwise
         if (req_sm == req_idle & mem_req == 1'b0 & mem_req_r == 1'b0 & 
         mem_new == 1'b0 & en_req_r == 1'b1 & mem_desc == 1'b1 & cpl_busy 
         == 1'b0 & desc_setup_cnt == 3'h4 & is_txwr == 1'b1)
         begin
            fetch_first <= 1'b1 ; 
         end
         else
         begin
            fetch_first <= 1'b0 ; 
         end 
         // mem_first is fetch_first delayed by one clock
         mem_first <= fetch_first ; 
		 
		 //flags for external use
		 nextcyc_is_last <= (((rd_allow==1'b1) & (phasecnt == 2)) | ((rd_allow==1'b0) & (phasecnt == 1))) ? 1'b1 : 1'b0;  // xhdl
         last_datacyc    <= (((rd_allow==1'b1) & (phasecnt == 1)) | (phasecnt == 0)) ? 1'b1 : 1'b0;                       // xhdl
		 
         // phasecnt: loaded on the same start condition as fetch_first,
         // decremented on every rd_allow afterwards
         if (req_sm == req_idle & mem_req == 1'b0 & mem_req_r == 1'b0 & 
         mem_new == 1'b0 & en_req_r == 1'b1 & mem_desc == 1'b1 & desc_setup_cnt == 3'h4 & cpl_busy 
         == 1'b0 & is_txwr == 1'b1)
         begin
            // phasecnt includes prefetch from memory
			// Pop this many times per completion
            // DW count halved; one extra phase when the DW count is odd or
            // mem_addr32[2] is set (presumably a non 64-bit aligned start
            // address - TODO confirm)
            if ((tx_dwcnt_r[0]) == 1'b1 | (mem_addr32[2]) == 1'b1)
            begin
               phasecnt <= tx_dwcnt_r[10:1] + 1; 
            end
            else
            begin
               phasecnt <= tx_dwcnt_r[10:1]; 
            end 
         end 
		 else if (rd_allow)   
         begin
            phasecnt <= phasecnt - 1 ;  
         end 
		 

		 
		 
         // tx_dfr_r: evaluated every cycle of req_txdesc when descriptor bit
         // 126 is set; in any other case it can only be cleared
         if (req_sm == req_txdesc & (tx_desc_mem[126]) == 1'b1)
         begin
            //  tx_dfr is deasserted before the last data phase
            if ((phasecnt == 1 & (mem_first == 1'b1 | rd_out == 1'b1)) | 
            phasecnt == 0)
            begin
               tx_dfr_r <= 1'b0 ; 
            end
            else
            begin
               tx_dfr_r <= 1'b1 ; 
            end 
         end
         else if (rd_out == 1'b1 & (phasecnt == 1 | phasecnt == 0))
         begin
            tx_dfr_r <= 1'b0 ; 
         end 
         // tx_dv_r: set in req_txdesc once tx_dfr_r is high, cleared in
         // req_txdesc/req_txdata when phasecnt is 0 and either tx_ws is low
         // or fetch_tx_data is high, forced low in req_idle
         if (req_sm == req_txdesc)
         begin
            if (((tx_ws == 1'b0) | (fetch_tx_data==1'b1)) & (phasecnt == 0))   //xhdl
            begin
               tx_dv_r <= 1'b0 ; 
            end
            else if (tx_dfr_r == 1'b1)
            begin
               tx_dv_r <= 1'b1 ; 
            end 
         end
         else if (req_sm == req_txdata)
         begin
            if (((tx_ws == 1'b0) | (fetch_tx_data==1'b1)) & (phasecnt == 0))  //xhdl
            begin
               tx_dv_r <= 1'b0 ; 
            end
            else
            begin
               // explicit hold
               tx_dv_r <= tx_dv_r;    
            end 
         end
         else if (req_sm == req_idle)
         begin
            tx_dv_r <= 1'b0 ; 
         end 
         // data_ramp15: cleared while no descriptor is present and tx_dfr_r
         // is low, incremented on each rd_allow while tx_dfr_r is high
         if (mem_desc == 1'b0 & tx_dfr_r == 1'b0)
         begin
            data_ramp15 <= 15'b000000000000000 ; 
         end
         else if ((rd_allow == 1'b1 ) & tx_dfr_r == 1'b1)
         begin
            data_ramp15 <= data_ramp15 + 1 ; 
         end 
         // tx_data_r source priority: zeros (mem_zero), ramp pattern
         // (mem_ramp1 / mem_ramp4), RAM read data when rdclken is high
         if (mem_zero == 1'b1)
         begin
            tx_data_r <= {64{1'b0}} ; 
         end
         else if (mem_ramp1 == 1'b1 | mem_ramp4 == 1'b1)
         begin
            tx_data_r <= data_ramp ; 
         end
         else if (rdclken == 1'b1)
         begin
            tx_data_r <= dataout ; 
         end 
      end 
   end 
 
 
   assign data_ramp15_4to0 = data_ramp15[4:0] ;                              // xhdl

   // xhdl
   // Byte-wise ramp: each of the eight bytes is the low five counter bits
   // followed by its own lane number (7 in the top byte down to 0)
   assign data_ramp_memramp1 = {
      data_ramp15_4to0, 3'b111,
      data_ramp15_4to0, 3'b110,
      data_ramp15_4to0, 3'b101,
      data_ramp15_4to0, 3'b100,
      data_ramp15_4to0, 3'b011,
      data_ramp15_4to0, 3'b010,
      data_ramp15_4to0, 3'b001,
      data_ramp15_4to0, 3'b000
   } ;

   // Word-wise ramp: two 32-bit words holding the full counter, with a
   // trailing 1 in the upper word and a trailing 0 in the lower word
   assign data_ramp_memramp2 = {
      16'b0000000000000000, data_ramp15, 1'b1,
      16'b0000000000000000, data_ramp15, 1'b0
   } ;

   // mem_ramp1 picks the byte-wise pattern, anything else the word-wise one
   assign data_ramp = (mem_ramp1 == 1'b1) ? data_ramp_memramp1
                                          : data_ramp_memramp2 ;

   // transmit-side outputs
   assign tx_busy = (req_sm != req_idle) ;
   assign tx_dv   = tx_dv_r ;
   assign tx_dfr  = tx_dfr_r ;
   assign tx_err  = 1'b0 ;
   assign tx_adrd = txaddr ;

   // for DPRAM uninitialized value
   // Combinational copy of tx_data_r onto tx_data, one bit at a time.
   // The explicit if/else is deliberate: any bit that does not compare equal
   // to 1'b1 (including an unknown bit in simulation) is driven as 1'b0.
   // Blocking assignments are used because this is combinational logic.
   always @(tx_data_r)
   begin : xhdl_73
      integer i;
      for (i = 0; i <= 63; i = i + 1)
      begin
         if (tx_data_r[i] == 1'b1)
         begin
            tx_data[i] = 1'b1 ;
         end
         else
         begin
            tx_data[i] = 1'b0 ;
         end
      end
   end
   // data phase acknowledge: data valid and no wait state requested
   assign rd_out = tx_dv_r & ~tx_ws ;

   // read operation on DPRAM (including fetch)
   assign rd_allow = fetch_tx_data | rd_out | mem_first ;

   // read clock enable is used to stall the datapath pipeline;
   // it is active during prefetch, on a consumed data phase or on fetch_tx_data
   assign rdclken = (need_to_fetch | (tx_dv_r & ~tx_ws) | fetch_tx_data) ? 1'b1 : 1'b0 ;
   
   //-------------------------------------------------------------------
   // 17. receive state machine
   // Flags decoded from the top bits of the registered receive descriptor:
   //   is_rxwr   - bit 126 set
   //   is_rxmst  - bits [125:121] equal 5'b00101
   //   is_rxdval - bits [127:126] equal 2'b01
   assign is_rxwr   = (rx_desc_r[126] == 1'b1) ;
   assign is_rxmst  = (rx_desc_r[125:121] == 5'b00101) ;
   assign is_rxdval = (rx_desc_r[127:126] == 2'b01) ;
   
   // rx_ack_r delayed by one clock; it qualifies the unexpected-completion
   // flag err_cplunexp. The register uses the same asynchronous hrstn reset
   // as the other registers in this module, so it holds a defined value
   // from reset onwards.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         rx_ack_rr <= 1'b0 ;
      end
      else
      begin
         rx_ack_rr <= rx_ack_r ;
      end
   end

   // Receive state machine: cpl_idl -> cpl_rxdesc -> (cpl_rxdata) -> cpl_idl.
   // A request is acknowledged only when is_rxmst is set; rx_ack_r and rx_ws
   // are raised for the single cycle spent in cpl_rxdesc.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         cpl_sm   <= cpl_idl ;
         rx_ack_r <= 1'b0 ;
         rx_ws    <= 1'b0 ;
      end
      else
      begin
         case (cpl_sm)
            cpl_idl :
               begin
                  if ((rx_req_r == 1'b1) && (is_rxmst == 1'b1))
                  begin
                     cpl_sm   <= cpl_rxdesc ;
                     rx_ack_r <= 1'b1 ;
                     rx_ws    <= 1'b1 ;
                  end
               end

            cpl_rxdesc :
               // app receives rx_ack_r and deasserts rx_ws
               // in this state for one clock
               begin
                  rx_ack_r <= 1'b0 ;
                  rx_ws    <= 1'b0 ;
                  // continue to the data state only when is_rxdval is set
                  if (is_rxdval == 1'b1)
                  begin
                     cpl_sm <= cpl_rxdata ;
                  end
                  else
                  begin
                     cpl_sm <= cpl_idl ;
                  end
               end

            cpl_rxdata :
               begin
                  // stay here until rx_st_eop is seen
                  if (rx_st_eop)
                  begin
                     cpl_sm <= cpl_idl ;
                     rx_ws  <= 1'b0 ;
                  end
               end
         endcase
      end
   end

   assign rx_ack  = rx_ack_r ;
   assign rx_busy = (cpl_sm != cpl_idl) ;

   //-------------------------------------------------------------
   // target write control for DPRAM interface
   //-------------------------------------------------------------
   // rx_wr   : is_rxwr sampled while the receive state machine is idle
   // rx_addr : loaded from the matching channel address in cpl_rxdesc,
   //           then incremented on every allowed write
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         rx_wr   <= 1'b0 ;
         rx_addr <= {RAM_SIZE{1'b0}} ;
      end
      else
      begin
         if (cpl_sm == cpl_idl)
         begin
            rx_wr <= is_rxwr ;
         end

         // latch target memory address on DMA setup
         if (cpl_sm == cpl_rxdesc)
         begin
            // rx_tag_hit are one-hot; should several be set at once the
            // highest-numbered channel is the one that is loaded
            if (rx_tag3_hit == 1'b1)
            begin
               rx_addr <= cpl_addr3 ;
            end
            else if (rx_tag2_hit == 1'b1)
            begin
               rx_addr <= cpl_addr2 ;
            end
            else if (rx_tag1_hit == 1'b1)
            begin
               rx_addr <= cpl_addr1 ;
            end
            else if (rx_tag0_hit == 1'b1)
            begin
               rx_addr <= cpl_addr0 ;
            end
         end
         else if (wr_allow == 1'b1)
         begin
            rx_addr <= rx_addr + 1 ;
         end
      end
   end
   
   
   // write address, data and byte enables presented to the write pipeline
   assign rx_adwr = rx_addr ;
   assign datain  = rx_data ;

   // a write is allowed for a descriptor flagged by rx_wr, while in the
   // data state, on every valid data phase
   assign wr_allow = (rx_wr == 1'b1) & (cpl_sm == cpl_rxdata) & (rx_dv == 1'b1) ;  // brought rx_ws up one cycle

   // byte enables are forced to zero whenever no write is allowed
   assign wren = (wr_allow == 1'b1) ? rx_be : 8'b00000000 ;

 
   // dprambe write port pipeline stages
   // for performance.
   // Three register stages on address, data and byte enables.
   // Note the stage order by suffix: *_del2 -> *_del3 -> *_del (RAM side).
   always @(posedge hclk or negedge hrstn) begin
       if (~hrstn) begin
           // stage 1
           rx_adwr_del2 <= {RAM_SIZE{1'b0}};
           datain_del2  <= {64{1'b0}};
           wren_del2    <= {8{1'b0}};
           // stage 2
           rx_adwr_del3 <= {RAM_SIZE{1'b0}};
           datain_del3  <= {64{1'b0}};
           wren_del3    <= {8{1'b0}};
           // stage 3
           rx_adwr_del  <= {RAM_SIZE{1'b0}};
           datain_del   <= {64{1'b0}};
           wren_del     <= {8{1'b0}};
       end
       else begin
           // stage 1: capture the write port signals
           rx_adwr_del2 <= rx_adwr;
           datain_del2  <= datain;
           wren_del2    <= wren;
           // stage 2
           rx_adwr_del3 <= rx_adwr_del2;
           datain_del3  <= datain_del2;
           wren_del3    <= wren_del2;
           // stage 3: values that drive the RAM write port
           rx_adwr_del  <= rx_adwr_del3;
           datain_del   <= datain_del3;
           wren_del     <= wren_del3;
       end
   end

   // Memory buffer instance. Both ports are clocked by hclk.
   // Write port: last stage of the write pipeline (rx_adwr_del / datain_del /
   // wren_del). Read port: addressed by tx_adrd, gated by rdclken, data
   // returned on dataout.
   // NOTE(review): the positional parameters (RAM_SIZE, 64, 8) are presumably
   // address width, data width and byte-enable width - confirm against the
   // altpcierd_dprambe declaration.
   altpcierd_dprambe #(RAM_SIZE, 64, 8) dprambe(
      .wrclk(hclk), 
      .wraddr(rx_adwr_del[RAM_SIZE - 1:0]), 
      .wrdata(datain_del), 
      .wrbe(wren_del), 
      .rdclk(hclk), 
      .rdclken(rdclken), 
      .rdaddr(tx_adrd[RAM_SIZE - 1:0]), 
      .rddata(dataout)
   ); 
 
   // pending transaction
   // Registered flag: high while at least one completion channel is busy.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         trans_pending <= 1'b0 ;
      end
      else if (|{cpl_busy3, cpl_busy2, cpl_busy1, cpl_busy0})
      begin
         trans_pending <= 1'b1 ;
      end
      else
      begin
         trans_pending <= 1'b0 ;
      end
   end
   // error check
   // per-channel timeout: channel still busy when its timer has reached zero
   assign err_tim0 = (cpl_busy0 == 1'b1) & (timer0 == 0) ;
   assign err_tim1 = (cpl_busy1 == 1'b1) & (timer1 == 0) ;
   assign err_tim2 = (cpl_busy2 == 1'b1) & (timer2 == 0) ;
   assign err_tim3 = (cpl_busy3 == 1'b1) & (timer3 == 0) ;

   // Registered error outputs:
   //   err_cpltim   - any channel reports a timeout
   //   err_cplunexp - delayed receive acknowledge while no channel is selected
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         err_cpltim   <= 1'b0 ;
         err_cplunexp <= 1'b0 ;
      end
      else
      begin
         err_cpltim <= err_tim0 | err_tim1 | err_tim2 | err_tim3 ;

         if ((rx_ack_rr == 1'b1) && (cpl_sel == 4'b0000))
         begin
            err_cplunexp <= 1'b1 ;
         end
         else
         begin
            err_cplunexp <= 1'b0 ;
         end
      end
   end
endmodule
