// /**
// *
// *  This Verilog HDL File is used for simulation and synthesis in the simple DMA design example.  
// *  This contains the DMA engine configuration registers and buffer memory for target transactions.
// *
// */
// synthesis verilog_input_version verilog_2001
`timescale 1 ps / 1 ps 
//-----------------------------------------------------------------------------
// Title         : PCI Express Reference Design Slave Entity
// Project       : PCI Express MegaCore function
//-----------------------------------------------------------------------------
// File          : altpcierd_slave.v
// Author        : Altera Corporation
//-----------------------------------------------------------------------------
// Description :
// This is an entity that handles target transactions from the PCI Express interface
//-----------------------------------------------------------------------------
// Copyright (c) 2005-2006 Altera Corporation. All rights reserved.  Altera products are
// protected under numerous U.S. and foreign patents, maskwork rights, copyrights and
// other intellectual property laws.  
//
// This reference design file, and your use thereof, is subject to and governed by
// the terms and conditions of the applicable Altera Reference Design License Agreement.
// By using this reference design file, you indicate your acceptance of such terms and
// conditions between you and Altera Corporation.  In the event that you do not agree with
// such terms and conditions, you may not use the reference design file. Please promptly
// destroy any copies you have made.
//
// This reference design file being provided on an "as-is" basis and as an accommodation 
// and therefore all warranties, representations or guarantees of any kind 
// (whether express, implied or statutory) including, without limitation, warranties of 
// merchantability, non-infringement, or fitness for a particular purpose, are 
// specifically disclaimed.  By making this reference design file available, Altera
// expressly does not recommend, suggest or require that this reference design file be
// used in combination with any other product not provided by Altera. 
//-----------------------------------------------------------------------------
// altpcierd_slave: target (slave) side of the simple DMA reference design.
// Requests arrive on the rx_* descriptor/data interface.  Write data is
// stored either in a byte-enabled dual-port RAM or in the DMA / control
// registers; read requests are answered with completions on the tx_*
// interface.  Every register is clocked by hclk; hrstn is an active-low
// asynchronous reset.
module altpcierd_slave (hclk, hrstn, cfg_busdev, cfg_maxpload, 
cfg_tcvcmap, m_req, m_size, m_addr, m_ctrl, m_tgt_adr, m_ack, tx_intreq, 
pm_event, err_cplabort, pm_event_pb, m_pending, vc0_out, vc1_out, 
rx_busy, tx_busy, rx_req, rx_ack, rx_desc, rx_ws, rx_data, rx_be, rx_dv, 
rx_dfr, tx_req, tx_desc, tx_ack, tx_dfr, tx_data, tx_dv, tx_err, tx_ws, 
app_msi_num, app_msi_tc,
fetch_tx_data, nextcyc_is_last, last_datacyc,
avalon_tx_data, rx_st_eop );

   // Address width of the dual-port RAM (wraddr/rdaddr are [RAM_SIZE-1:0]).
   parameter RAM_SIZE  = 8;
   input hclk;                 // clock
   input hrstn;                // asynchronous reset, active low
   input[12:0] cfg_busdev;     // copied into tx_desc[95:83]
   input[2:0] cfg_maxpload;    // selects the completion boundary mask (adb_mask)
   input[23:0] cfg_tcvcmap;    // eight 3-bit entries selected by rx_desc[118:116]
   output m_req;               // DMA request: set by a write to DMA register 1, cleared by m_ack
   wire m_req;
   output[31:0] m_size;        // DMA register 1, lower dword
   wire[31:0] m_size;
   output[63:0] m_addr;        // DMA register 0
   wire[63:0] m_addr;
   output[31:0] m_ctrl;        // DMA register 1, upper dword
   wire[31:0] m_ctrl;
   output[12:0] m_tgt_adr;     // DMA register 2, write data bits [47:35]
   wire[12:0] m_tgt_adr;
   input m_ack;                // clears m_req
   output tx_intreq;           // bit 0 of the register at write address offset 2
   wire tx_intreq;
   output pm_event;            // pulsed after a write to offset 3 or on a push-button rising edge
   reg pm_event;
   output err_cplabort;        // pulsed when a descriptor is acknowledged while cpl_abort is set
   reg err_cplabort;
   input pm_event_pb; // PM event push button
   input m_pending; // Indicates that a DMA is pending
   output vc0_out;             // latched in tr_idle: cpl_vc == 0
   reg vc0_out;
   output vc1_out;             // latched in tr_idle: cpl_vc == 1
   reg vc1_out;
   output rx_busy;             // high whenever bk_sm is not in bk_idl
   wire rx_busy;
   output tx_busy;             // high whenever tgtrd_sm is not in tr_idle
   wire tx_busy;
   input rx_req;               // receive request; rx_desc is decoded while it is high
   output rx_ack;              // asserted for one cycle when a request is accepted
   reg rx_ack;
   input[135:0] rx_desc;       // receive descriptor
   output rx_ws;               // registered wait-state flag (rx_ws_i)
   input[63:0] rx_data;        // receive data, written to RAM / registers
   input[7:0] rx_be;           // not referenced by the logic in this module
   input rx_dv;                // receive data valid, qualifies wr_allow
   input rx_dfr;               // only referenced in commented-out code
   output tx_req;              // completion descriptor request
   reg tx_req;
   output[127:0] tx_desc;      // completion descriptor
   wire[127:0] tx_desc;
   input tx_ack;               // descriptor acknowledge
   output tx_dfr;              // deasserted before the last data phase
   wire tx_dfr;
   output[63:0] tx_data;       // completion data (tx_data_r with unknown bits forced to 0)
   reg[63:0] tx_data;
   output tx_dv;               // completion data valid
   wire tx_dv;
   output tx_err;              // tied to 0
   wire tx_err;
   input tx_ws;                // transmit wait state: data is consumed only when low
   input rx_st_eop;            // ends the write data state (bk_tgtdata -> bk_idl)
   
   input fetch_tx_data;     // external control to force a data read for tx
    
   
   output [4:0] app_msi_num;   // written from datain[8:4] at write address offset 2
   reg    [4:0] app_msi_num;
   output [2:0] app_msi_tc;    // written from datain[14:12] at write address offset 2
   reg    [2:0] app_msi_tc;
   
   output nextcyc_is_last;  // indicates that next data cycle is last for tx pkt (last dfr cycle)
   output last_datacyc;     // indicates that this data cycle is last for tx pkt (last dv cycle)
 
   // streaming interface 
   output[63:0] avalon_tx_data;   // same value as tx_data
   wire[63:0]   avalon_tx_data; 

   // Receive (target request) state machine encoding:
   //   bk_idl     - waiting for rx_req
   //   bk_tgtack  - cycle following acceptance; rx_ack/rx_ws_i are dropped here
   //   bk_tgtdata - data phases, left when rx_st_eop is seen
   parameter[1:0] bk_idl = 0; 
   parameter[1:0] bk_tgtack = 1; 
   parameter[1:0] bk_tgtdata = 2; 
   reg[1:0] bk_sm;
// bk_tgtack_r is high the cycle after bk_sm was in bk_tgtack; the _rr/_rrr/_rrrr
// copies delay it by one further cycle each.
reg 	    bk_tgtack_r;
reg 	    bk_tgtack_rr;
reg 	    bk_tgtack_rrr;
reg 	    bk_tgtack_rrrr;
   // Target read / completion state machine encoding.  tr_idle0/tr_idle1 and
   // tr_reload0/1/2 are stepped through unconditionally (pipeline delay states).
   parameter[3:0] tr_idle = 0; 
   parameter[3:0] tr_reload0 = 1; 
   parameter[3:0] tr_reqcpl = 2; 
   parameter[3:0] tr_reqdata = 3; 
   parameter[3:0] tr_busy = 4;
   parameter[3:0] tr_reload1 = 5;
   parameter[3:0] tr_reload2 = 6;
   parameter[3:0] tr_idle0 = 7;
   parameter[3:0] tr_idle1 = 8; 
   reg[3:0] tgtrd_sm; 
   // Decoded fields of rx_desc
   wire is_tgt;                // rx_desc[124:122] == 0
   wire is_wr;                 // rx_desc[126]
   wire is_dval;               // rx_desc[127:126] == 2'b01
   wire[9:0] r_len;            // rx_desc[105:96]
   wire[31:0] r_ad32;          // low 32 address bits, position selected by rx_desc[125]
   // Target write path
   reg[11:0] rx_wrad;          // qword write address, increments on wr_allow
   reg rx_wr;                  // is_wr sampled while bk_sm is idle
   wire wr_allow;              // a write data phase is being accepted this cycle
   wire[63:0] datain; 
   wire[63:0] dataout;         // dual-port RAM read data
   wire[11:0] tgt_adwr; 
   wire[11:0] tgt_adrd; 
   wire[7:0] wren;             // per-byte RAM write enables
   wire[7:0] rx_bv; 
   wire[3:0] r_fbe;            // rx_desc[67:64]
   wire[3:0] r_lbe;            // rx_desc[71:68]
   reg[31:0] adr_mem;          // address of the current completion, advanced per completion
   // DMA registers, readable and writable through the target interface
   reg[31:0] m_size_r; 
   reg[31:0] m_ctrl_r; 
   reg[63:0] m_addr_r; 
   reg[12:0] m_tgt_adr_r;
// r_ad32[6:3] compared against 0/1/2, sampled while bk_sm is idle
reg 	     tgt_adr_eq_0;
reg 	     tgt_adr_eq_1;
reg 	     tgt_adr_eq_2;
   reg tx_intreq_r; 
   reg cpl_abort_r; 
   reg[1:0] pm_event_pb_r;     // two-stage history of pm_event_pb for edge detection
   reg[9:0] phasecnt;          // remaining 64-bit data phases of the current completion
   reg[11:0] tgtrdaddr;        // dual-port RAM read address
   reg[1:0] fetch_cnt;         // counts prefetch reads
   reg need_to_fetch;          // prefetch in progress
   wire rdclken; 
   reg[127:64] desc_mem;       // upper half of the request descriptor, latched in tr_idle
   reg fetch_first; 
   reg mem_first; 
   reg mem_first_del;          // written but never read in this module
   reg tx_dv_r; 
   wire rd_allow; 
   wire rd_out; 
   reg tx_dfr_r; 
   wire[1:0] adr_lsb;          // byte offset derived from the first byte enables
   reg[63:0] tx_data_r; 
   reg[8:0] tgt_adrd9_r;       // registered low bits of the read address, selects the register read back
   // Byte-count computation for read completions
   wire[10:0] t_len;           // r_len extended so that r_len == 0 reads as 1024
   wire[2:0] b_lbe;            // bytes removed at the end (from r_lbe)
   wire[2:0] b_fbe;            // bytes removed at the start (from r_fbe)
   reg[10:0] t_len_r; 
   reg[2:0] b_lbe_r; 
   reg[2:0] b_fbe_r;
   reg[12:0] t_len_rr;         // {t_len_r, 2'b00} - b_fbe_r - b_lbe_r
   reg[12:0] bytecount;        // bytes still to be returned
   reg dpram_sel;              // current request targets the dual-port RAM
   reg reg_sel;                // current request targets the registers
   reg m_req_r; 
   wire cpl_abort; 
   reg pm_event_mem; 
   reg[9:0] pm_cnt; 
   wire[2:0] cpl_vc; 
   wire[12:0] adb_mask; 
   wire[12:0] bc2adb_n;        // adr_msk_r + 1
   reg[12:0] next_bytecount_r; // byte count of the next completion
   wire[12:0] next_bytecount;  // combinational twin of next_bytecount_r; not read in this module
   reg[10:0] next_dwcount_r;   // dword count of the next completion
   wire[12:0] lastoffset; 
   wire remain; 
   wire[12:0] adr_msk;
   reg[12:0] adr_msk_r; 

   wire [7:0] rx_be_i;         // byte enables applied to the current write data phase
   reg [7:0] rx_be_1st;        // byte enables for the first data phase
reg 	     rx_1st_dataphase;
reg 	     rx_ws_i; 
   reg       nextcyc_is_last;    
   reg       last_datacyc; 
   reg       rx_req_del;       // written but never read in this module
   
   // support for streaming interface
   reg       desc_phase2;      // only reset; its remaining uses are commented out
    
   // Streaming-interface copy of the completion data.
   assign avalon_tx_data = tx_data;

   // Request classification taken straight from the receive descriptor:
   //   is_wr   - rx_desc[126] is set
   //   is_tgt  - rx_desc[124:122] is all zero
   //   is_dval - rx_desc[127:126] equals 2'b01
   assign is_wr   = (rx_desc[126]     == 1'b1);
   assign is_tgt  = (rx_desc[124:122] == 3'b000);
   assign is_dval = (rx_desc[127:126] == 2'b01);

   // The wait-state output is the registered flag driven by the bk_sm FSM.
   assign rx_ws   = rx_ws_i;
 
   
   
//--------------------------------------------------
// Byte enables for target-write data phases
//--------------------------------------------------
   // No byte is enabled unless the request is a write.  The first data phase
   // uses the mask prepared from the descriptor; later phases enable all bytes.
   assign rx_be_i = (~rx_wr)           ? 8'h00     :
                    (rx_1st_dataphase) ? rx_be_1st :
                                         8'hFF;

   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         rx_req_del       <= 1'b0;
         rx_be_1st        <= 8'h00;
         rx_1st_dataphase <= 1'b0;
      end
      else
      begin
         rx_req_del <= rx_req;

         // The flag is armed by a request and dropped by the first accepted
         // data phase (the drop has priority).
         if (wr_allow)
            rx_1st_dataphase <= 1'b0;
         else if (rx_req)
            rx_1st_dataphase <= 1'b1;

         // First-phase mask, recomputed while the request is presented.
         // Address bit 2 lives in rx_desc[34] when rx_desc[125] is 0 (3DW)
         // and in rx_desc[2] when rx_desc[125] is 1 (4DW).
         if (rx_req)
         begin
            if ((!rx_desc[125] && rx_desc[34]) ||
                ( rx_desc[125] && rx_desc[2]))
               rx_be_1st <= 8'hF0;          // start in the upper dword
            else if (rx_desc[105:96] == 10'h1)
               rx_be_1st <= 8'h0F;          // single dword in the lower half
            else
               rx_be_1st <= 8'hFF;
         end
      end
   end

   // Three-stage delay line fed by bk_tgtack_r (these stages have no reset).
   always @(posedge hclk)
   begin
      {bk_tgtack_rrrr, bk_tgtack_rrr, bk_tgtack_rr} <=
         {bk_tgtack_rrr, bk_tgtack_rr, bk_tgtack_r};
   end
   
   
   // Receive-side state machine: accepts a target request, acknowledges it for
   // one cycle and then waits for the end of the write data.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         bk_sm       <= bk_idl;
         bk_tgtack_r <= 1'b0;
         rx_ack      <= 1'b0;
         rx_ws_i     <= 1'b0;
         dpram_sel   <= 1'b0;
         reg_sel     <= 1'b0;
      end
      else
      begin
         // Registered "was in bk_tgtack" flag (uses the state before any
         // transition scheduled below).
         bk_tgtack_r <= (bk_sm == bk_tgtack);

         case (bk_sm)
            bk_idl :
               // A target write is always accepted; a target read only while
               // the completion state machine is idle.
               if ((rx_req == 1'b1) && (is_tgt == 1'b1) &&
                   ((is_wr == 1'b1) || (tgtrd_sm == tr_idle)))
               begin
                  bk_sm   <= bk_tgtack;
                  rx_ack  <= 1'b1;
                  rx_ws_i <= 1'b1;

                  // Destination select from rx_desc[133:128]; the RAM wins
                  // over the registers, and neither is selected when all six
                  // bits are zero.
                  dpram_sel <= 1'b0;
                  reg_sel   <= 1'b0;
                  if ((rx_desc[133:132] != 2'b00) ||
                      (rx_desc[129:128] != 2'b00))
                     dpram_sel <= 1'b1;
                  else if (rx_desc[131:130] != 2'b00)
                     reg_sel <= 1'b1;
               end

            bk_tgtack :
               begin
                  rx_ack  <= 1'b0;
                  rx_ws_i <= 1'b0;
                  // Only requests that carry data go through the data state.
                  if (is_dval == 1'b1)
                     bk_sm <= bk_tgtdata;
                  else
                     bk_sm <= bk_idl;
               end

            bk_tgtdata :
               // Stay here until the end-of-packet marker arrives.
               if (rx_st_eop)
               begin
                  rx_ws_i <= 1'b0;
                  bk_sm   <= bk_idl;
               end
         endcase
      end
   end

   // Busy whenever a request is being handled.
   assign rx_busy = (bk_sm != bk_idl);
   //-------------------------------------------------------------
   // target write control for DPRAM interface                                        
   //-------------------------------------------------------------
   // Descriptor fields used by the target logic.  The low 32 address bits sit
   // in rx_desc[31:0] when rx_desc[125] is set, otherwise in rx_desc[63:32].
   assign r_ad32 = (rx_desc[125] == 1'b1) ? rx_desc[31:0] : rx_desc[63:32];
   assign r_len  = rx_desc[105:96];
   assign r_lbe  = rx_desc[71:68];
   assign r_fbe  = rx_desc[67:64];

   // Pre-decoded register index (qword address bits [6:3]), captured only
   // while the receive state machine is idle.  No reset on these flops.
   always @(posedge hclk)
   begin
      if (bk_sm == bk_idl)
      begin
         tgt_adr_eq_0 <= (r_ad32[6:3] == 4'h0);
         tgt_adr_eq_1 <= (r_ad32[6:3] == 4'h1);
         tgt_adr_eq_2 <= (r_ad32[6:3] == 4'h2);
      end
   end

   // Write flag and qword write address.  Both follow the descriptor while the
   // receive state machine is idle; afterwards the address advances by one for
   // every accepted write data phase.
   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         rx_wr   <= 1'b0;
         rx_wrad <= {12{1'b0}};
      end
      else if (bk_sm == bk_idl)
      begin
         rx_wr   <= is_wr;
         rx_wrad <= r_ad32[14:3];
      end
      else if (wr_allow == 1'b1)
      begin
         rx_wrad <= rx_wrad + 1;
      end
   end

   assign tgt_adwr = rx_wrad;
   
   
    // A write data phase is accepted when the request is a write, the receive
    // state machine is in its data state and rx_dv flags valid data.
    assign wr_allow = (rx_wr == 1'b1) && (bk_sm == bk_tgtdata) && (rx_dv == 1'b1);

   assign datain = rx_data;
   assign rx_bv  = rx_be_i;

   // Byte write enables reach the RAM only when the RAM is the destination.
   assign wren = ((wr_allow == 1'b1) && (dpram_sel == 1'b1)) ? rx_bv : 8'h00;




   // Byte-enabled dual-port buffer memory.  Parameters are passed by position:
   // RAM_SIZE is the address width; 64 and 8 are presumably the data width and
   // the number of byte enables - confirm against altpcierd_dprambe.
   altpcierd_dprambe #(RAM_SIZE, 64, 8) dprambe (
      // write port: target write data
      .wrclk   (hclk),
      .wrbe    (wren),
      .wraddr  (tgt_adwr[RAM_SIZE - 1:0]),
      .wrdata  (datain),
      // read port: completion data
      .rdclk   (hclk),
      .rdclken (rdclken),
      .rdaddr  (tgt_adrd[RAM_SIZE - 1:0]),
      .rddata  (dataout)
   );

   // DMA configuration registers, written through the target interface.
   //   register 0 (tgt_adr_eq_0): m_addr_r[31:0] / m_addr_r[63:32]
   //   register 1 (tgt_adr_eq_1): m_size_r (lower dword) / m_ctrl_r (upper dword)
   //   register 2 (tgt_adr_eq_2): m_tgt_adr_r from data bits [47:35]
   // Writes are ignored while a DMA request is outstanding (m_req_r set).
   // The unused integer locals of the original block have been removed.
   always @(negedge hrstn or posedge hclk)
   begin : xhdl_23
      if (hrstn == 1'b0)
      begin
         m_req_r <= 1'b0 ; 
         m_addr_r <= {64{1'b0}} ; 
         m_size_r <= {32{1'b0}} ; 
         m_ctrl_r <= {32{1'b0}} ; 
         m_tgt_adr_r <= {13{1'b0}} ; 
      end
      else
      begin
         if (m_req_r == 1'b0 & reg_sel == 1'b1 & wr_allow == 1'b1)
         begin
            // rx_be_1st[0] / rx_be_1st[4] stand for the lower / upper dword.
            if (tgt_adr_eq_0 == 1'b1)
            begin
               if (rx_be_1st[0] == 1'b1)
                  m_addr_r[31:0] <= datain[31:0];
               if (rx_be_1st[4] == 1'b1)
                  m_addr_r[63:32] <= datain[63:32];
            end
            else if (tgt_adr_eq_1 == 1'b1)
            begin
               if (rx_be_1st[0] == 1'b1)
                  m_size_r <= datain[31:0];
               if (rx_be_1st[4] == 1'b1)
                  m_ctrl_r <= datain[63:32];
            end
            else if (tgt_adr_eq_2 == 1'b1)
            begin
               if (rx_be_1st[4] == 1'b1)
                  m_tgt_adr_r <= datain[47:35];
            end
         end 

         // A write that touches the upper dword of register 1 raises the DMA
         // request; m_ack takes it down again.
         if (m_req_r == 1'b0 & reg_sel == 1'b1 & wr_allow == 1'b1 & 
          tgt_adr_eq_1 == 1'b1 & rx_be_i[7:4] != 4'b0000)
         begin
            m_req_r <= 1'b1 ; 
         end
         else if (m_ack == 1'b1)
         begin
            m_req_r <= 1'b0 ; 
         end 
      end 
   end 
   assign m_req = m_req_r ;
   assign m_size = m_size_r ;
   assign m_addr = m_addr_r ;
   assign m_ctrl = m_ctrl_r ;
   assign m_tgt_adr = m_tgt_adr_r ;

   //-----------------------------------------------------------------------
   // Interrupt, PME and Abort Register
   
   // MSI number / traffic class, written together with tx_intreq_r by a write
   // to register offset 2 with byte enable 0 set.  The registers are now
   // cleared by hrstn, like tx_intreq_r, so the outputs are never unknown
   // between reset and the first write.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         app_msi_num <= 5'b00000;
         app_msi_tc  <= 3'b000;
      end
      else if (reg_sel == 1'b1 & wr_allow == 1'b1 & tgt_adwr[3:0] == 4'b0010 & (rx_be_i[0]) == 1'b1)
      begin
         app_msi_num <= datain[8:4];
         app_msi_tc  <= datain[14:12];
      end
   end

   // Strobes for writes to the control registers.  All of them require the
   // register space to be selected, an accepted write data phase and byte
   // enable 0; the low four bits of the write address pick the register.
   wire csr_wr_be0 = (reg_sel == 1'b1) & (wr_allow == 1'b1) & (rx_be_i[0] == 1'b1);
   wire intreq_wr  = csr_wr_be0 & (tgt_adwr[3:0] == 4'b0010);
   wire pme_wr     = csr_wr_be0 & (tgt_adwr[3:0] == 4'b0011);
   wire abort_wr   = csr_wr_be0 & (tgt_adwr[3:0] == 4'b0100);
   // A completion descriptor is being acknowledged.
   wire cpl_acked  = (tgtrd_sm == tr_reqcpl) & (tx_ack == 1'b1);

   always @(posedge hclk or negedge hrstn)
   begin
      if (hrstn == 1'b0)
      begin
         tx_intreq_r   <= 1'b0;
         cpl_abort_r   <= 1'b0;
         err_cplabort  <= 1'b0;
         pm_event      <= 1'b0;
         pm_event_mem  <= 1'b0;
         pm_event_pb_r <= 2'b00;
         pm_cnt        <= {10{1'b0}};
      end
      else
      begin
         // Interrupt request bit (offset 2, data bit 0).
         if (intreq_wr)
            tx_intreq_r <= datain[0];

         // Push-button history; 2'b01 marks a rising edge.
         pm_event_pb_r <= {pm_event_pb_r[0], pm_event_pb};

         // Software PM event (offset 3, data bit 0) is remembered until the
         // delay counter has expired.
         if (pme_wr)
            pm_event_mem <= datain[0];
         else if (pm_cnt == 0)
            pm_event_mem <= 1'b0;

         // Delay counter: reloaded by the register write, then counts to zero.
         if (pme_wr)
            pm_cnt <= 10'b1111111111;
         else if (pm_cnt != 0)
            pm_cnt <= pm_cnt - 1;

         // pm_event is raised only while the counter is at zero, for a stored
         // software event or a push-button rising edge.
         pm_event <= 1'b0;
         if ((pm_event_mem == 1'b1 | pm_event_pb_r == 2'b01) & pm_cnt == 0)
            pm_event <= 1'b1;

         // Completion-abort control (offset 4, data bit 0); it clears itself
         // once a completion descriptor has been acknowledged.
         if (abort_wr)
            cpl_abort_r <= datain[0];
         else if (cpl_acked)
            cpl_abort_r <= 1'b0;

         // Report the aborted completion for the cycle after its acknowledge.
         err_cplabort <= 1'b0;
         if ((cpl_abort == 1'b1) & cpl_acked)
            err_cplabort <= 1'b1;
      end
   end

   assign tx_intreq = tx_intreq_r;
   assign cpl_abort = cpl_abort_r;
   // Look up the 3-bit cfg_tcvcmap entry selected by rx_desc[118:116]
   // (entry n occupies bits [3n+2:3n]).
   wire [2:0] cpl_tc = rx_desc[118:116];

   assign cpl_vc = (cpl_tc == 3'd0) ? cfg_tcvcmap[ 2: 0] :
                   (cpl_tc == 3'd1) ? cfg_tcvcmap[ 5: 3] :
                   (cpl_tc == 3'd2) ? cfg_tcvcmap[ 8: 6] :
                   (cpl_tc == 3'd3) ? cfg_tcvcmap[11: 9] :
                   (cpl_tc == 3'd4) ? cfg_tcvcmap[14:12] :
                   (cpl_tc == 3'd5) ? cfg_tcvcmap[17:15] :
                   (cpl_tc == 3'd6) ? cfg_tcvcmap[20:18] :
                                      cfg_tcvcmap[23:21];
   //----------------------------------------------------------------
   // target completion generation                                   
   //----------------------------------------------------------------
   // Compute byte count to reach next allowable boundary
   // 128 byte
   // 256 byte
   // 512 byte
   // 1K byte
   // 2K byte
   // Boundary mask (boundary size minus one) for the configured payload
   // setting: 128, 256, 512, 1K, 2K bytes, and 4K for any other value.
   assign adb_mask = (cfg_maxpload == 3'b000) ? 13'h007F :
                     (cfg_maxpload == 3'b001) ? 13'h00FF :
                     (cfg_maxpload == 3'b010) ? 13'h01FF :
                     (cfg_maxpload == 3'b011) ? 13'h03FF :
                     (cfg_maxpload == 3'b100) ? 13'h07FF :
                                                13'h0FFF;

   // Bytes left up to the next boundary: invert the address bits under the
   // mask, then add one (the add works on the registered copy adr_msk_r).
   assign adr_msk  = adb_mask & ~adr_mem[12:0];
   assign bc2adb_n = adr_msk_r + 13'd1;

   // Next completion carries the smaller of "bytes remaining" and
   // "bytes to the boundary".
   assign next_bytecount = (bytecount <= bc2adb_n) ? bytecount : bc2adb_n;

   // End offset of the next completion relative to its first dword; a
   // non-zero byte remainder means one more dword is needed.
   assign lastoffset = next_bytecount_r + adr_mem[1:0];
   assign remain     = (lastoffset[1:0] != 2'b00);

   // Requested byte count: length in bytes minus the bytes disabled at the
   // start and at the end.  This stage has no reset.
   always @(posedge hclk)
   begin
      t_len_rr <= {t_len_r, 2'b00} - b_fbe_r - b_lbe_r;
   end
   
   // Target read / completion engine.
   // State flow of tgtrd_sm:
   //   tr_idle   -> tr_idle0 -> tr_idle1 -> tr_busy      (target read accepted;
   //                the two extra states let t_len_r / t_len_rr settle before
   //                bytecount is loaded in tr_idle1)
   //   tr_busy   -> tr_idle                              (bytecount == 0)
   //   tr_busy   -> tr_reload0 -> tr_reload1 -> tr_reload2 -> tr_reqcpl
   //   tr_reqcpl -> tr_busy or tr_reqdata                (on tx_ack)
   //   tr_reqdata-> tr_busy                              (last data phase taken)
   // The same block also maintains the RAM read address / prefetch logic,
   // the tx_req, tx_dfr and tx_dv handshake registers and the completion
   // data register tx_data_r.
   always @(negedge hrstn or posedge hclk)
   begin
      if (hrstn == 1'b0)
      begin
         tgtrd_sm <= tr_idle ; 
         tx_req <= 1'b0 ; 
         tx_dfr_r <= 1'b0 ; 
         tx_dv_r <= 1'b0 ; 
         desc_mem <= {64{1'b0}} ; 
         adr_mem <= {32{1'b0}} ; 
         tgtrdaddr <= {12{1'b0}} ; 
         phasecnt <= {10{1'b0}} ; 
         tgt_adrd9_r <= {9{1'b0}} ; 
         tx_data_r <= {64{1'b0}} ; 
         need_to_fetch <= 1'b0 ; 
         fetch_cnt <= {2{1'b0}} ; 
         fetch_first <= 1'b0 ; 
         mem_first <= 1'b0 ; 
		 mem_first_del <= 1'b0;
         bytecount <= {13{1'b0}} ; 
         vc0_out <= 1'b0 ; 
         vc1_out <= 1'b0 ; 
         next_bytecount_r <= {13{1'b0}} ; 
         next_dwcount_r <= {11{1'b0}} ;
         t_len_r <= {11{1'b0}} ; 
      b_fbe_r <= 3'h0;
      b_lbe_r <= 3'h0;
      adr_msk_r <= {13{1'b0}} ; 
		 nextcyc_is_last <= 1'b0;
		 last_datacyc    <= 1'b0;
		 desc_phase2     <= 1'b0;
      end
      else
      begin
	     mem_first_del <= mem_first; 
		 
         // ---- state transitions ----
         case (tgtrd_sm)
            tr_idle :
                     begin
                        // start on a target read request while the receive FSM is idle
                        if (bk_sm == bk_idl & rx_req == 1'b1 & is_tgt == 
                        1'b1 & is_wr == 1'b0)
                        begin
                           tgtrd_sm <= tr_idle0 ; 
                        end 
                     end
	 tr_idle0:
           tgtrd_sm <= tr_idle1;
	 tr_idle1:
           tgtrd_sm <= tr_busy;
	 tr_reload0:
           tgtrd_sm <= tr_reload1 ;
	 tr_reload1: begin
           tgtrd_sm <= tr_reload2; 
	 end
            tr_reload2 :
                     begin
                        tgtrd_sm <= tr_reqcpl ; 
                     end
            tr_reqcpl :
                     begin
                        if (tx_ack == 1'b1)
                        begin
                           // no further data state is needed for an aborted completion
                           // or when the last data phase is taken in this very cycle
                           if (cpl_abort == 1'b1 | (rd_out == 1'b1 & 
                           phasecnt == 0))
                           begin
                              tgtrd_sm <= tr_busy ; 
                           end
                           else
                           begin
                              tgtrd_sm <= tr_reqdata ; 
                           end 
                        end 
                     end
            tr_reqdata :
                     begin 
						if ((tx_ws==1'b0) & (phasecnt == 0))   
                        begin
                           tgtrd_sm <= tr_busy ; 
                        end 
                     end
            tr_busy :
                     begin
                        // finished when no bytes remain, otherwise build the next completion
                        if ((bytecount == 13'b0000000000000))  
                        begin
                           tgtrd_sm <= tr_idle ; 
                        end
                        else  
                        begin
                           tgtrd_sm <= tr_reload0 ; 
                        end 
                     end
         endcase
      // pipeline registers for the byte-count arithmetic (updated every cycle)
      t_len_r <= t_len;
      b_fbe_r <= b_fbe;
      b_lbe_r <= b_lbe;
      adr_msk_r <= adr_msk; 
	  
         // ---- request capture, address and byte count ----
         if (tgtrd_sm == tr_idle)
         begin
            desc_mem <= rx_desc[127:64] ; 
            adr_mem[31:2] <= r_ad32[31:2] ; 
            adr_mem[1:0] <= adr_lsb ; 
            // Memorize read request parameter
            if (cpl_vc == 3'b000)
            begin
               vc0_out <= 1'b1 ; 
            end
            else
            begin
               vc0_out <= 1'b0 ; 
            end 
            if (cpl_vc == 3'b001)
            begin
               vc1_out <= 1'b1 ; 
            end
            else
            begin
               vc1_out <= 1'b0 ; 
            end 
         end
         else if (tgtrd_sm == tr_idle1)
	   bytecount            <= t_len_rr;
         else if ((tgtrd_sm == tr_reqdata | (tgtrd_sm == tr_reqcpl & 
		 tx_ack == 1'b1)) & (tx_ws==1'b0) & (phasecnt == 0))  
         begin
            // Update address & bytecount
            adr_mem <= adr_mem + next_bytecount_r ; 
            bytecount <= bytecount - next_bytecount_r ; 
         end 
         // Compute next bytecount & dword count
         if (bytecount > bc2adb_n)
         begin
            next_bytecount_r <= bc2adb_n ; 
         end
         else
         begin
            next_bytecount_r <= bytecount ; 
         end 
         if (remain == 1'b1)
         begin
            next_dwcount_r <= lastoffset[12:2] + 1 ; 
         end
         else
           begin
            next_dwcount_r <= lastoffset[12:2] ; 
         end 
         // ---- RAM read address and prefetch ----
         // bk_tgtack_rrrr is high four cycles after bk_sm was in bk_tgtack.
         if (bk_tgtack_rrrr)
         begin
            tgtrdaddr <= adr_mem[14:3] ; 
            need_to_fetch <= ~reg_sel ; // prefetch only when not targeting register
            fetch_cnt <= 2'b00 ; 
         end
         else if (need_to_fetch == 1'b1 & fetch_cnt != 2'b10)
         begin
            tgtrdaddr <= tgtrdaddr + 1 ; 
            fetch_cnt <= fetch_cnt + 1 ; 
         end
         else if (need_to_fetch == 1'b1)
         begin
            // pipe is filled
            tgtrdaddr <= tgtrdaddr + 1 ; 
            need_to_fetch <= 1'b0 ; 
         end 
		 else if ((~tx_ws & tx_dv_r)  | (fetch_tx_data & ~reg_sel)) 
         begin
            // increment address when data is consumed
            tgtrdaddr <= tgtrdaddr + 1 ; 
         end 
         // ---- descriptor request: raised leaving tr_reload2, dropped on tx_ack ----
         if (tgtrd_sm == tr_reload2)
         begin
            tx_req <= 1'b1 ; 
         end
         else if (tx_ack == 1'b1)
         begin
            tx_req <= 1'b0 ; 
         end 
		 
		 //-----------------------------
		 // streaming interface support
		 //-----------------------------
/*		 
		 if (tx_ack) begin
		     desc_phase2 <= 1'b1;
	     end
		 else if (tx_avalon_ready) begin
		     desc_phase2 <= 1'b0;
		 end
*/		 
		 //-----------------------------
		 
         // fetch_first marks tr_reload2; mem_first follows one cycle later and
         // forces the first read through rd_allow.
         if (tgtrd_sm == tr_reload2)
         begin
            fetch_first <= 1'b1 ; 
         end
         else
         begin
            fetch_first <= 1'b0 ; 
         end 
         mem_first <= fetch_first ; 
		 
		 // flags for external use
		 nextcyc_is_last <= (((rd_allow==1'b1) & (phasecnt == 2)) | ((rd_allow==1'b0) & (phasecnt == 1))) ? 1'b1 : 1'b0;  //xhdl
         last_datacyc    <= (((rd_allow==1'b1) & (phasecnt == 1)) | (phasecnt == 0)) ? 1'b1 : 1'b0;                       //xhdl
		 
         // ---- data phase counter: half the dword count, rounded up when the
         //      dword count is odd or the start address has bit 2 set ----
         if (tgtrd_sm == tr_reload2)
         begin
            // phasecnt includes prefetch from memory
            if ((next_dwcount_r[0]) == 1'b1 | (adr_mem[2]) == 1'b1)
            begin
               phasecnt <= next_dwcount_r[10:1] + 1 ;  
            end
            else
            begin
               phasecnt <= next_dwcount_r[10:1] ;  
            end 
         end 
	    else if (rd_allow)  
         begin
            phasecnt <= phasecnt - 1 ;  
         end  
		  
         // ---- tx_dfr ----
		 if (tgtrd_sm == tr_reqcpl)
         begin
            // tx_dfr is deasserted before the last data phase
            if (cpl_abort == 1'b1)
            begin
               tx_dfr_r <= 1'b0 ; 
            end
            else if ((phasecnt == 1 & (mem_first == 1'b1 | rd_out == 
            1'b1)) | phasecnt == 0)
            begin
               tx_dfr_r <= 1'b0 ; 
            end
            else
            begin
               tx_dfr_r <= 1'b1 ; 
            end 
         end
         else if (rd_out == 1'b1 & (phasecnt == 1 | phasecnt == 0))
         begin
            tx_dfr_r <= 1'b0 ; 
         end 
		 
         // ---- tx_dv ----
         if (tgtrd_sm == tr_reqcpl)
         begin
            // direct path
            if (((tx_ws == 1'b0) | (fetch_tx_data==1'b1)) & (phasecnt == 0))  // xhdl
            begin
               tx_dv_r <= 1'b0 ; 
            end
            else if (tx_dfr_r == 1'b1)
            begin
               tx_dv_r <= 1'b1 ; 
            end 
         end
         else if (tgtrd_sm == tr_reqdata)
         begin
            if (((tx_ws == 1'b0) | (fetch_tx_data==1'b1)) & (phasecnt == 0))  // xhdl
            begin
               tx_dv_r <= 1'b0 ; 
            end
            else
            begin
            //   tx_dv_r <= 1'b1 ; 
			   tx_dv_r <= tx_dv_r;   
            end 
         end
         else if (tgtrd_sm == tr_idle)
         begin
            tx_dv_r <= 1'b0 ; 
         end 
         // ---- completion data register: RAM data, or a register read back
         //      selected by the registered read address ----
         if (tgtrd_sm == tr_idle)
         begin
            tx_data_r <= {64{1'b0}} ; 
         end
         else if (rd_allow == 1'b1)
         begin
            if (reg_sel == 1'b0)
            begin
               if (rdclken == 1'b1)
               begin
                  tx_data_r <= dataout ; 
               end  
			/*
			   else if (desc_phase2) begin
			      tx_data_r <= tx_desc[63:0];          //  descriptor should be ready ahead of time
			   end
			   else if ((tgtrd_sm == tr_reload2) begin // this is assertion of tx_req
			      tx_data_r <= tx_desc[127:64];        //  descriptor should be ready ahead of time
			   end
			 */
            end
            else
            begin
               case (tgt_adrd9_r[3:0])
                  4'b0000 :
                           begin
                              // DMA register 0: address
                              tx_data_r <= m_addr_r ; 
                           end
                  4'b0001 :
                           begin
                              // DMA register 1: m_pending replaces control bit 31
                              tx_data_r <= {m_pending, m_ctrl_r[30:0], 
                              m_size_r} ; 
                           end
                  4'b0010 :
                           begin
                              // target address in bits [47:35], interrupt request in bit 0
                              tx_data_r <= {16'b0000000000000000, 
                                            m_tgt_adr_r, 
                                            3'b000, 
                                            31'b0000000000000000000000000000000, 
                                            tx_intreq_r} ; 
                           end
                  4'b0011 :
                           begin
                              tx_data_r <= {64{1'b0}} ; 
                           end
                  4'b0100 :
                           begin
                              // completion-abort control in bit 0
                              tx_data_r <= 
                              {32'b00000000000000000000000000000000, 
                              31'b0000000000000000000000000000000, 
                              cpl_abort_r} ; 
                           end
                  4'b0101 :
                           begin
                              tx_data_r <= {64{1'b0}} ; 
                           end
                  default :
                           begin
                              tx_data_r <= {64{1'b0}} ; 
                           end
               endcase 
            end 
         end 
         tgt_adrd9_r <= tgt_adrd[8:0] ; 
      end 
   end 
   assign tx_dv = tx_dv_r ;
   // busy whenever a read request is being served
   assign tx_busy = (tgtrd_sm == tr_idle) ? 1'b0 : 1'b1 ;

   // for DPRAM uninitialized value
   // Copies tx_data_r to tx_data bit by bit.  The explicit comparison against
   // 1'b1 turns any unknown (X/Z) bit, e.g. from uninitialised RAM contents in
   // simulation, into 0.  This is combinational logic, so blocking
   // assignments are used (the original used non-blocking ones).
   always @(tx_data_r)
   begin : xhdl_44
      integer i; 
      for (i = 0; i <= 63; i = i + 1)
      begin
         if ((tx_data_r[i]) == 1'b1)
         begin
            tx_data[i] = 1'b1 ; 
         end
         else
         begin
            tx_data[i] = 1'b0 ; 
         end 
      end
   end 
   // A transmit data phase completes when data is valid and no wait state is
   // requested.
   assign rd_out   = tx_dv_r & ~tx_ws;

   // tx_data_r / phasecnt advance on the first fetch of a completion, on every
   // completed data phase, or when the external fetch control is asserted.
   assign rd_allow = mem_first | rd_out | fetch_tx_data;

   // RAM read clock enable: stalls the read pipeline unless a prefetch is in
   // progress, a data phase completes, or the external fetch control is set.
   assign rdclken  = need_to_fetch | rd_out | fetch_tx_data;
   
   // generate
   // Completion descriptor, assembled field by field (MSB first).  The field
   // names in the comments are presumably the PCI Express completion header
   // fields - confirm against the core's descriptor format.
   assign tx_desc = {
      ((cpl_abort == 1'b0) ? 8'b01001010 : 8'b00001010), // [127:120] differs in bit 126 when aborting
      desc_mem[119:112], 1'b0, desc_mem[110:106],        // [119:106] copied from the request, bit 111 cleared
      next_dwcount_r[9:0],                               // [105:96]  dword count of this completion
      cfg_busdev,                                        // [95:83]
      3'b000,                                            // [82:80]
      ((cpl_abort == 1'b0) ? 3'b000 : 3'b100),           // [79:77]   status, 3'b100 when aborting
      1'b0, bytecount[11:0],                             // [76:64]   remaining byte count
      desc_mem[95:72], 1'b0, adr_mem[6:0],               // [63:32]   request bits [95:72] and low address
      {32{1'b0}}                                         // [31:0]
   };

   // Dual-port RAM read address
   assign tgt_adrd = tgtrdaddr;
   assign tx_dfr   = tx_dfr_r;
   // No transmit error is ever signalled.
   assign tx_err   = 1'b0;
   // Bytes to subtract from (length * 4) because of the first byte enables.
   // All-zero enables count as 4 bytes off, or as 3 when the length is one
   // dword; three enables set -> 1, one enable set -> 3, anything else -> 2.
   assign b_fbe = (r_fbe == 4'hF)                           ? 3'd0 :
                  ((r_fbe == 4'h0) & (r_len != 10'd1))      ? 3'd4 :
                  (r_fbe == 4'h0)                           ? 3'd3 :
                  (r_fbe == 4'hE | r_fbe == 4'hD |
                   r_fbe == 4'hB | r_fbe == 4'h7)           ? 3'd1 :
                  (r_fbe == 4'h8 | r_fbe == 4'h4 |
                   r_fbe == 4'h2 | r_fbe == 4'h1)           ? 3'd3 :
                                                              3'd2;

   // Same for the last byte enables; all-ones and all-zeros both subtract
   // nothing.
   assign b_lbe = (r_lbe == 4'hF | r_lbe == 4'h0)           ? 3'd0 :
                  (r_lbe == 4'hE | r_lbe == 4'hD |
                   r_lbe == 4'hB | r_lbe == 4'h7)           ? 3'd1 :
                  (r_lbe == 4'h8 | r_lbe == 4'h4 |
                   r_lbe == 4'h2 | r_lbe == 4'h1)           ? 3'd3 :
                                                              3'd2;

   // Length in dwords with an extra MSB so that a length field of 0 reads
   // as 1024.
   assign t_len = {(r_len == 10'b0000000000), r_len};

   // Byte offset of the first enabled byte (0 when no byte is enabled).
   assign adr_lsb = r_fbe[0] ? 2'b00 :
                    r_fbe[1] ? 2'b01 :
                    r_fbe[2] ? 2'b10 :
                    r_fbe[3] ? 2'b11 :
                               2'b00;
endmodule
