#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "dmaengine.h"
/*
 * Descriptor word encoding.
 * NOTE(review): the code that packs these values into descriptor words is
 * outside this chunk; what each constant selects is inferred from its name
 * only and should be confirmed against the users.
 */
#define DESC_TYPE 27
#define DESC_TYPE_HOST 0x10
#define DESC_TYPE_TEARD 0x13
#define TD_DESC_IS_RX (1 << 16)
#define TD_DESC_DMA_NUM 10
#define DESC_LENGTH_BITS_NUM 21
#define DESC_TYPE_USB (5 << 26)
/*
 * Bit 31 is built from an unsigned literal: shifting a signed 1 into the
 * sign bit overflows int and the result would sign-extend when widened.
 */
#define DESC_PD_COMPLETE (1U << 31)
/* DMA engine */
#define DMA_TDFDQ 4
/* Per-index offsets: index x advances by 0x20; TX starts at 0x800, RX at 0x808. */
#define DMA_TXGCR(x) (0x800 + (x) * 0x20)
#define DMA_RXGCR(x) (0x808 + (x) * 0x20)
#define RXHPCRA0 4
/*
 * Bit 31 is built from an unsigned literal: shifting a signed 1 into the
 * sign bit overflows int and the result would sign-extend when widened.
 */
#define GCR_CHAN_ENABLE (1U << 31)
#define GCR_TEARDOWN (1 << 30)
#define GCR_STARV_RETRY (1 << 24)
#define GCR_DESC_TYPE_HOST (1 << 14)
/* DMA scheduler */
#define DMA_SCHED_CTRL 0
/*
 * Bit 31 constants in this group use an unsigned literal: shifting a signed 1
 * into the sign bit overflows int and the result would sign-extend when
 * widened.
 */
#define DMA_SCHED_CTRL_EN (1U << 31)
/* Offset of scheduler word x: 4 bytes per word, starting at 0x800. */
#define DMA_SCHED_WORD(x) ((x) * 4 + 0x800)
/*
 * Four entries per 32-bit word, one per byte: the channel value is shifted
 * into the byte and the IS_RX flag is the top bit of that byte.
 */
#define SCHED_ENTRY0_CHAN(x) ((x) << 0)
#define SCHED_ENTRY0_IS_RX (1 << 7)
#define SCHED_ENTRY1_CHAN(x) ((x) << 8)
#define SCHED_ENTRY1_IS_RX (1 << 15)
#define SCHED_ENTRY2_CHAN(x) ((x) << 16)
#define SCHED_ENTRY2_IS_RX (1 << 23)
#define SCHED_ENTRY3_CHAN(x) ((x) << 24)
#define SCHED_ENTRY3_IS_RX (1U << 31)
/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM 128
#define DESCS_AREAS 1
#define TOTAL_DESCS_NUM (ALLOC_DECS_NUM * DESCS_AREAS)
/* Four bytes of scratch per descriptor. */
#define QMGR_SCRATCH_SIZE (TOTAL_DESCS_NUM * 4)
#define QMGR_LRAM0_BASE 0x80
#define QMGR_LRAM_SIZE 0x84
#define QMGR_LRAM1_BASE 0x88
/* Per-region offsets: region x advances by 0x10. */
#define QMGR_MEMBASE(x) (0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x) (0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH 16
#define QMGR_MEMCTRL_DESC_SH 8
#define QMGR_NUM_PEND 5
/* Offset of pending word x: 4 bytes per word, starting at 0x90. */
#define QMGR_PEND(x) (0x90 + (x) * 4)
/*
 * Split a queue number into a 32-bit word index and a bit index within that
 * word. The argument is parenthesized so that an expression such as
 * "base + n" is divided/reduced as a whole rather than only its last term.
 */
#define QMGR_PENDING_SLOT_Q(x) ((x) / 32)
#define QMGR_PENDING_BIT_Q(x) ((x) % 32)
/* Per-queue offsets A..D: queue n advances by 0x10, starting at 0x2000. */
#define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10)
/* Glue layer specific */

/*
 * USBSS / USB AM335x
 * NOTE(review): the first three values look like register offsets and the
 * last one a bit mask; the accessors using them are outside this chunk.
 */
#define USBSS_IRQ_STATUS	0x28
#define USBSS_IRQ_ENABLER	0x2c
#define USBSS_IRQ_CLEARR	0x30

#define USBSS_IRQ_PD_COMP	(1 << 2)
/*
 * Per-channel state. Embeds a dmaengine channel and one
 * dma_async_tx_descriptor, and holds a pointer to a struct cppi41_dd.
 * NOTE(review): notes marked "presumably" are inferred from field names; the
 * code that reads and writes these fields is outside this chunk.
 */
struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;

	/* presumably the CPU pointer and DMA address of the same descriptor */
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;

	/* memory-mapped register pointer for this channel */
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	/* presumably teardown bookkeeping: a retry counter and one-bit flags */
	unsigned int td_retry;
	unsigned int td_queued:1;
	unsigned int td_seen:1;
	unsigned int td_desc_seen:1;
};
/*
 * One descriptor: eight u32 words, pd0 through pd7, with the structure
 * aligned to 32 bytes. The declaration order is the in-memory word order,
 * so the members must stay in this sequence.
 */
struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;

	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);
/*
 * A pair of 16-bit queue numbers; the usb_queues_tx table in this file
 * fills in one such pair per index.
 */
struct chan_queues {
	u16 submit;	/* presumably the queue descriptors are pushed to */
	u16 complete;	/* presumably the queue finished descriptors arrive on */
};
/*
 * Controller-wide state: the embedded dmaengine device plus memory handles,
 * memory-mapped register pointers, the IRQ number and the queue tables.
 * NOTE(review): notes marked "presumably" are inferred from field names; the
 * code that uses these fields is outside this chunk.
 */
struct cppi41_dd {
	struct dma_device ddev;

	/* presumably CPU pointer / DMA address pairs for the two allocations */
	void *qmgr_scratch;
	dma_addr_t scratch_phys;
	struct cppi41_desc *cd;
	dma_addr_t descs_phys;

	u32 first_td_desc;
	/* one channel pointer slot per ALLOC_DECS_NUM entry */
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	/* memory-mapped register regions */
	void __iomem *usbss_mem;
	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;

	unsigned int irq;

	/* read-only queue-number tables for RX and TX, plus one extra pair */
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
};
/*
 * Same value as the .complete entry of usb_queues_tx[0] in this file.
 * NOTE(review): "FIST" looks like a typo for "FIRST"; the spelling is kept
 * because the name may be referenced by code outside this chunk.
 */
#define FIST_COMPLETION_QUEUE 93
static struct chan_queues usb_queues_tx[] = {
/* USB0 ENDP 1 */
[ 0] = { .submit = 32, .complete = 93},
[ 1] = { .submit = 34, .complete = 94},
[ 2] = { .submit = 36, .complete = 95},
[ 3] = { .submit = 38, .complete = 96},
[ 4] = { .submit = 40, .complete = 97},
[ 5] = { .submit = 42, .complete = 98},
[ 6] = { .submit = 44, .complete = 99},
[ 7] = { .submit = 46, .complete = 100},
[ 8] = { .submit = 48, .complete = 101},
[ 9] = { .submit = 50, .complete = 102},
[10] = { .submit = 52, .complete = 103},
[11] = { .submit = 54, .complete = 104},
[12] = { .submit = 56, .complete = 105},
[13] =