/*
 *  linux/arch/arm/mach-merlin/codec_reset.c
 *
 *  Copyright (C) 2008 Mobilygen Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#ifndef DOXYGEN_SKIP

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/platform_device.h>
#include <asm/cacheflush.h>
#include <asm/io.h>

#include <mach/platform.h>
#include <mach/mobi_qcc.h>
#include <mach/mobi_codec_reset.h>

#include "codec_reset_qccdefs.h"
#include "codec_reset_cmdtable.h"

#endif

/* add nops between ldm/stm for speed */
#define SPLIT_RETRY_BUG

/* Set to 1 to compile in the debug printks and the /proc trigger entry */
#define CODEC_RESET_DEBUG 0

#if CODEC_RESET_DEBUG
/*
 * No trailing ';' in the expansion: the caller supplies it, so that
 * dprintk() behaves as a single statement (safe in un-braced if/else).
 */
#define dprintk(x...) printk(x)
static struct proc_dir_entry *codec_reset_proc_dir;
#else
/* expands to an empty statement so call sites stay syntactically valid */
#define dprintk(x...) do { } while (0)
#endif

extern struct qcc_cmd_def qcc_reset_cmds[];

/*
 * dtcmcopy - burst copy, eight words (32 bytes) per iteration
 * @src:  address to copy from
 * @dst:  address to copy to
 * @size: byte offset, from @src, of the last 32-byte block to copy
 *
 * No boundary checking is done.  The loop test is inclusive: the block
 * that starts at @src + @size is still copied, so callers wanting to move
 * N bytes pass N - 32.
 *
 * The pointers are handed to the asm as read-write operands and every
 * register, the flags and memory touched by the sequence are declared,
 * so the compiler cannot place an operand in a register the asm
 * overwrites nor cache memory contents across the copy.
 */
static noinline void dtcmcopy(uint32_t src, uint32_t dst, uint32_t size)
{
	uint32_t end = src + size;	/* address of the last block to copy */

	asm volatile (
			"1:\n\t"
			"ldmia   %0!, {r3-r10}\n\t"
#ifdef SPLIT_RETRY_BUG
			/* "mov r0, r0" is a nop: it changes neither r0 nor the flags */
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
#endif
			"stmia   %1!, {r3-r10}\n\t"
			"cmp     %0, %2\n\t"
			"ble     1b"
			: "+r" (src), "+r" (dst)
			: "r" (end)
			: "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
			  "cc", "memory"
			  );
}

/*
 * relocate_and_jump - copy the code that follows the call site and
 * continue executing the copy with the MMU and data cache turned off.
 * @dst_virt: address the code is copied to
 * @size:     number of bytes, counted from the return address, to copy
 * @dst_phys: address branched to once the copy is complete
 *
 * The copy source is the value of 'lr' on entry, i.e. the address of the
 * instruction following the call to this function.  That is why this
 * function must not be inlined: a real call is needed so that lr is set.
 *
 * No boundary checking is done.  Data is moved eight words (32 bytes) at
 * a time and the loop test is inclusive, so more than @size bytes can be
 * copied.  Per the original author, not enough is relocated to worry
 * about writing past the end of the DTCM.
 *
 * Attribute 'naked' means the asm below has to take care of all register
 * preservation itself: r4-r9, sl, fp and the entry sp are pushed on the
 * current stack and reloaded just before the jump.  The caller in this
 * file (codec_reset_run) points sp into the DTCM before calling.
 *
 * This function does not return through lr; it ends with a branch to
 * @dst_phys.
 *
 * NOTE(review): the asm uses compiler-allocated operands (%0-%2) inside a
 * naked function and moves them into r0, r1 and r3 one after the other;
 * this looks like it relies on the arguments still being in r0-r2 at that
 * point -- confirm against the generated code.
 */
static noinline __attribute__((naked)) void relocate_and_jump(
		uint32_t dst_virt, uint32_t size, uint32_t dst_phys)
{

	asm volatile (
			"mov ip, sp\n\t"     /* keep the entry sp so it is saved too */
			"stmdb sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip}\n\t"
			"mov r0, %0\n\t"     /* copy destination (dst_virt) */
			"mov r1, %1\n\t"     /* size */
			"mov r3, %2\n\t"     /* jump target (dst_phys) */
			"mov r2, lr\n\t"     /* copy source: the return address */
			"add r1, lr, r1\n\t" /* end address of the copy */
			"1:\n\t"
			"ldmia r2!, {r4-r11}\n\t"
#ifdef SPLIT_RETRY_BUG
			/* "mov r0, r0" is a nop; padding between the ldm and the stm */
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
			"mov r0, r0\n\t"
#endif
			"stmia r0!, {r4-r11}\n\t"
			"cmp r2, r1\n\t"
			"ble 1b\n\t"         /* inclusive: also copies the block at the end address */
			"mrc p15, 0, r0, c1, c0, 0\n\t"  /* read the CP15 c1 control register */
			"bic r0, r0, #0x5\n\t"           /* clear bits 0 and 2 */
			"ldmia sp, {r4, r5, r6, r7, r8, r9, sl, fp, sp}\n\t" /* restore, incl. entry sp */
			"mcr p15, 0, r0, c1, c0, 0\n\t"  /* turn off mmu & dcache */
			"bx r3\n\t"    /* jump to physical address of code start */
			:
			: "r"(dst_virt), "r"(size), "r"(dst_phys)
			);
}

/*
 * codec_reset_run - replay the QCC command table from code running in DTCM
 * @dtcm_virt: virtual mapping of the DTCM
 *
 * Everything after the call to relocate_and_jump() executes from the copy
 * placed at a 4K offset in the DTCM, with the MMU and data cache off, so
 * the command table (qcc_cmd = DTCM_BASE) and the QCC bridge
 * (qcc = QCC_BASE) are reached through those addresses directly.  The MMU
 * and data cache are turned on again just before returning.
 *
 * Returns 0 when every command was issued, or -EIO (r0 = ~4) as soon as
 * the status of a write does not check out.
 *
 * NOTE(review): the function is 'naked' yet declares C locals and contains
 * compiled C code; presumably the compiler keeps everything in registers
 * -- confirm that no stack slots are used in the generated code.
 */
static noinline __attribute__((naked)) int codec_reset_run(void __iomem *dtcm_virt)
{
	int i;
	qccreg_CSRCmd_t CSRCmd;
	qccreg_CSRStat_t CSRStat;
	struct qcc_cmd_def *qcc_cmd;
	qccbridge_t *qcc;

	/* use attribute naked, and we will just preserve our registers in DTCM;
	 * this way, we can restore the registers with the MMU turned off.  push
	 * using dtcm_virt, pop using dtcm physical.  Just start at 8K offset.
	 * ip carries the caller's sp, so it is saved along with the others.
	 */
	asm volatile (
			"mov ip, sp\n\t"
			"mov sp, %0\n\t"
			"add sp, sp, #0x2000\n\t"
			"stmdb sp!, {r4, r5, r6, r7, fp, ip, lr, pc}\n\t"
			:
			: "r"(dtcm_virt)
			);

	CSRCmd.r = 0;
	qcc_cmd = (struct qcc_cmd_def *) DTCM_BASE;
	qcc = (qccbridge_t *) QCC_BASE;
	/* we only do writes, so this is always 1 */
	CSRCmd.b.CSRAccess = 1;

#if CODEC_RESET_DEBUG
#if 0
	/* just a way to print the cmd table */
	qcc_cmd = (struct qcc_cmd_def *) dtcm_virt;
	//for (i = 0; i < ARRAY_SIZE(qcc_reset_cmds); i++, qcc_cmd++) {
	for (i = 0; i < 16; i++, qcc_cmd++) {
		dprintk("blockID %02d addr "
				"0x%04x size 0x%02x data 0x%08x\n",
				qcc_cmd->blockID,
				qcc_cmd->addr,
				qcc_cmd->size,
				qcc_cmd->data);
	}
	qcc_cmd = (struct qcc_cmd_def *) DTCM_BASE;
#endif
#endif

	/*
	 * we copy in blocks of 8 words (32 bytes);
	 * we only have to copy code to the end of this function.
	 * we must be careful not to copy past the end of the
	 * DTCM nor outside of the address space of this module
	 * when it is loaded
	 */
	/* let's just put the code at a 4K offset to make things simple */
	relocate_and_jump(
			((uint32_t)(dtcm_virt+SZ_4K)),
			512,
			((uint32_t)(DTCM_BASE+SZ_4K)));

	/*
	 * asm volatile("relocateStart:"); was found as a way to
	 * put a label here, but the compiler sticks an instruction
	 * after the jump so the relocation needs to include that
	 * instruction; that's why we use a function and then copy
	 * from the lr
	 */

	for (i = 0; i < ARRAY_SIZE(qcc_reset_cmds); i++, qcc_cmd++) {

		/* skip any unsupported commands */
		if (qcc_cmd->cmd != QCC_WRITE)
			continue;

		/* clear the done bit */
		qcc->CSRStat.r = 0x1;

		CSRCmd.b.CSRBlockID = qcc_cmd->blockID;
		CSRCmd.b.CSRLen = qcc_cmd->size;
		qcc->CSRAddr.r = qcc_cmd->addr;
		qcc->CSRWrData.r = qcc_cmd->data;
		/* commit the write, access bit was set outside the loop */
		qcc->CSRCmd.r = CSRCmd.r;

		if (CSRCmd.b.CSRBlockID == 8 && qcc_cmd->addr == 0x00c8) {
			/* a few things here:
			 * first, when doing the reset we can't test for
			 * status, so we skip the poll and status check.
			 * second, because of a bug in the QCC, when we write
			 * the reset the cmd will "bounce", causing multiple
			 * resets, so we have to do a dummy write after the
			 * reset so the reset won't bounce.
			 * third, we need a delay of about 480 arm cycles
			 * between the reset and the dummy write, so stick
			 * a little counter in here
			 */

			/* add a delay; an empty for loop is optimized out, so use asm */
			asm volatile (
					"mov r0, #0xff\n\t"
					"1:\n\t"
					"subs r0, r0, #1\n\t"
					"bne 1b\n\t"
					: : : "r0"
					);
			/* the dummy write: zero data, same command word as the reset */
			qcc->CSRWrData.r = 0x0;
			qcc->CSRCmd.r = CSRCmd.r;
		} else {
			/* busy-wait until the bridge reports the write as done */
			do {
				CSRStat = qcc->CSRStat;
			} while (CSRStat.b.CSRDone == 0);

			/* check for any errors, RespLen 1 for all writes */
			CSRStat = qcc->CSRStat;
			if ((CSRStat.b.CSRRespID != qcc_cmd->blockID) ||
					(CSRStat.b.CSRRespLen != 1) || CSRStat.b.CSRErr) {
				goto err;
			}
		}
	}
	/*
	 * success exit: reload the registers saved at entry from the DTCM
	 * (the saved pc slot is not reloaded), switch the MMU and dcache
	 * back on and return to the caller.
	 * NOTE(review): the literal 0x80000000 is presumably the value of
	 * DTCM_BASE -- confirm it matches the platform header.
	 */
	asm volatile (
			"mov r0, #0\n\t" /* return 0 */
			"mrc p15, 0, r1, c1, c0, 0\n\t"
			"orr r1, r1, #0x5\n\t"
			"mov sp, #0x80000000\n\t" /* DTCM_BASE */
			"add sp, sp, #0x2000\n\t" /* SZ_8K offset */
			"sub sp, sp, #0x20\n\t"   /* we pushed 8 registers at entry point */
			"ldmia   sp, {r4, r5, r6, r7, fp, sp, lr }\n\t"
			"mov r0, r0\n\t"
			"mcr p15, 0, r1, c1, c0, 0\n\t" /* mmu/dcache on */
			"bx lr\n\t"
			);

err:
	/* error exit: same sequence as above, but with r0 = ~4 = -5 */
	asm volatile (
			"mvn r0, #4\n\t" /* return -EIO */
			"mrc p15, 0, r1, c1, c0, 0\n\t"
			"orr r1, r1, #0x5\n\t"
			"mov sp, #0x80000000\n\t" /* DTCM_BASE */
			"add sp, sp, #0x2000\n\t" /* SZ_8K offset */
			"sub sp, sp, #0x20\n\t"   /* we pushed 8 registers at entry point */
			"ldmia   sp, {r4, r5, r6, r7, fp, sp, lr }\n\t"
			"mov r0, r0\n\t"
			"mcr p15, 0, r1, c1, c0, 0\n\t" /* mmu/dcache on */
			"bx lr\n\t"
			);

	/* never reached but keeps the compiler happy */
	return 0;
}

static int codec_reset_prepare(void)
{
	/*
	 * ioremap the DTCM
	 * disable interrupts
	 * suspend registered devices
	 * - disable/suspend/wait for completion of outstanding DMAs
	 * save a copy of the DTCM
	 * clear DTCM
	 * cpy reset_cmds table to DTCM
	 * cp 12 or 16K from current PC counter to DTCM_BASE+ARRAY_SIZE(reset_cmds)+1
	 * jmp to DTCM_BASE+ARRAY_SIZE(reset_cmds)+1
	 * restore DTCM
	 * re-anble DMA
	 * resume registered devices
	 * re-enable interrupts
	 * iounmap DTCM
	 */
	void __iomem *dtcm_base = NULL;
	void __iomem *qcc_base = NULL;
	void *dtcm_ram_shadow = NULL;
	unsigned long flags;
	int i, hits = 0, ret = 0;
	struct qcc_cmd_def *dtcm_cmd_buffer = NULL;
	struct qcc_cmd_def *qcc_cmd = NULL;

	dtcm_ram_shadow = kmalloc(DTCM_SIZE, GFP_KERNEL);
	if (dtcm_ram_shadow == NULL) {
		printk("Failed to get memory for DTCM shadow\n");
		return -1;
	}

	qcc_base = ioremap(QCC_BASE, QCC_SIZE);
	if (qcc_base == NULL) {
		printk(KERN_ERR "Failed to remap QCC\n");
		kfree(dtcm_ram_shadow);
		return -1;
	}

	dtcm_base = ioremap(DTCM_BASE, DTCM_SIZE);
	if (dtcm_base == NULL) {
		printk(KERN_ERR "Failed to remap DTCM\n");
		iounmap(qcc_base);
		kfree(dtcm_ram_shadow);
		return -1;
	}

	/*
	 * instead of depending on a hardcoded cmd table which doesn't
	 * work for all setups, we will create a new command file by
	 * reading all(or most) of the registers, then using this as
	 * our command table.  this way, whatever reg file was used
	 * to create the bootloader, we will read these setting and
	 * use them for codec reset
	 */
	dtcm_cmd_buffer =
		(struct qcc_cmd_def *)kmalloc(sizeof(qcc_reset_cmds), GFP_KERNEL);
	if (dtcm_cmd_buffer == NULL) {
		printk(KERN_ERR "Failed to get memory\n");
		iounmap(qcc_base);
		kfree(dtcm_ram_shadow);
		return -1;
	}

	/*
	 * duplicate the cmd table and use it to read back the data and
	 * overwrite the data in the table
	 */
	memset(dtcm_cmd_buffer, 0x0, sizeof(qcc_reset_cmds));
	memcpy(dtcm_cmd_buffer, qcc_reset_cmds, sizeof(qcc_reset_cmds));

	/* skip the first two commands, we don't want them changed */
	qcc_cmd = dtcm_cmd_buffer + 2;

	for (i = 2; i < ARRAY_SIZE(qcc_reset_cmds); i++, qcc_cmd++) {

		/*
		 * note about size funnyness.  the qcc cmd table was
		 * generated to be "raw" access to the qcc, thus a size
		 * of 0 is actually 4 bytes.  So we need to catch this
		 * when reading
		 */
		ret = mobi_qcc_read(qcc_reset_cmds[i].blockID,
				qcc_reset_cmds[i].addr,
				(unsigned long *)&(qcc_cmd->data),
				(qcc_reset_cmds[i].size ? qcc_reset_cmds[i].size : 4));

		if (ret) {
			printk(KERN_ERR "ERROR: blockID %02d addr "
					"0x%04x size 0x%02x data 0x%08x\n",
					qcc_reset_cmds[i].blockID,
					qcc_reset_cmds[i].addr,
					qcc_reset_cmds[i].size,
					qcc_cmd->data);
			goto err;
		}

		/*
		 * this is a special reset sequence where bit 6 must be toggle
		 * since reading back only gives the last value written, we
		 * know we must set the reset bit on the second read of this
		 * address
		 */
		if (qcc_reset_cmds[i].blockID == 17 &&
				qcc_reset_cmds[i].addr == 0x10 && ++hits == 2) {
			qcc_cmd->data |= 0x40;
		}

#if 0
		if (qcc_cmd->data != qcc_reset_cmds[i].data) {
			printk("MISMATCH %2d, bid %d, addr 0x%04x: table 0x%08x != read 0x%08x\n",
					i, qcc_reset_cmds[i].blockID,
					qcc_reset_cmds[i].addr,
					qcc_reset_cmds[i].data,
					qcc_cmd->data);
			printk("qcc_cmd->data is 0x%x\n", qcc_cmd->data);
		} else {
			printk("%2d, bid %2d, addr 0x%04x: table 0x%08x, read 0x%08x\n",
					i, qcc_reset_cmds[i].blockID,
					qcc_reset_cmds[i].addr,
					qcc_reset_cmds[i].data,
					qcc_cmd->data);
		}
#endif
	}

	/* suspend devices, eth, usb and any dma channels */
	ret = codec_reset_suspend_devices();
	if (ret)
		goto err;

	/* save a copy of the dtcm  using local fast copy */
	dtcmcopy((uint32_t)dtcm_base,
			(uint32_t)dtcm_ram_shadow, DTCM_SIZE-32);

	/* this could be eliminated, for speed */
	memset(dtcm_base, 0x0, DTCM_SIZE);

	/*
	 * cpy from our generated cmd table to dtcm
	 * fast copy doesn't do exact size and I feel safer not
	 * copying the extra data, so...
	 */
	memcpy(dtcm_base, dtcm_cmd_buffer, sizeof(qcc_reset_cmds));

#if CODEC_RESET_DEBUG
#if 0
	{
		/* DEBUG */
		int i;
		dprintk("some data from the cmd table in dtcm:\n");
		for (i = 0;
				i < (sizeof(qcc_reset_cmds) > 64 ? 64 : sizeof(qcc_reset_cmds));
				i += 16)
			dprintk("0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
					(uint32_t)(dtcm_base+i),
					*(uint32_t *)(dtcm_base+i+0x0),
					*(uint32_t *)(dtcm_base+i+0x4),
					*(uint32_t *)(dtcm_base+i+0x8),
					*(uint32_t *)(dtcm_base+i+0xc));

		dprintk("Code starts in DTCM at 0x%08x\n",
				((uint32_t)(dtcm_base+SZ_4K)));
	}
#endif
#endif

	/*
	 * we are going to disable memory, lets make sure everything has
	 * been committed, not sure if really needed or if this is the
	 * best place but it works
	 */
	flush_cache_all();
	/* disable interrupts and lets for for it! */
	local_irq_save(flags);
	ret = codec_reset_run(dtcm_base);
	local_irq_restore(flags);
	dprintk("codec_reset_run returned: %d\n", ret);

	dtcmcopy((uint32_t)dtcm_ram_shadow, (uint32_t)dtcm_base, DTCM_SIZE-32);

	/* and resume suspended devices.. */
	codec_reset_resume_devices();

err:
	kfree(dtcm_cmd_buffer);
	kfree(dtcm_ram_shadow);
	iounmap(qcc_base);
	iounmap(dtcm_base);

	return ret;
}

/*
 * mobi_codec_reset - exported entry point that performs a codec reset
 *
 * Hands the work to codec_reset_prepare() and passes its status back
 * unchanged.
 */
int32_t mobi_codec_reset(void)
{
	int32_t status = codec_reset_prepare();

	return status;
}
EXPORT_SYMBOL(mobi_codec_reset);

#if CODEC_RESET_DEBUG
/*
 * codec_reset_proc_wr - write handler of the debug "reset" proc entry
 * @file:   unused
 * @buffer: data written by the user; its contents are ignored
 * @count:  number of bytes written
 * @data:   unused
 *
 * Any write triggers a codec reset.  Returns the negative status of the
 * reset if it failed, otherwise @count so the whole write is reported as
 * consumed.
 */
static int codec_reset_proc_wr(struct file *file,
		const char *buffer, unsigned long count, void *data)
{
	int ret;

	ret = codec_reset_prepare();
	if (ret < 0)
		return ret;

	return count;
}

/*
 * codec_reset_probe - debug build: create the /proc trigger entry
 * @dev: platform device being bound (unused)
 *
 * Creates the directory "driver/codec_reset" under /proc and a "reset"
 * entry in it whose write handler is codec_reset_proc_wr().  The proc
 * interface is a debugging aid only, so failing to create it is reported
 * but does not fail the probe.
 *
 * Always returns 0.
 */
static int __devinit codec_reset_probe(struct platform_device *dev)
{
#ifdef CONFIG_PROC_FS
	/* this should probably become debug only, and create a device which
	 * apps can open */
	struct proc_dir_entry *pentry;

	codec_reset_proc_dir = proc_mkdir("driver/codec_reset", NULL);
	if (codec_reset_proc_dir == NULL) {
		/*
		 * do not go on: a NULL parent would put the "reset" entry
		 * directly in the /proc root
		 */
		printk(KERN_WARNING
				"codec_reset: failed to create proc directory\n");
		return 0;
	}

	pentry = create_proc_entry("reset",
			S_IRUSR | S_IRGRP | S_IROTH, codec_reset_proc_dir);
	if (pentry)
		pentry->write_proc = codec_reset_proc_wr;
	else
		printk(KERN_WARNING
				"codec_reset: failed to create proc entry\n");
#endif

	return 0;
}

/*
 * codec_reset_remove - debug build: remove the /proc trigger entry
 * @pdev: platform device being unbound (unused)
 *
 * Only tears down what codec_reset_probe() managed to create.
 *
 * Always returns 0.
 */
static int __devexit codec_reset_remove(struct platform_device *pdev)
{
#ifdef CONFIG_PROC_FS
	if (codec_reset_proc_dir != NULL) {
		remove_proc_entry("reset", codec_reset_proc_dir);
		remove_proc_entry("driver/codec_reset", NULL);
		codec_reset_proc_dir = NULL;
	}
#endif

	return 0;
}
#else

/*
 * codec_reset_probe - non-debug build: nothing to set up
 * @dev: platform device being bound (unused)
 *
 * Always returns 0.
 */
static int __devinit codec_reset_probe(struct platform_device *dev)
{
	return 0;
}

/*
 * codec_reset_remove - non-debug build: nothing to tear down
 * @pdev: platform device being unbound (unused)
 *
 * Always returns 0.
 */
static int __devexit codec_reset_remove(struct platform_device *pdev)
{
	return 0;
}
#endif

/*
 * codec_reset_shutdown - platform shutdown hook
 * @dev: platform device (unused)
 *
 * Intentionally empty.
 */
static void codec_reset_shutdown(struct platform_device *dev)
{
}

#ifdef CONFIG_PM
/*
 * codec_reset_suspend - power-management suspend hook
 * @dev:   platform device (unused)
 * @state: requested PM transition (unused)
 *
 * Nothing is saved; always returns 0.
 */
static int codec_reset_suspend(struct platform_device *dev, pm_message_t state)
{
	return 0;
}

/*
 * codec_reset_resume - power-management resume hook
 * @dev: platform device (unused)
 *
 * Nothing is restored; always returns 0.
 */
static int codec_reset_resume(struct platform_device *dev)
{
	return 0;
}
#else
#define codec_reset_suspend NULL
#define codec_reset_resume  NULL
#endif

/*
 * Platform driver description, registered under the name
 * PLATFORM_NAME_CODEC_RESET.
 *
 * codec_reset_remove() is marked __devexit, so it is referenced through
 * __devexit_p(): that yields NULL instead of a dangling pointer in
 * configurations where the exit section is discarded.
 */
static struct platform_driver codec_reset_driver = {
	.probe		= codec_reset_probe,
	.remove		= __devexit_p(codec_reset_remove),
	.suspend	= codec_reset_suspend,
	.resume		= codec_reset_resume,
	.shutdown	= codec_reset_shutdown,
	.driver		= {
		.name	= PLATFORM_NAME_CODEC_RESET,
	},
};

/*
 * codec_reset_mod_init - module entry point
 *
 * Registers codec_reset_driver with the platform bus and returns the
 * result of that registration.
 */
static int __init codec_reset_mod_init(void)
{
	int rc = platform_driver_register(&codec_reset_driver);

	return rc;
}

/*
 * codec_reset_mod_exit - module exit point
 *
 * Unregisters codec_reset_driver from the platform bus.
 */
static void __exit codec_reset_mod_exit(void)
{
	platform_driver_unregister(&codec_reset_driver);
}
module_init(codec_reset_mod_init);
module_exit(codec_reset_mod_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jeff Hane");
MODULE_DESCRIPTION("Codec reset module driver for Merlin");
