1/*
2 * AMD Cryptographic Coprocessor (CCP) driver
3 *
4 * Copyright (C) 2013 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/kthread.h>
18#include <linux/sched.h>
19#include <linux/interrupt.h>
20#include <linux/spinlock.h>
21#include <linux/mutex.h>
22#include <linux/delay.h>
23#include <linux/ccp.h>
24#include <linux/scatterlist.h>
25#include <crypto/scatterwalk.h>
26#include <crypto/sha.h>
27
28#include "ccp-dev.h"
29
30
31enum ccp_memtype {
32	CCP_MEMTYPE_SYSTEM = 0,
33	CCP_MEMTYPE_KSB,
34	CCP_MEMTYPE_LOCAL,
35	CCP_MEMTYPE__LAST,
36};
37
38struct ccp_dma_info {
39	dma_addr_t address;
40	unsigned int offset;
41	unsigned int length;
42	enum dma_data_direction dir;
43};
44
45struct ccp_dm_workarea {
46	struct device *dev;
47	struct dma_pool *dma_pool;
48	unsigned int length;
49
50	u8 *address;
51	struct ccp_dma_info dma;
52};
53
54struct ccp_sg_workarea {
55	struct scatterlist *sg;
56	unsigned int nents;
57	unsigned int length;
58
59	struct scatterlist *dma_sg;
60	struct device *dma_dev;
61	unsigned int dma_count;
62	enum dma_data_direction dma_dir;
63
64	unsigned int sg_used;
65
66	u64 bytes_left;
67};
68
69struct ccp_data {
70	struct ccp_sg_workarea sg_wa;
71	struct ccp_dm_workarea dm_wa;
72};
73
74struct ccp_mem {
75	enum ccp_memtype type;
76	union {
77		struct ccp_dma_info dma;
78		u32 ksb;
79	} u;
80};
81
82struct ccp_aes_op {
83	enum ccp_aes_type type;
84	enum ccp_aes_mode mode;
85	enum ccp_aes_action action;
86};
87
88struct ccp_xts_aes_op {
89	enum ccp_aes_action action;
90	enum ccp_xts_aes_unit_size unit_size;
91};
92
93struct ccp_sha_op {
94	enum ccp_sha_type type;
95	u64 msg_bits;
96};
97
98struct ccp_rsa_op {
99	u32 mod_size;
100	u32 input_len;
101};
102
103struct ccp_passthru_op {
104	enum ccp_passthru_bitwise bit_mod;
105	enum ccp_passthru_byteswap byte_swap;
106};
107
108struct ccp_ecc_op {
109	enum ccp_ecc_function function;
110};
111
112struct ccp_op {
113	struct ccp_cmd_queue *cmd_q;
114
115	u32 jobid;
116	u32 ioc;
117	u32 soc;
118	u32 ksb_key;
119	u32 ksb_ctx;
120	u32 init;
121	u32 eom;
122
123	struct ccp_mem src;
124	struct ccp_mem dst;
125
126	union {
127		struct ccp_aes_op aes;
128		struct ccp_xts_aes_op xts;
129		struct ccp_sha_op sha;
130		struct ccp_rsa_op rsa;
131		struct ccp_passthru_op passthru;
132		struct ccp_ecc_op ecc;
133	} u;
134};
135
136/* SHA initial context values */
137static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
138	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
139	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
140	cpu_to_be32(SHA1_H4), 0, 0, 0,
141};
142
143static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
144	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
145	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
146	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
147	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
148};
149
150static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
151	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
152	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
153	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
154	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
155};
156
157/* The CCP cannot perform zero-length sha operations so the caller
158 * is required to buffer data for the final operation.  However, a
159 * sha operation for a message with a total length of zero is valid
160 * so known values are required to supply the result.
161 */
162static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
163	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
164	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
165	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
166	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
167};
168
169static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
170	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
171	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
172	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
173	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
174};
175
176static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
177	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
178	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
179	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
180	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
181};
182
183static u32 ccp_addr_lo(struct ccp_dma_info *info)
184{
185	return lower_32_bits(info->address + info->offset);
186}
187
188static u32 ccp_addr_hi(struct ccp_dma_info *info)
189{
190	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
191}
192
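/* Write the prepared command registers (CMD_REQ1 through CMD_REQx) to the
 * device and kick off processing by writing CMD_REQ0 last.  When an
 * interrupt on completion is requested (or forced because the queue is
 * now full), wait for the interrupt and, on error, delete the related
 * jobs from the queue; on a stop-on-complete request only the head job
 * is deleted.
 */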
193static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
194{
195	struct ccp_cmd_queue *cmd_q = op->cmd_q;
196	struct ccp_device *ccp = cmd_q->ccp;
197	void __iomem *cr_addr;
198	u32 cr0, cmd;
199	unsigned int i;
200	int ret = 0;
201
202	/* We could read a status register to see how many free slots
203	 * are actually available, but reading that register resets it
204	 * and you could lose some error information.
205	 */
206	cmd_q->free_slots--;
207
208	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
209	      | (op->jobid << REQ0_JOBID_SHIFT)
210	      | REQ0_WAIT_FOR_WRITE;
211
212	if (op->soc)
213		cr0 |= REQ0_STOP_ON_COMPLETE
214		       | REQ0_INT_ON_COMPLETE;
215
216	if (op->ioc || !cmd_q->free_slots)
217		cr0 |= REQ0_INT_ON_COMPLETE;
218
219	/* Start at CMD_REQ1 */
220	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
221
222	mutex_lock(&ccp->req_mutex);
223
224	/* Write CMD_REQ1 through CMD_REQx first */
225	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
226		iowrite32(*(cr + i), cr_addr);
227
228	/* Tell the CCP to start */
229	wmb();
230	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
231
232	mutex_unlock(&ccp->req_mutex);
233
234	if (cr0 & REQ0_INT_ON_COMPLETE) {
235		/* Wait for the job to complete */
236		ret = wait_event_interruptible(cmd_q->int_queue,
237					       cmd_q->int_rcvd);
238		if (ret || cmd_q->cmd_error) {
239			/* On error delete all related jobs from the queue */
240			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
241			      | op->jobid;
242
243			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
244
245			if (!ret)
246				ret = -EIO;
247		} else if (op->soc) {
248			/* Delete just head job from the queue on SoC */
249			cmd = DEL_Q_ACTIVE
250			      | (cmd_q->id << DEL_Q_ID_SHIFT)
251			      | op->jobid;
252
253			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
254		}
255
256		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
257
258		cmd_q->int_rcvd = 0;
259	}
260
261	return ret;
262}
263
264static int ccp_perform_aes(struct ccp_op *op)
265{
266	u32 cr[6];
267
268	/* Fill out the register contents for REQ1 through REQ6 */
269	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
270		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
271		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
272		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
273		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
274	cr[1] = op->src.u.dma.length - 1;
275	cr[2] = ccp_addr_lo(&op->src.u.dma);
276	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
277		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
278		| ccp_addr_hi(&op->src.u.dma);
279	cr[4] = ccp_addr_lo(&op->dst.u.dma);
280	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
281		| ccp_addr_hi(&op->dst.u.dma);
282
283	if (op->u.aes.mode == CCP_AES_MODE_CFB)
284		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
285
286	if (op->eom)
287		cr[0] |= REQ1_EOM;
288
289	if (op->init)
290		cr[0] |= REQ1_INIT;
291
292	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
293}
294
295static int ccp_perform_xts_aes(struct ccp_op *op)
296{
297	u32 cr[6];
298
299	/* Fill out the register contents for REQ1 through REQ6 */
300	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
301		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
302		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
303		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
304	cr[1] = op->src.u.dma.length - 1;
305	cr[2] = ccp_addr_lo(&op->src.u.dma);
306	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
307		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
308		| ccp_addr_hi(&op->src.u.dma);
309	cr[4] = ccp_addr_lo(&op->dst.u.dma);
310	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
311		| ccp_addr_hi(&op->dst.u.dma);
312
313	if (op->eom)
314		cr[0] |= REQ1_EOM;
315
316	if (op->init)
317		cr[0] |= REQ1_INIT;
318
319	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
320}
321
322static int ccp_perform_sha(struct ccp_op *op)
323{
324	u32 cr[6];
325
326	/* Fill out the register contents for REQ1 through REQ6 */
327	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
328		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
329		| REQ1_INIT;
330	cr[1] = op->src.u.dma.length - 1;
331	cr[2] = ccp_addr_lo(&op->src.u.dma);
332	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
333		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
334		| ccp_addr_hi(&op->src.u.dma);
335
336	if (op->eom) {
337		cr[0] |= REQ1_EOM;
338		cr[4] = lower_32_bits(op->u.sha.msg_bits);
339		cr[5] = upper_32_bits(op->u.sha.msg_bits);
340	} else {
341		cr[4] = 0;
342		cr[5] = 0;
343	}
344
345	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
346}
347
348static int ccp_perform_rsa(struct ccp_op *op)
349{
350	u32 cr[6];
351
352	/* Fill out the register contents for REQ1 through REQ6 */
353	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
354		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
355		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
356		| REQ1_EOM;
357	cr[1] = op->u.rsa.input_len - 1;
358	cr[2] = ccp_addr_lo(&op->src.u.dma);
359	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
360		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
361		| ccp_addr_hi(&op->src.u.dma);
362	cr[4] = ccp_addr_lo(&op->dst.u.dma);
363	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
364		| ccp_addr_hi(&op->dst.u.dma);
365
366	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
367}
368
369static int ccp_perform_passthru(struct ccp_op *op)
370{
371	u32 cr[6];
372
373	/* Fill out the register contents for REQ1 through REQ6 */
374	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
375		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
376		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
377
378	if (op->src.type == CCP_MEMTYPE_SYSTEM)
379		cr[1] = op->src.u.dma.length - 1;
380	else
381		cr[1] = op->dst.u.dma.length - 1;
382
383	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
384		cr[2] = ccp_addr_lo(&op->src.u.dma);
385		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
386			| ccp_addr_hi(&op->src.u.dma);
387
388		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
389			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
390	} else {
391		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
392		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
393	}
394
395	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
396		cr[4] = ccp_addr_lo(&op->dst.u.dma);
397		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
398			| ccp_addr_hi(&op->dst.u.dma);
399	} else {
400		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
401		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
402	}
403
404	if (op->eom)
405		cr[0] |= REQ1_EOM;
406
407	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
408}
409
410static int ccp_perform_ecc(struct ccp_op *op)
411{
412	u32 cr[6];
413
414	/* Fill out the register contents for REQ1 through REQ6 */
415	cr[0] = REQ1_ECC_AFFINE_CONVERT
416		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
417		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
418		| REQ1_EOM;
419	cr[1] = op->src.u.dma.length - 1;
420	cr[2] = ccp_addr_lo(&op->src.u.dma);
421	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
422		| ccp_addr_hi(&op->src.u.dma);
423	cr[4] = ccp_addr_lo(&op->dst.u.dma);
424	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
425		| ccp_addr_hi(&op->dst.u.dma);
426
427	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
428}
429
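/* Reserve a contiguous run of KSB entries for an operation, sleeping
 * until enough entries become free.  Returns the first entry index
 * (offset by KSB_START), or 0 if the wait was interrupted.
 */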
430static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
431{
432	int start;
433
434	for (;;) {
435		mutex_lock(&ccp->ksb_mutex);
436
437		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
438							ccp->ksb_count,
439							ccp->ksb_start,
440							count, 0);
441		if (start <= ccp->ksb_count) {
442			bitmap_set(ccp->ksb, start, count);
443
444			mutex_unlock(&ccp->ksb_mutex);
445			break;
446		}
447
448		ccp->ksb_avail = 0;
449
450		mutex_unlock(&ccp->ksb_mutex);
451
452		/* Wait for KSB entries to become available */
453		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
454			return 0;
455	}
456
457	return KSB_START + start;
458}
459
460static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
461			 unsigned int count)
462{
463	if (!start)
464		return;
465
466	mutex_lock(&ccp->ksb_mutex);
467
468	bitmap_clear(ccp->ksb, start - KSB_START, count);
469
470	ccp->ksb_avail = 1;
471
472	mutex_unlock(&ccp->ksb_mutex);
473
474	wake_up_interruptible_all(&ccp->ksb_queue);
475}
476
477static u32 ccp_gen_jobid(struct ccp_device *ccp)
478{
479	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
480}
481
482static void ccp_sg_free(struct ccp_sg_workarea *wa)
483{
484	if (wa->dma_count)
485		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
486
487	wa->dma_count = 0;
488}
489
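/* Initialize a scatterlist workarea: record the entry count and the
 * number of bytes to process and, when a DMA direction is supplied,
 * map the scatterlist for DMA.
 */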
490static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
491				struct scatterlist *sg, u64 len,
492				enum dma_data_direction dma_dir)
493{
494	memset(wa, 0, sizeof(*wa));
495
496	wa->sg = sg;
497	if (!sg)
498		return 0;
499
500	wa->nents = sg_nents(sg);
501	wa->length = sg->length;
502	wa->bytes_left = len;
503	wa->sg_used = 0;
504
505	if (len == 0)
506		return 0;
507
508	if (dma_dir == DMA_NONE)
509		return 0;
510
511	wa->dma_sg = sg;
512	wa->dma_dev = dev;
513	wa->dma_dir = dma_dir;
514	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
515	if (!wa->dma_count)
516		return -ENOMEM;
517
518
519	return 0;
520}
521
522static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
523{
524	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
525
526	if (!wa->sg)
527		return;
528
529	wa->sg_used += nbytes;
530	wa->bytes_left -= nbytes;
531	if (wa->sg_used == wa->sg->length) {
532		wa->sg = sg_next(wa->sg);
533		wa->sg_used = 0;
534	}
535}
536
537static void ccp_dm_free(struct ccp_dm_workarea *wa)
538{
539	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
540		if (wa->address)
541			dma_pool_free(wa->dma_pool, wa->address,
542				      wa->dma.address);
543	} else {
544		if (wa->dma.address)
545			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
546					 wa->dma.dir);
547		kfree(wa->address);
548	}
549
550	wa->address = NULL;
551	wa->dma.address = 0;
552}
553
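/* Allocate a DMA-able bounce buffer for a command queue.  Buffers up to
 * CCP_DMAPOOL_MAX_SIZE come from the queue's DMA pool; larger buffers
 * are allocated with kzalloc and mapped with dma_map_single.
 */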
554static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
555				struct ccp_cmd_queue *cmd_q,
556				unsigned int len,
557				enum dma_data_direction dir)
558{
559	memset(wa, 0, sizeof(*wa));
560
561	if (!len)
562		return 0;
563
564	wa->dev = cmd_q->ccp->dev;
565	wa->length = len;
566
567	if (len <= CCP_DMAPOOL_MAX_SIZE) {
568		wa->dma_pool = cmd_q->dma_pool;
569
570		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
571					     &wa->dma.address);
572		if (!wa->address)
573			return -ENOMEM;
574
575		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
576
577		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
578	} else {
579		wa->address = kzalloc(len, GFP_KERNEL);
580		if (!wa->address)
581			return -ENOMEM;
582
583		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
584						 dir);
585		if (!wa->dma.address)
586			return -ENOMEM;
587
588		wa->dma.length = len;
589	}
590	wa->dma.dir = dir;
591
592	return 0;
593}
594
595static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
596			    struct scatterlist *sg, unsigned int sg_offset,
597			    unsigned int len)
598{
599	WARN_ON(!wa->address);
600
601	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
602				 0);
603}
604
605static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
606			    struct scatterlist *sg, unsigned int sg_offset,
607			    unsigned int len)
608{
609	WARN_ON(!wa->address);
610
611	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
612				 1);
613}
614
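/* Copy big endian data from a scatterlist into the workarea in reversed
 * (little endian) byte order, se_len bytes at a time starting from the
 * end of the source, optionally sign-extending a short final chunk.
 */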
615static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
616				    struct scatterlist *sg,
617				    unsigned int len, unsigned int se_len,
618				    bool sign_extend)
619{
620	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
621	u8 buffer[CCP_REVERSE_BUF_SIZE];
622
623	BUG_ON(se_len > sizeof(buffer));
624
625	sg_offset = len;
626	dm_offset = 0;
627	nbytes = len;
628	while (nbytes) {
629		ksb_len = min_t(unsigned int, nbytes, se_len);
630		sg_offset -= ksb_len;
631
632		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
633		for (i = 0; i < ksb_len; i++)
634			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
635
636		dm_offset += ksb_len;
637		nbytes -= ksb_len;
638
639		if ((ksb_len != se_len) && sign_extend) {
640			/* Must sign-extend to nearest sign-extend length */
641			if (wa->address[dm_offset - 1] & 0x80)
642				memset(wa->address + dm_offset, 0xff,
643				       se_len - ksb_len);
644		}
645	}
646}
647
648static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
649				    struct scatterlist *sg,
650				    unsigned int len)
651{
652	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
653	u8 buffer[CCP_REVERSE_BUF_SIZE];
654
655	sg_offset = 0;
656	dm_offset = len;
657	nbytes = len;
658	while (nbytes) {
659		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
660		dm_offset -= ksb_len;
661
662		for (i = 0; i < ksb_len; i++)
663			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
664		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
665
666		sg_offset += ksb_len;
667		nbytes -= ksb_len;
668	}
669}
670
671static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
672{
673	ccp_dm_free(&data->dm_wa);
674	ccp_sg_free(&data->sg_wa);
675}
676
677static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
678			 struct scatterlist *sg, u64 sg_len,
679			 unsigned int dm_len,
680			 enum dma_data_direction dir)
681{
682	int ret;
683
684	memset(data, 0, sizeof(*data));
685
686	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
687				   dir);
688	if (ret)
689		goto e_err;
690
691	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
692	if (ret)
693		goto e_err;
694
695	return 0;
696
697e_err:
698	ccp_free_data(data, cmd_q);
699
700	return ret;
701}
702
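/* Stage data between the scatterlist workarea and the bounce buffer: a
 * zero 'from' fills the buffer from the scatterlist, non-zero empties it
 * back out.  The scatterlist position is advanced and the number of
 * bytes handled is returned.
 */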
703static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
704{
705	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
706	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
707	unsigned int buf_count, nbytes;
708
709	/* Clear the buffer if setting it */
710	if (!from)
711		memset(dm_wa->address, 0, dm_wa->length);
712
713	if (!sg_wa->sg)
714		return 0;
715
716	/* Perform the copy operation
717	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
718	 *   an unsigned int
719	 */
720	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
721	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
722				 nbytes, from);
723
724	/* Update the structures and generate the count */
725	buf_count = 0;
726	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
727		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
728			     dm_wa->length - buf_count);
729		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
730
731		buf_count += nbytes;
732		ccp_update_sg_workarea(sg_wa, nbytes);
733	}
734
735	return buf_count;
736}
737
738static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
739{
740	return ccp_queue_buf(data, 0);
741}
742
743static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
744{
745	return ccp_queue_buf(data, 1);
746}
747
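/* Set up the source (and optional destination) DMA addresses for the
 * next operation.  When the current scatterlist entry holds less than a
 * full block the data is staged through the bounce buffer and the
 * operation is marked stop-on-complete.
 */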
748static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
749			     struct ccp_op *op, unsigned int block_size,
750			     bool blocksize_op)
751{
752	unsigned int sg_src_len, sg_dst_len, op_len;
753
754	/* The CCP can only DMA from one source address and to one destination
755	 * address per operation, so we must find the smallest DMA area
756	 * between the source and destination. The resulting len values will
757	 * always be <= UINT_MAX because the dma length is an unsigned int.
758	 */
759	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
760	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
761
762	if (dst) {
763		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
764		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
765		op_len = min(sg_src_len, sg_dst_len);
766	} else
767		op_len = sg_src_len;
768
769	/* The data operation length will be at least block_size in length
770	 * or the smaller of available sg room remaining for the source or
771	 * the destination
772	 */
773	op_len = max(op_len, block_size);
774
775	/* Unless we have to buffer data, there's no reason to wait */
776	op->soc = 0;
777
778	if (sg_src_len < block_size) {
779		/* Not enough data in the sg element, so it
780		 * needs to be buffered into a blocksize chunk
781		 */
782		int cp_len = ccp_fill_queue_buf(src);
783
784		op->soc = 1;
785		op->src.u.dma.address = src->dm_wa.dma.address;
786		op->src.u.dma.offset = 0;
787		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
788	} else {
789		/* Enough data in the sg element, but we need to
790		 * adjust for any previously copied data
791		 */
792		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
793		op->src.u.dma.offset = src->sg_wa.sg_used;
794		op->src.u.dma.length = op_len & ~(block_size - 1);
795
796		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
797	}
798
799	if (dst) {
800		if (sg_dst_len < block_size) {
801			/* Not enough room in the sg element or we're on the
802			 * last piece of data (when using padding), so the
803			 * output needs to be buffered into a blocksize chunk
804			 */
805			op->soc = 1;
806			op->dst.u.dma.address = dst->dm_wa.dma.address;
807			op->dst.u.dma.offset = 0;
808			op->dst.u.dma.length = op->src.u.dma.length;
809		} else {
810			/* Enough room in the sg element, but we need to
811			 * adjust for any previously used area
812			 */
813			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
814			op->dst.u.dma.offset = dst->sg_wa.sg_used;
815			op->dst.u.dma.length = op->src.u.dma.length;
816		}
817	}
818}
819
820static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
821			     struct ccp_op *op)
822{
823	op->init = 0;
824
825	if (dst) {
826		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
827			ccp_empty_queue_buf(dst);
828		else
829			ccp_update_sg_workarea(&dst->sg_wa,
830					       op->dst.u.dma.length);
831	}
832}
833
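/* Use a passthru operation to copy a workarea to or from a KSB entry,
 * applying the requested byte swap.
 */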
834static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
835				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
836				u32 byte_swap, bool from)
837{
838	struct ccp_op op;
839
840	memset(&op, 0, sizeof(op));
841
842	op.cmd_q = cmd_q;
843	op.jobid = jobid;
844	op.eom = 1;
845
846	if (from) {
847		op.soc = 1;
848		op.src.type = CCP_MEMTYPE_KSB;
849		op.src.u.ksb = ksb;
850		op.dst.type = CCP_MEMTYPE_SYSTEM;
851		op.dst.u.dma.address = wa->dma.address;
852		op.dst.u.dma.length = wa->length;
853	} else {
854		op.src.type = CCP_MEMTYPE_SYSTEM;
855		op.src.u.dma.address = wa->dma.address;
856		op.src.u.dma.length = wa->length;
857		op.dst.type = CCP_MEMTYPE_KSB;
858		op.dst.u.ksb = ksb;
859	}
860
861	op.u.passthru.byte_swap = byte_swap;
862
863	return ccp_perform_passthru(&op);
864}
865
866static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
867			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
868			   u32 byte_swap)
869{
870	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
871}
872
873static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
874			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
875			     u32 byte_swap)
876{
877	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
878}
879
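/* Perform an AES CMAC operation: load the key and IV into the KSB, feed
 * the source data through the AES engine one block-sized chunk at a
 * time and, on the final piece, load the caller's K1/K2 key into the
 * context before reading the resulting MAC back into the IV buffer.
 */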
880static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
881				struct ccp_cmd *cmd)
882{
883	struct ccp_aes_engine *aes = &cmd->u.aes;
884	struct ccp_dm_workarea key, ctx;
885	struct ccp_data src;
886	struct ccp_op op;
887	unsigned int dm_offset;
888	int ret;
889
890	if (!((aes->key_len == AES_KEYSIZE_128) ||
891	      (aes->key_len == AES_KEYSIZE_192) ||
892	      (aes->key_len == AES_KEYSIZE_256)))
893		return -EINVAL;
894
895	if (aes->src_len & (AES_BLOCK_SIZE - 1))
896		return -EINVAL;
897
898	if (aes->iv_len != AES_BLOCK_SIZE)
899		return -EINVAL;
900
901	if (!aes->key || !aes->iv || !aes->src)
902		return -EINVAL;
903
904	if (aes->cmac_final) {
905		if (aes->cmac_key_len != AES_BLOCK_SIZE)
906			return -EINVAL;
907
908		if (!aes->cmac_key)
909			return -EINVAL;
910	}
911
912	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
913	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
914
915	ret = -EIO;
916	memset(&op, 0, sizeof(op));
917	op.cmd_q = cmd_q;
918	op.jobid = ccp_gen_jobid(cmd_q->ccp);
919	op.ksb_key = cmd_q->ksb_key;
920	op.ksb_ctx = cmd_q->ksb_ctx;
921	op.init = 1;
922	op.u.aes.type = aes->type;
923	op.u.aes.mode = aes->mode;
924	op.u.aes.action = aes->action;
925
926	/* All supported key sizes fit in a single (32-byte) KSB entry
927	 * and must be in little endian format. Use the 256-bit byte
928	 * swap passthru option to convert from big endian to little
929	 * endian.
930	 */
931	ret = ccp_init_dm_workarea(&key, cmd_q,
932				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
933				   DMA_TO_DEVICE);
934	if (ret)
935		return ret;
936
937	dm_offset = CCP_KSB_BYTES - aes->key_len;
938	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
939	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
940			      CCP_PASSTHRU_BYTESWAP_256BIT);
941	if (ret) {
942		cmd->engine_error = cmd_q->cmd_error;
943		goto e_key;
944	}
945
946	/* The AES context fits in a single (32-byte) KSB entry and
947	 * must be in little endian format. Use the 256-bit byte swap
948	 * passthru option to convert from big endian to little endian.
949	 */
950	ret = ccp_init_dm_workarea(&ctx, cmd_q,
951				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
952				   DMA_BIDIRECTIONAL);
953	if (ret)
954		goto e_key;
955
956	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
957	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
958	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
959			      CCP_PASSTHRU_BYTESWAP_256BIT);
960	if (ret) {
961		cmd->engine_error = cmd_q->cmd_error;
962		goto e_ctx;
963	}
964
965	/* Send data to the CCP AES engine */
966	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
967			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
968	if (ret)
969		goto e_ctx;
970
971	while (src.sg_wa.bytes_left) {
972		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
973		if (aes->cmac_final && !src.sg_wa.bytes_left) {
974			op.eom = 1;
975
976			/* Push the K1/K2 key to the CCP now */
977			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
978						op.ksb_ctx,
979						CCP_PASSTHRU_BYTESWAP_256BIT);
980			if (ret) {
981				cmd->engine_error = cmd_q->cmd_error;
982				goto e_src;
983			}
984
985			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
986					aes->cmac_key_len);
987			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
988					      CCP_PASSTHRU_BYTESWAP_256BIT);
989			if (ret) {
990				cmd->engine_error = cmd_q->cmd_error;
991				goto e_src;
992			}
993		}
994
995		ret = ccp_perform_aes(&op);
996		if (ret) {
997			cmd->engine_error = cmd_q->cmd_error;
998			goto e_src;
999		}
1000
1001		ccp_process_data(&src, NULL, &op);
1002	}
1003
1004	/* Retrieve the AES context - convert from LE to BE using
1005	 * 32-byte (256-bit) byteswapping
1006	 */
1007	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1008				CCP_PASSTHRU_BYTESWAP_256BIT);
1009	if (ret) {
1010		cmd->engine_error = cmd_q->cmd_error;
1011		goto e_src;
1012	}
1013
1014	/* ...but we only need AES_BLOCK_SIZE bytes */
1015	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1016	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1017
1018e_src:
1019	ccp_free_data(&src, cmd_q);
1020
1021e_ctx:
1022	ccp_dm_free(&ctx);
1023
1024e_key:
1025	ccp_dm_free(&key);
1026
1027	return ret;
1028}
1029
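/* Perform an AES operation: validate the request, load the key (and the
 * IV for non-ECB modes) into the KSB, stream the data through the AES
 * engine and, for modes that keep a context, read the updated IV back
 * out.
 */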
1030static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1031{
1032	struct ccp_aes_engine *aes = &cmd->u.aes;
1033	struct ccp_dm_workarea key, ctx;
1034	struct ccp_data src, dst;
1035	struct ccp_op op;
1036	unsigned int dm_offset;
1037	bool in_place = false;
1038	int ret;
1039
1040	if (aes->mode == CCP_AES_MODE_CMAC)
1041		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1042
1043	if (!((aes->key_len == AES_KEYSIZE_128) ||
1044	      (aes->key_len == AES_KEYSIZE_192) ||
1045	      (aes->key_len == AES_KEYSIZE_256)))
1046		return -EINVAL;
1047
1048	if (((aes->mode == CCP_AES_MODE_ECB) ||
1049	     (aes->mode == CCP_AES_MODE_CBC) ||
1050	     (aes->mode == CCP_AES_MODE_CFB)) &&
1051	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1052		return -EINVAL;
1053
1054	if (!aes->key || !aes->src || !aes->dst)
1055		return -EINVAL;
1056
1057	if (aes->mode != CCP_AES_MODE_ECB) {
1058		if (aes->iv_len != AES_BLOCK_SIZE)
1059			return -EINVAL;
1060
1061		if (!aes->iv)
1062			return -EINVAL;
1063	}
1064
1065	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1066	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1067
1068	ret = -EIO;
1069	memset(&op, 0, sizeof(op));
1070	op.cmd_q = cmd_q;
1071	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1072	op.ksb_key = cmd_q->ksb_key;
1073	op.ksb_ctx = cmd_q->ksb_ctx;
1074	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1075	op.u.aes.type = aes->type;
1076	op.u.aes.mode = aes->mode;
1077	op.u.aes.action = aes->action;
1078
1079	/* All supported key sizes fit in a single (32-byte) KSB entry
1080	 * and must be in little endian format. Use the 256-bit byte
1081	 * swap passthru option to convert from big endian to little
1082	 * endian.
1083	 */
1084	ret = ccp_init_dm_workarea(&key, cmd_q,
1085				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1086				   DMA_TO_DEVICE);
1087	if (ret)
1088		return ret;
1089
1090	dm_offset = CCP_KSB_BYTES - aes->key_len;
1091	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1092	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1093			      CCP_PASSTHRU_BYTESWAP_256BIT);
1094	if (ret) {
1095		cmd->engine_error = cmd_q->cmd_error;
1096		goto e_key;
1097	}
1098
1099	/* The AES context fits in a single (32-byte) KSB entry and
1100	 * must be in little endian format. Use the 256-bit byte swap
1101	 * passthru option to convert from big endian to little endian.
1102	 */
1103	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1104				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1105				   DMA_BIDIRECTIONAL);
1106	if (ret)
1107		goto e_key;
1108
1109	if (aes->mode != CCP_AES_MODE_ECB) {
1110		/* Load the AES context - convert to LE */
1111		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1112		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1113		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1114				      CCP_PASSTHRU_BYTESWAP_256BIT);
1115		if (ret) {
1116			cmd->engine_error = cmd_q->cmd_error;
1117			goto e_ctx;
1118		}
1119	}
1120
1121	/* Prepare the input and output data workareas. For in-place
1122	 * operations we need to set the dma direction to BIDIRECTIONAL
1123	 * and copy the src workarea to the dst workarea.
1124	 */
1125	if (sg_virt(aes->src) == sg_virt(aes->dst))
1126		in_place = true;
1127
1128	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1129			    AES_BLOCK_SIZE,
1130			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1131	if (ret)
1132		goto e_ctx;
1133
1134	if (in_place)
1135		dst = src;
1136	else {
1137		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1138				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1139		if (ret)
1140			goto e_src;
1141	}
1142
1143	/* Send data to the CCP AES engine */
1144	while (src.sg_wa.bytes_left) {
1145		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1146		if (!src.sg_wa.bytes_left) {
1147			op.eom = 1;
1148
1149			/* Since we don't retrieve the AES context in ECB
1150			 * mode we have to wait for the operation to complete
1151			 * on the last piece of data
1152			 */
1153			if (aes->mode == CCP_AES_MODE_ECB)
1154				op.soc = 1;
1155		}
1156
1157		ret = ccp_perform_aes(&op);
1158		if (ret) {
1159			cmd->engine_error = cmd_q->cmd_error;
1160			goto e_dst;
1161		}
1162
1163		ccp_process_data(&src, &dst, &op);
1164	}
1165
1166	if (aes->mode != CCP_AES_MODE_ECB) {
1167		/* Retrieve the AES context - convert from LE to BE using
1168		 * 32-byte (256-bit) byteswapping
1169		 */
1170		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1171					CCP_PASSTHRU_BYTESWAP_256BIT);
1172		if (ret) {
1173			cmd->engine_error = cmd_q->cmd_error;
1174			goto e_dst;
1175		}
1176
1177		/* ...but we only need AES_BLOCK_SIZE bytes */
1178		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1179		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1180	}
1181
1182e_dst:
1183	if (!in_place)
1184		ccp_free_data(&dst, cmd_q);
1185
1186e_src:
1187	ccp_free_data(&src, cmd_q);
1188
1189e_ctx:
1190	ccp_dm_free(&ctx);
1191
1192e_key:
1193	ccp_dm_free(&key);
1194
1195	return ret;
1196}
1197
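/* Perform an XTS-AES operation: load the 128-bit key material and the
 * tweak into the KSB, process the data in unit_size chunks and read the
 * updated tweak back into the caller's IV buffer.
 */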
1198static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1199			       struct ccp_cmd *cmd)
1200{
1201	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1202	struct ccp_dm_workarea key, ctx;
1203	struct ccp_data src, dst;
1204	struct ccp_op op;
1205	unsigned int unit_size, dm_offset;
1206	bool in_place = false;
1207	int ret;
1208
1209	switch (xts->unit_size) {
1210	case CCP_XTS_AES_UNIT_SIZE_16:
1211		unit_size = 16;
1212		break;
1213	case CCP_XTS_AES_UNIT_SIZE_512:
1214		unit_size = 512;
1215		break;
1216	case CCP_XTS_AES_UNIT_SIZE_1024:
1217		unit_size = 1024;
1218		break;
1219	case CCP_XTS_AES_UNIT_SIZE_2048:
1220		unit_size = 2048;
1221		break;
1222	case CCP_XTS_AES_UNIT_SIZE_4096:
1223		unit_size = 4096;
1224		break;
1225
1226	default:
1227		return -EINVAL;
1228	}
1229
1230	if (xts->key_len != AES_KEYSIZE_128)
1231		return -EINVAL;
1232
1233	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1234		return -EINVAL;
1235
1236	if (xts->iv_len != AES_BLOCK_SIZE)
1237		return -EINVAL;
1238
1239	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1240		return -EINVAL;
1241
1242	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1243	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1244
1245	ret = -EIO;
1246	memset(&op, 0, sizeof(op));
1247	op.cmd_q = cmd_q;
1248	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1249	op.ksb_key = cmd_q->ksb_key;
1250	op.ksb_ctx = cmd_q->ksb_ctx;
1251	op.init = 1;
1252	op.u.xts.action = xts->action;
1253	op.u.xts.unit_size = xts->unit_size;
1254
1255	/* All supported key sizes fit in a single (32-byte) KSB entry
1256	 * and must be in little endian format. Use the 256-bit byte
1257	 * swap passthru option to convert from big endian to little
1258	 * endian.
1259	 */
1260	ret = ccp_init_dm_workarea(&key, cmd_q,
1261				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1262				   DMA_TO_DEVICE);
1263	if (ret)
1264		return ret;
1265
1266	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1267	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1268	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1269	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1270			      CCP_PASSTHRU_BYTESWAP_256BIT);
1271	if (ret) {
1272		cmd->engine_error = cmd_q->cmd_error;
1273		goto e_key;
1274	}
1275
1276	/* The AES context fits in a single (32-byte) KSB entry and
1277	 * for XTS is already in little endian format so no byte swapping
1278	 * is needed.
1279	 */
1280	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1281				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1282				   DMA_BIDIRECTIONAL);
1283	if (ret)
1284		goto e_key;
1285
1286	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1287	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1288			      CCP_PASSTHRU_BYTESWAP_NOOP);
1289	if (ret) {
1290		cmd->engine_error = cmd_q->cmd_error;
1291		goto e_ctx;
1292	}
1293
1294	/* Prepare the input and output data workareas. For in-place
1295	 * operations we need to set the dma direction to BIDIRECTIONAL
1296	 * and copy the src workarea to the dst workarea.
1297	 */
1298	if (sg_virt(xts->src) == sg_virt(xts->dst))
1299		in_place = true;
1300
1301	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1302			    unit_size,
1303			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1304	if (ret)
1305		goto e_ctx;
1306
1307	if (in_place)
1308		dst = src;
1309	else {
1310		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1311				    unit_size, DMA_FROM_DEVICE);
1312		if (ret)
1313			goto e_src;
1314	}
1315
1316	/* Send data to the CCP AES engine */
1317	while (src.sg_wa.bytes_left) {
1318		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1319		if (!src.sg_wa.bytes_left)
1320			op.eom = 1;
1321
1322		ret = ccp_perform_xts_aes(&op);
1323		if (ret) {
1324			cmd->engine_error = cmd_q->cmd_error;
1325			goto e_dst;
1326		}
1327
1328		ccp_process_data(&src, &dst, &op);
1329	}
1330
1331	/* Retrieve the AES context - convert from LE to BE using
1332	 * 32-byte (256-bit) byteswapping
1333	 */
1334	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1335				CCP_PASSTHRU_BYTESWAP_256BIT);
1336	if (ret) {
1337		cmd->engine_error = cmd_q->cmd_error;
1338		goto e_dst;
1339	}
1340
1341	/* ...but we only need AES_BLOCK_SIZE bytes */
1342	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1343	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1344
1345e_dst:
1346	if (!in_place)
1347		ccp_free_data(&dst, cmd_q);
1348
1349e_src:
1350	ccp_free_data(&src, cmd_q);
1351
1352e_ctx:
1353	ccp_dm_free(&ctx);
1354
1355e_key:
1356	ccp_dm_free(&key);
1357
1358	return ret;
1359}
1360
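/* Perform a SHA operation: seed the context (the initial hash values on
 * the first pass, the caller's context otherwise), hash the data in
 * block-sized chunks and read the context back out.  When an opad is
 * supplied on the final pass, a second SHA over opad || digest is run
 * to complete an HMAC.
 */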
1361static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1362{
1363	struct ccp_sha_engine *sha = &cmd->u.sha;
1364	struct ccp_dm_workarea ctx;
1365	struct ccp_data src;
1366	struct ccp_op op;
1367	int ret;
1368
1369	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1370		return -EINVAL;
1371
1372	if (!sha->ctx)
1373		return -EINVAL;
1374
1375	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1376		return -EINVAL;
1377
1378	if (!sha->src_len) {
1379		const u8 *sha_zero;
1380
1381		/* Not final, just return */
1382		if (!sha->final)
1383			return 0;
1384
1385		/* CCP can't do a zero length sha operation so the caller
1386		 * must buffer the data.
1387		 */
1388		if (sha->msg_bits)
1389			return -EINVAL;
1390
1391		/* A sha operation for a message with a total length of zero
1392		 * is valid, so return the known result.
1393		 */
1394		switch (sha->type) {
1395		case CCP_SHA_TYPE_1:
1396			sha_zero = ccp_sha1_zero;
1397			break;
1398		case CCP_SHA_TYPE_224:
1399			sha_zero = ccp_sha224_zero;
1400			break;
1401		case CCP_SHA_TYPE_256:
1402			sha_zero = ccp_sha256_zero;
1403			break;
1404		default:
1405			return -EINVAL;
1406		}
1407
1408		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1409					 sha->ctx_len, 1);
1410
1411		return 0;
1412	}
1413
1414	if (!sha->src)
1415		return -EINVAL;
1416
1417	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1418
1419	memset(&op, 0, sizeof(op));
1420	op.cmd_q = cmd_q;
1421	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1422	op.ksb_ctx = cmd_q->ksb_ctx;
1423	op.u.sha.type = sha->type;
1424	op.u.sha.msg_bits = sha->msg_bits;
1425
1426	/* The SHA context fits in a single (32-byte) KSB entry and
1427	 * must be in little endian format. Use the 256-bit byte swap
1428	 * passthru option to convert from big endian to little endian.
1429	 */
1430	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1431				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1432				   DMA_BIDIRECTIONAL);
1433	if (ret)
1434		return ret;
1435
1436	if (sha->first) {
1437		const __be32 *init;
1438
1439		switch (sha->type) {
1440		case CCP_SHA_TYPE_1:
1441			init = ccp_sha1_init;
1442			break;
1443		case CCP_SHA_TYPE_224:
1444			init = ccp_sha224_init;
1445			break;
1446		case CCP_SHA_TYPE_256:
1447			init = ccp_sha256_init;
1448			break;
1449		default:
1450			ret = -EINVAL;
1451			goto e_ctx;
1452		}
1453		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1454	} else
1455		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1456
1457	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1458			      CCP_PASSTHRU_BYTESWAP_256BIT);
1459	if (ret) {
1460		cmd->engine_error = cmd_q->cmd_error;
1461		goto e_ctx;
1462	}
1463
1464	/* Send data to the CCP SHA engine */
1465	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1466			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1467	if (ret)
1468		goto e_ctx;
1469
1470	while (src.sg_wa.bytes_left) {
1471		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1472		if (sha->final && !src.sg_wa.bytes_left)
1473			op.eom = 1;
1474
1475		ret = ccp_perform_sha(&op);
1476		if (ret) {
1477			cmd->engine_error = cmd_q->cmd_error;
1478			goto e_data;
1479		}
1480
1481		ccp_process_data(&src, NULL, &op);
1482	}
1483
1484	/* Retrieve the SHA context - convert from LE to BE using
1485	 * 32-byte (256-bit) byteswapping
1486	 */
1487	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1488				CCP_PASSTHRU_BYTESWAP_256BIT);
1489	if (ret) {
1490		cmd->engine_error = cmd_q->cmd_error;
1491		goto e_data;
1492	}
1493
1494	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1495
1496	if (sha->final && sha->opad) {
1497		/* HMAC operation, recursively perform final SHA */
1498		struct ccp_cmd hmac_cmd;
1499		struct scatterlist sg;
1500		u64 block_size, digest_size;
1501		u8 *hmac_buf;
1502
1503		switch (sha->type) {
1504		case CCP_SHA_TYPE_1:
1505			block_size = SHA1_BLOCK_SIZE;
1506			digest_size = SHA1_DIGEST_SIZE;
1507			break;
1508		case CCP_SHA_TYPE_224:
1509			block_size = SHA224_BLOCK_SIZE;
1510			digest_size = SHA224_DIGEST_SIZE;
1511			break;
1512		case CCP_SHA_TYPE_256:
1513			block_size = SHA256_BLOCK_SIZE;
1514			digest_size = SHA256_DIGEST_SIZE;
1515			break;
1516		default:
1517			ret = -EINVAL;
1518			goto e_data;
1519		}
1520
1521		if (sha->opad_len != block_size) {
1522			ret = -EINVAL;
1523			goto e_data;
1524		}
1525
1526		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1527		if (!hmac_buf) {
1528			ret = -ENOMEM;
1529			goto e_data;
1530		}
1531		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1532
1533		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1534		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1535
1536		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1537		hmac_cmd.engine = CCP_ENGINE_SHA;
1538		hmac_cmd.u.sha.type = sha->type;
1539		hmac_cmd.u.sha.ctx = sha->ctx;
1540		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1541		hmac_cmd.u.sha.src = &sg;
1542		hmac_cmd.u.sha.src_len = block_size + digest_size;
1543		hmac_cmd.u.sha.opad = NULL;
1544		hmac_cmd.u.sha.opad_len = 0;
1545		hmac_cmd.u.sha.first = 1;
1546		hmac_cmd.u.sha.final = 1;
1547		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1548
1549		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1550		if (ret)
1551			cmd->engine_error = hmac_cmd.engine_error;
1552
1553		kfree(hmac_buf);
1554	}
1555
1556e_data:
1557	ccp_free_data(&src, cmd_q);
1558
1559e_ctx:
1560	ccp_dm_free(&ctx);
1561
1562	return ret;
1563}
1564
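/* Perform an RSA exponentiation: place the byte-reversed exponent in
 * the KSB, concatenate the byte-reversed modulus and message into a
 * single DMA buffer, run the RSA engine and reverse the result back
 * into the caller's buffer.
 */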
1565static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1566{
1567	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1568	struct ccp_dm_workarea exp, src;
1569	struct ccp_data dst;
1570	struct ccp_op op;
1571	unsigned int ksb_count, i_len, o_len;
1572	int ret;
1573
1574	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1575		return -EINVAL;
1576
1577	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1578		return -EINVAL;
1579
1580	/* The RSA modulus must precede the message being acted upon, so
1581	 * it must be copied to a DMA area where the message and the
1582	 * modulus can be concatenated.  Therefore the input buffer
1583	 * length required is twice the output buffer length (which
1584	 * must be a multiple of 256-bits).
1585	 */
1586	o_len = ((rsa->key_size + 255) / 256) * 32;
1587	i_len = o_len * 2;
1588
1589	ksb_count = o_len / CCP_KSB_BYTES;
1590
1591	memset(&op, 0, sizeof(op));
1592	op.cmd_q = cmd_q;
1593	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1594	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1595	if (!op.ksb_key)
1596		return -EIO;
1597
1598	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1599	 * be in little endian format. Reverse copy each 32-byte chunk
1600	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1601	 * and each byte within that chunk and do not perform any byte swap
1602	 * operations on the passthru operation.
1603	 */
1604	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1605	if (ret)
1606		goto e_ksb;
1607
1608	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1609				false);
1610	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1611			      CCP_PASSTHRU_BYTESWAP_NOOP);
1612	if (ret) {
1613		cmd->engine_error = cmd_q->cmd_error;
1614		goto e_exp;
1615	}
1616
1617	/* Concatenate the modulus and the message. Both the modulus and
1618	 * the operands must be in little endian format.  Since the input
1619	 * is in big endian format it must be converted.
1620	 */
1621	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1622	if (ret)
1623		goto e_exp;
1624
1625	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1626				false);
1627	src.address += o_len;	/* Adjust the address for the copy operation */
1628	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1629				false);
1630	src.address -= o_len;	/* Reset the address to original value */
1631
1632	/* Prepare the output area for the operation */
1633	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1634			    o_len, DMA_FROM_DEVICE);
1635	if (ret)
1636		goto e_src;
1637
1638	op.soc = 1;
1639	op.src.u.dma.address = src.dma.address;
1640	op.src.u.dma.offset = 0;
1641	op.src.u.dma.length = i_len;
1642	op.dst.u.dma.address = dst.dm_wa.dma.address;
1643	op.dst.u.dma.offset = 0;
1644	op.dst.u.dma.length = o_len;
1645
1646	op.u.rsa.mod_size = rsa->key_size;
1647	op.u.rsa.input_len = i_len;
1648
1649	ret = ccp_perform_rsa(&op);
1650	if (ret) {
1651		cmd->engine_error = cmd_q->cmd_error;
1652		goto e_dst;
1653	}
1654
1655	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1656
1657e_dst:
1658	ccp_free_data(&dst, cmd_q);
1659
1660e_src:
1661	ccp_dm_free(&src);
1662
1663e_exp:
1664	ccp_dm_free(&exp);
1665
1666e_ksb:
1667	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1668
1669	return ret;
1670}
1671
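/* Perform a passthru (copy/bitwise/byteswap) operation: load the mask
 * into the KSB when a bitwise operation is requested, then copy each
 * source scatterlist entry to the destination one DMA segment at a
 * time.
 */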
1672static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1673				struct ccp_cmd *cmd)
1674{
1675	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1676	struct ccp_dm_workarea mask;
1677	struct ccp_data src, dst;
1678	struct ccp_op op;
1679	bool in_place = false;
1680	unsigned int i;
1681	int ret;
1682
1683	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1684		return -EINVAL;
1685
1686	if (!pt->src || !pt->dst)
1687		return -EINVAL;
1688
1689	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1690		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1691			return -EINVAL;
1692		if (!pt->mask)
1693			return -EINVAL;
1694	}
1695
1696	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1697
1698	memset(&op, 0, sizeof(op));
1699	op.cmd_q = cmd_q;
1700	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1701
1702	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1703		/* Load the mask */
1704		op.ksb_key = cmd_q->ksb_key;
1705
1706		ret = ccp_init_dm_workarea(&mask, cmd_q,
1707					   CCP_PASSTHRU_KSB_COUNT *
1708					   CCP_KSB_BYTES,
1709					   DMA_TO_DEVICE);
1710		if (ret)
1711			return ret;
1712
1713		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1714		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1715				      CCP_PASSTHRU_BYTESWAP_NOOP);
1716		if (ret) {
1717			cmd->engine_error = cmd_q->cmd_error;
1718			goto e_mask;
1719		}
1720	}
1721
1722	/* Prepare the input and output data workareas. For in-place
1723	 * operations we need to set the dma direction to BIDIRECTIONAL
1724	 * and copy the src workarea to the dst workarea.
1725	 */
1726	if (sg_virt(pt->src) == sg_virt(pt->dst))
1727		in_place = true;
1728
1729	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1730			    CCP_PASSTHRU_MASKSIZE,
1731			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1732	if (ret)
1733		goto e_mask;
1734
1735	if (in_place)
1736		dst = src;
1737	else {
1738		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1739				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1740		if (ret)
1741			goto e_src;
1742	}
1743
1744	/* Send data to the CCP Passthru engine
1745	 *   Because the CCP engine works on a single source and destination
1746	 *   dma address at a time, each entry in the source scatterlist
1747	 *   (after the dma_map_sg call) must be less than or equal to the
1748	 *   (remaining) length in the destination scatterlist entry and the
1749	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1750	 */
1751	dst.sg_wa.sg_used = 0;
1752	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1753		if (!dst.sg_wa.sg ||
1754		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1755			ret = -EINVAL;
1756			goto e_dst;
1757		}
1758
1759		if (i == src.sg_wa.dma_count) {
1760			op.eom = 1;
1761			op.soc = 1;
1762		}
1763
1764		op.src.type = CCP_MEMTYPE_SYSTEM;
1765		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1766		op.src.u.dma.offset = 0;
1767		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1768
1769		op.dst.type = CCP_MEMTYPE_SYSTEM;
1770		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1771		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1772		op.dst.u.dma.length = op.src.u.dma.length;
1773
1774		ret = ccp_perform_passthru(&op);
1775		if (ret) {
1776			cmd->engine_error = cmd_q->cmd_error;
1777			goto e_dst;
1778		}
1779
1780		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1781		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1782			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1783			dst.sg_wa.sg_used = 0;
1784		}
1785		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1786	}
1787
1788e_dst:
1789	if (!in_place)
1790		ccp_free_data(&dst, cmd_q);
1791
1792e_src:
1793	ccp_free_data(&src, cmd_q);
1794
1795e_mask:
1796	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1797		ccp_dm_free(&mask);
1798
1799	return ret;
1800}
1801
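/* Perform an ECC modular math operation: concatenate the byte-reversed
 * modulus and operands into a fixed-size source buffer, run the ECC
 * engine and reverse the result back into the caller's buffer.
 */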
1802static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1803{
1804	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1805	struct ccp_dm_workarea src, dst;
1806	struct ccp_op op;
1807	int ret;
1808	u8 *save;
1809
1810	if (!ecc->u.mm.operand_1 ||
1811	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1812		return -EINVAL;
1813
1814	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1815		if (!ecc->u.mm.operand_2 ||
1816		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1817			return -EINVAL;
1818
1819	if (!ecc->u.mm.result ||
1820	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1821		return -EINVAL;
1822
1823	memset(&op, 0, sizeof(op));
1824	op.cmd_q = cmd_q;
1825	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1826
1827	/* Concatenate the modulus and the operands. Both the modulus and
1828	 * the operands must be in little endian format.  Since the input
1829	 * is in big endian format it must be converted and placed in a
1830	 * fixed length buffer.
1831	 */
1832	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1833				   DMA_TO_DEVICE);
1834	if (ret)
1835		return ret;
1836
1837	/* Save the workarea address since it is updated in order to perform
1838	 * the concatenation
1839	 */
1840	save = src.address;
1841
1842	/* Copy the ECC modulus */
1843	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1844				CCP_ECC_OPERAND_SIZE, false);
1845	src.address += CCP_ECC_OPERAND_SIZE;
1846
1847	/* Copy the first operand */
1848	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1849				ecc->u.mm.operand_1_len,
1850				CCP_ECC_OPERAND_SIZE, false);
1851	src.address += CCP_ECC_OPERAND_SIZE;
1852
1853	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1854		/* Copy the second operand */
1855		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1856					ecc->u.mm.operand_2_len,
1857					CCP_ECC_OPERAND_SIZE, false);
1858		src.address += CCP_ECC_OPERAND_SIZE;
1859	}
1860
1861	/* Restore the workarea address */
1862	src.address = save;
1863
1864	/* Prepare the output area for the operation */
1865	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1866				   DMA_FROM_DEVICE);
1867	if (ret)
1868		goto e_src;
1869
1870	op.soc = 1;
1871	op.src.u.dma.address = src.dma.address;
1872	op.src.u.dma.offset = 0;
1873	op.src.u.dma.length = src.length;
1874	op.dst.u.dma.address = dst.dma.address;
1875	op.dst.u.dma.offset = 0;
1876	op.dst.u.dma.length = dst.length;
1877
1878	op.u.ecc.function = cmd->u.ecc.function;
1879
1880	ret = ccp_perform_ecc(&op);
1881	if (ret) {
1882		cmd->engine_error = cmd_q->cmd_error;
1883		goto e_dst;
1884	}
1885
1886	ecc->ecc_result = le16_to_cpup(
1887		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1888	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1889		ret = -EIO;
1890		goto e_dst;
1891	}
1892
1893	/* Save the ECC result */
1894	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1895
1896e_dst:
1897	ccp_dm_free(&dst);
1898
1899e_src:
1900	ccp_dm_free(&src);
1901
1902	return ret;
1903}
1904
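/* Perform an ECC point math operation: concatenate the byte-reversed
 * modulus, point coordinates (with Z set to 1) and remaining operands
 * into the source buffer, run the ECC engine and reverse the result X
 * and Y coordinates back into the caller's buffers.
 */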
1905static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1906{
1907	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1908	struct ccp_dm_workarea src, dst;
1909	struct ccp_op op;
1910	int ret;
1911	u8 *save;
1912
1913	if (!ecc->u.pm.point_1.x ||
1914	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1915	    !ecc->u.pm.point_1.y ||
1916	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1917		return -EINVAL;
1918
1919	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1920		if (!ecc->u.pm.point_2.x ||
1921		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1922		    !ecc->u.pm.point_2.y ||
1923		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1924			return -EINVAL;
1925	} else {
1926		if (!ecc->u.pm.domain_a ||
1927		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1928			return -EINVAL;
1929
1930		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1931			if (!ecc->u.pm.scalar ||
1932			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1933				return -EINVAL;
1934	}
1935
1936	if (!ecc->u.pm.result.x ||
1937	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1938	    !ecc->u.pm.result.y ||
1939	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1940		return -EINVAL;
1941
1942	memset(&op, 0, sizeof(op));
1943	op.cmd_q = cmd_q;
1944	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1945
1946	/* Concatenate the modulus and the operands. Both the modulus and
1947	 * the operands must be in little endian format.  Since the input
1948	 * is in big endian format it must be converted and placed in a
1949	 * fixed length buffer.
1950	 */
1951	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1952				   DMA_TO_DEVICE);
1953	if (ret)
1954		return ret;
1955
1956	/* Save the workarea address since it is updated in order to perform
1957	 * the concatenation
1958	 */
1959	save = src.address;
1960
1961	/* Copy the ECC modulus */
1962	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1963				CCP_ECC_OPERAND_SIZE, false);
1964	src.address += CCP_ECC_OPERAND_SIZE;
1965
1966	/* Copy the first point X and Y coordinate */
1967	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1968				ecc->u.pm.point_1.x_len,
1969				CCP_ECC_OPERAND_SIZE, false);
1970	src.address += CCP_ECC_OPERAND_SIZE;
1971	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1972				ecc->u.pm.point_1.y_len,
1973				CCP_ECC_OPERAND_SIZE, false);
1974	src.address += CCP_ECC_OPERAND_SIZE;
1975
1976	/* Set the first point Z coordinate to 1 */
1977	*(src.address) = 0x01;
1978	src.address += CCP_ECC_OPERAND_SIZE;
1979
1980	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1981		/* Copy the second point X and Y coordinate */
1982		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1983					ecc->u.pm.point_2.x_len,
1984					CCP_ECC_OPERAND_SIZE, false);
1985		src.address += CCP_ECC_OPERAND_SIZE;
1986		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1987					ecc->u.pm.point_2.y_len,
1988					CCP_ECC_OPERAND_SIZE, false);
1989		src.address += CCP_ECC_OPERAND_SIZE;
1990
1991		/* Set the second point Z coordinate to 1 */
1992		*(src.address) = 0x01;
1993		src.address += CCP_ECC_OPERAND_SIZE;
1994	} else {
1995		/* Copy the Domain "a" parameter */
1996		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1997					ecc->u.pm.domain_a_len,
1998					CCP_ECC_OPERAND_SIZE, false);
1999		src.address += CCP_ECC_OPERAND_SIZE;
2000
2001		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2002			/* Copy the scalar value */
2003			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2004						ecc->u.pm.scalar_len,
2005						CCP_ECC_OPERAND_SIZE, false);
2006			src.address += CCP_ECC_OPERAND_SIZE;
2007		}
2008	}
2009
2010	/* Restore the workarea address */
2011	src.address = save;
2012
2013	/* Prepare the output area for the operation */
2014	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2015				   DMA_FROM_DEVICE);
2016	if (ret)
2017		goto e_src;
2018
2019	op.soc = 1;
2020	op.src.u.dma.address = src.dma.address;
2021	op.src.u.dma.offset = 0;
2022	op.src.u.dma.length = src.length;
2023	op.dst.u.dma.address = dst.dma.address;
2024	op.dst.u.dma.offset = 0;
2025	op.dst.u.dma.length = dst.length;
2026
2027	op.u.ecc.function = cmd->u.ecc.function;
2028
2029	ret = ccp_perform_ecc(&op);
2030	if (ret) {
2031		cmd->engine_error = cmd_q->cmd_error;
2032		goto e_dst;
2033	}
2034
2035	ecc->ecc_result = le16_to_cpup(
2036		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2037	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2038		ret = -EIO;
2039		goto e_dst;
2040	}
2041
2042	/* Save the workarea address since it is updated as we walk through
2043	 * to copy the point math result
2044	 */
2045	save = dst.address;
2046
2047	/* Save the ECC result X and Y coordinates */
2048	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2049				CCP_ECC_MODULUS_BYTES);
2050	dst.address += CCP_ECC_OUTPUT_SIZE;
2051	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2052				CCP_ECC_MODULUS_BYTES);
2053	dst.address += CCP_ECC_OUTPUT_SIZE;
2054
2055	/* Restore the workarea address */
2056	dst.address = save;
2057
2058e_dst:
2059	ccp_dm_free(&dst);
2060
2061e_src:
2062	ccp_dm_free(&src);
2063
2064	return ret;
2065}
2066
2067static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2068{
2069	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2070
2071	ecc->ecc_result = 0;
2072
2073	if (!ecc->mod ||
2074	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2075		return -EINVAL;
2076
2077	switch (ecc->function) {
2078	case CCP_ECC_FUNCTION_MMUL_384BIT:
2079	case CCP_ECC_FUNCTION_MADD_384BIT:
2080	case CCP_ECC_FUNCTION_MINV_384BIT:
2081		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2082
2083	case CCP_ECC_FUNCTION_PADD_384BIT:
2084	case CCP_ECC_FUNCTION_PMUL_384BIT:
2085	case CCP_ECC_FUNCTION_PDBL_384BIT:
2086		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2087
2088	default:
2089		return -EINVAL;
2090	}
2091}
2092
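/* Execute a command on a command queue: reset the per-command error
 * state, read the current queue depth and dispatch to the engine
 * specific handler.
 */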
2093int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2094{
2095	int ret;
2096
2097	cmd->engine_error = 0;
2098	cmd_q->cmd_error = 0;
2099	cmd_q->int_rcvd = 0;
2100	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2101
2102	switch (cmd->engine) {
2103	case CCP_ENGINE_AES:
2104		ret = ccp_run_aes_cmd(cmd_q, cmd);
2105		break;
2106	case CCP_ENGINE_XTS_AES_128:
2107		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2108		break;
2109	case CCP_ENGINE_SHA:
2110		ret = ccp_run_sha_cmd(cmd_q, cmd);
2111		break;
2112	case CCP_ENGINE_RSA:
2113		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2114		break;
2115	case CCP_ENGINE_PASSTHRU:
2116		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2117		break;
2118	case CCP_ENGINE_ECC:
2119		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2120		break;
2121	default:
2122		ret = -EINVAL;
2123	}
2124
2125	return ret;
2126}
2127