[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMj1kXF6RpaAsN2zUgkO0NW7gMwwhXMHEEM-wpQXxeNJbGJ79A@mail.gmail.com>
Date: Wed, 22 Sep 2021 12:10:41 +0200
From: Ard Biesheuvel <ardb@...nel.org>
To: Emmanuel Gil Peyrot <linkmauve@...kmauve.fr>
Cc: Linux Crypto Mailing List <linux-crypto@...r.kernel.org>,
Ash Logan <ash@...quark.com>,
Jonathan Neuschäfer <j.ne@...teo.net>,
Herbert Xu <herbert@...dor.apana.org.au>,
"David S. Miller" <davem@...emloft.net>,
Rob Herring <robh+dt@...nel.org>,
Michael Ellerman <mpe@...erman.id.au>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Paul Mackerras <paulus@...ba.org>,
"open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS"
<devicetree@...r.kernel.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
"open list:LINUX FOR POWERPC (32-BIT AND 64-BIT)"
<linuxppc-dev@...ts.ozlabs.org>
Subject: Re: [PATCH 1/4] crypto: nintendo-aes - add a new AES driver
On Tue, 21 Sept 2021 at 23:49, Emmanuel Gil Peyrot
<linkmauve@...kmauve.fr> wrote:
>
> This engine implements AES in CBC mode, using 128-bit keys only. It is
> present on both the Wii and the Wii U, and is apparently identical in
> both consoles.
>
> The hardware is capable of firing an interrupt when the operation is
> done, but this driver currently uses a busy loop, I’m not too sure
> whether it would be preferable to switch, nor how to achieve that.
>
> It also supports a mode where no operation is done, and thus could be
> used as a DMA copy engine, but I don’t know how to expose that to the
> kernel or whether it would even be useful.
>
> In my testing, on a Wii U, this driver reaches 80.7 MiB/s, while the
> aes-generic driver only reaches 30.9 MiB/s, so it is a quite welcome
> speedup.
>
> This driver was written based on reversed documentation, see:
> https://wiibrew.org/wiki/Hardware/AES
>
> Signed-off-by: Emmanuel Gil Peyrot <linkmauve@...kmauve.fr>
> Tested-by: Emmanuel Gil Peyrot <linkmauve@...kmauve.fr> # on Wii U
This is redundant - everybody should test the code they submit.
...
> + /* TODO: figure out how to use interrupts here, this will probably
> + * lower throughput but let the CPU do other things while the AES
> + * engine is doing its work. */
So is it worthwhile like this? How much faster is it to use this
accelerator rather than the CPU?
> + do {
> + status = ioread32be(base + AES_CTRL);
> + cpu_relax();
> + } while ((status & AES_CTRL_EXEC) && --counter);
> +
> + /* Do we ever get called with dst ≠ src? If so we have to invalidate
> + * dst in addition to the earlier flush of src. */
> + if (unlikely(dst != src)) {
> + for (i = 0; i < len; i += 32)
> + __asm__("dcbi 0, %0" : : "r" (dst + i));
> + __asm__("sync" : : : "memory");
> + }
> +
> + return counter ? 0 : 1;
> +}
> +
> +static void
> +nintendo_aes_crypt(const void *src, void *dst, u32 len, u8 *iv, int dir,
> + bool firstchunk)
> +{
> + u32 flags = 0;
> + unsigned long iflags;
> + int ret;
> +
> + flags |= AES_CTRL_EXEC_INIT /* | AES_CTRL_IRQ */ | AES_CTRL_ENA;
> +
> + if (dir == AES_DIR_DECRYPT)
> + flags |= AES_CTRL_DEC;
> +
> + if (!firstchunk)
> + flags |= AES_CTRL_IV;
> +
> + /* Start the critical section */
> + spin_lock_irqsave(&lock, iflags);
> +
> + if (firstchunk)
> + writefield(AES_IV, iv);
> +
> + ret = do_crypt(src, dst, len, flags);
> + BUG_ON(ret);
> +
> + spin_unlock_irqrestore(&lock, iflags);
> +}
> +
> +static int nintendo_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
> + unsigned int len)
> +{
> + /* The hardware only supports AES-128 */
> + if (len != AES_KEYSIZE_128)
> + return -EINVAL;
> +
> + writefield(AES_KEY, key);
> + return 0;
> +}
> +
> +static int nintendo_skcipher_crypt(struct skcipher_request *req, int dir)
> +{
> + struct skcipher_walk walk;
> + unsigned int nbytes;
> + int err;
> + char ivbuf[AES_BLOCK_SIZE];
> + unsigned int ivsize;
> +
> + bool firstchunk = true;
> +
> + /* Reset the engine */
> + iowrite32be(0, base + AES_CTRL);
> +
> + err = skcipher_walk_virt(&walk, req, false);
> + ivsize = min(sizeof(ivbuf), walk.ivsize);
> +
> + while ((nbytes = walk.nbytes) != 0) {
> + unsigned int chunkbytes = round_down(nbytes, AES_BLOCK_SIZE);
> + unsigned int ret = nbytes % AES_BLOCK_SIZE;
> +
> + if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> + /* If this is the last chunk and we're decrypting, take
> + * note of the IV (which is the last ciphertext block)
> + */
> + memcpy(ivbuf, walk.src.virt.addr + walk.total - ivsize,
> + ivsize);
> + }
> +
> + nintendo_aes_crypt(walk.src.virt.addr, walk.dst.virt.addr,
> + chunkbytes, walk.iv, dir, firstchunk);
> +
> + if (walk.total == chunkbytes && dir == AES_DIR_ENCRYPT) {
> + /* If this is the last chunk and we're encrypting, take
> + * note of the IV (which is the last ciphertext block)
> + */
> + memcpy(walk.iv,
> + walk.dst.virt.addr + walk.total - ivsize,
> + ivsize);
> + } else if (walk.total == chunkbytes && dir == AES_DIR_DECRYPT) {
> + memcpy(walk.iv, ivbuf, ivsize);
> + }
> +
> + err = skcipher_walk_done(&walk, ret);
> + firstchunk = false;
> + }
> +
> + return err;
> +}
> +
> +static int nintendo_cbc_encrypt(struct skcipher_request *req)
> +{
> + return nintendo_skcipher_crypt(req, AES_DIR_ENCRYPT);
> +}
> +
> +static int nintendo_cbc_decrypt(struct skcipher_request *req)
> +{
> + return nintendo_skcipher_crypt(req, AES_DIR_DECRYPT);
> +}
> +
> +static struct skcipher_alg nintendo_alg = {
> + .base.cra_name = "cbc(aes)",
> + .base.cra_driver_name = "cbc-aes-nintendo",
> + .base.cra_priority = 400,
> + .base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY,
> + .base.cra_blocksize = AES_BLOCK_SIZE,
> + .base.cra_alignmask = 15,
> + .base.cra_module = THIS_MODULE,
> + .setkey = nintendo_setkey_skcipher,
> + .encrypt = nintendo_cbc_encrypt,
> + .decrypt = nintendo_cbc_decrypt,
> + .min_keysize = AES_KEYSIZE_128,
> + .max_keysize = AES_KEYSIZE_128,
> + .ivsize = AES_BLOCK_SIZE,
> +};
> +
> +static int nintendo_aes_remove(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> +
> + crypto_unregister_skcipher(&nintendo_alg);
> + devm_iounmap(dev, base);
> + base = NULL;
> +
> + return 0;
> +}
> +
> +static int nintendo_aes_probe(struct platform_device *pdev)
> +{
> + struct device *dev = &pdev->dev;
> + struct resource *res;
> + int ret;
> +
> + res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> + base = devm_ioremap_resource(dev, res);
> + if (IS_ERR(base))
> + return PTR_ERR(base);
> +
> + spin_lock_init(&lock);
> +
> + ret = crypto_register_skcipher(&nintendo_alg);
> + if (ret)
> + goto eiomap;
> +
> + dev_notice(dev, "Nintendo Wii and Wii U AES engine enabled\n");
> + return 0;
> +
> + eiomap:
> + devm_iounmap(dev, base);
> +
> + dev_err(dev, "Nintendo Wii and Wii U AES initialization failed\n");
> + return ret;
> +}
> +
> +static const struct of_device_id nintendo_aes_of_match[] = {
> + { .compatible = "nintendo,hollywood-aes", },
> + { .compatible = "nintendo,latte-aes", },
> + {/* sentinel */},
> +};
> +MODULE_DEVICE_TABLE(of, nintendo_aes_of_match);
> +
> +static struct platform_driver nintendo_aes_driver = {
> + .driver = {
> + .name = "nintendo-aes",
> + .of_match_table = nintendo_aes_of_match,
> + },
> + .probe = nintendo_aes_probe,
> + .remove = nintendo_aes_remove,
> +};
> +
> +module_platform_driver(nintendo_aes_driver);
> +
> +MODULE_AUTHOR("Emmanuel Gil Peyrot <linkmauve@...kmauve.fr>");
> +MODULE_DESCRIPTION("Nintendo Wii and Wii U Hardware AES driver");
> +MODULE_LICENSE("GPL");
> --
> 2.33.0
>
Powered by blists - more mailing lists