lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Tue, 13 May 2014 14:17:35 +0200
From:	Greg KH <gregkh@...uxfoundation.org>
To:	linux-kernel@...r.kernel.org,
	Andrew Morton <akpm@...ux-foundation.org>,
	torvalds@...ux-foundation.org, stable@...r.kernel.org
Cc:	lwn@....net, Jiri Slaby <jslaby@...e.cz>
Subject: Re: Linux 3.4.90

diff --git a/Makefile b/Makefile
index 2c2ec2cedd52..aa1001213eb1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 4
-SUBLEVEL = 89
+SUBLEVEL = 90
 EXTRAVERSION =
 NAME = Saber-toothed Squirrel
 
diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
index f8a751c03282..5bf34ec89669 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate.S
@@ -44,6 +44,7 @@ LEAF(swsusp_arch_resume)
 	bne t1, t3, 1b
 	PTR_L t0, PBE_NEXT(t0)
 	bnez t0, 0b
+	jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */
 	PTR_LA t0, saved_regs
 	PTR_L ra, PT_R31(t0)
 	PTR_L sp, PT_R29(t0)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 1eb7f90cb7b9..eb4d2a254b35 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -24,10 +24,6 @@
 .align 16
 .Lbswap_mask:
 	.octa 0x000102030405060708090a0b0c0d0e0f
-.Lpoly:
-	.octa 0xc2000000000000000000000000000001
-.Ltwo_one:
-	.octa 0x00000001000000000000000000000001
 
 #define DATA	%xmm0
 #define SHASH	%xmm1
@@ -131,27 +127,3 @@ ENTRY(clmul_ghash_update)
 	movups DATA, (%rdi)
 .Lupdate_just_ret:
 	ret
-
-/*
- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
- *
- * Calculate hash_key << 1 mod poly
- */
-ENTRY(clmul_ghash_setkey)
-	movaps .Lbswap_mask, BSWAP
-	movups (%rsi), %xmm0
-	PSHUFB_XMM BSWAP %xmm0
-	movaps %xmm0, %xmm1
-	psllq $1, %xmm0
-	psrlq $63, %xmm1
-	movaps %xmm1, %xmm2
-	pslldq $8, %xmm1
-	psrldq $8, %xmm2
-	por %xmm1, %xmm0
-	# reduction
-	pshufd $0b00100100, %xmm2, %xmm1
-	pcmpeqd .Ltwo_one, %xmm1
-	pand .Lpoly, %xmm1
-	pxor %xmm1, %xmm0
-	movups %xmm0, (%rdi)
-	ret
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index b4bf0a63b520..c07446d17463 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
 void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 			const be128 *shash);
 
-void clmul_ghash_setkey(be128 *shash, const u8 *key);
-
 struct ghash_async_ctx {
 	struct cryptd_ahash *cryptd_tfm;
 };
@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
 			const u8 *key, unsigned int keylen)
 {
 	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+	be128 *x = (be128 *)key;
+	u64 a, b;
 
 	if (keylen != GHASH_BLOCK_SIZE) {
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
-	clmul_ghash_setkey(&ctx->shash, key);
+	/* perform multiplication by 'x' in GF(2^128) */
+	a = be64_to_cpu(x->a);
+	b = be64_to_cpu(x->b);
+
+	ctx->shash.a = (__be64)((b << 1) | (a >> 63));
+	ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+
+	if (a >> 63)
+		ctx->shash.b ^= cpu_to_be64(0xc2);
 
 	return 0;
 }
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f2f37171e21a..6e67fdebdada 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4700,21 +4700,26 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap)
 {
 	struct ata_queued_cmd *qc = NULL;
-	unsigned int i;
+	unsigned int i, tag;
 
 	/* no command while frozen */
 	if (unlikely(ap->pflags & ATA_PFLAG_FROZEN))
 		return NULL;
 
-	/* the last tag is reserved for internal command. */
-	for (i = 0; i < ATA_MAX_QUEUE - 1; i++)
-		if (!test_and_set_bit(i, &ap->qc_allocated)) {
-			qc = __ata_qc_from_tag(ap, i);
+	for (i = 0; i < ATA_MAX_QUEUE; i++) {
+		tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE;
+
+		/* the last tag is reserved for internal command. */
+		if (tag == ATA_TAG_INTERNAL)
+			continue;
+
+		if (!test_and_set_bit(tag, &ap->qc_allocated)) {
+			qc = __ata_qc_from_tag(ap, tag);
+			qc->tag = tag;
+			ap->last_tag = tag;
 			break;
 		}
-
-	if (qc)
-		qc->tag = i;
+	}
 
 	return qc;
 }
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index c82f06e639f8..2cac6e64b67d 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3058,7 +3058,10 @@ static int raw_cmd_copyout(int cmd, void __user *param,
 	int ret;
 
 	while (ptr) {
-		ret = copy_to_user(param, ptr, sizeof(*ptr));
+		struct floppy_raw_cmd cmd = *ptr;
+		cmd.next = NULL;
+		cmd.kernel_data = NULL;
+		ret = copy_to_user(param, &cmd, sizeof(cmd));
 		if (ret)
 			return -EFAULT;
 		param += sizeof(struct floppy_raw_cmd);
@@ -3112,10 +3115,11 @@ loop:
 		return -ENOMEM;
 	*rcmd = ptr;
 	ret = copy_from_user(ptr, param, sizeof(*ptr));
-	if (ret)
-		return -EFAULT;
 	ptr->next = NULL;
 	ptr->buffer_length = 0;
+	ptr->kernel_data = NULL;
+	if (ret)
+		return -EFAULT;
 	param += sizeof(struct floppy_raw_cmd);
 	if (ptr->cmd_count > 33)
 			/* the command may now also take up the space
@@ -3131,7 +3135,6 @@ loop:
 	for (i = 0; i < 16; i++)
 		ptr->reply[i] = 0;
 	ptr->resultcode = 0;
-	ptr->kernel_data = NULL;
 
 	if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
 		if (ptr->length <= 0)
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index 385c58e8405b..0f8114de0877 100644
--- a/drivers/gpio/gpio-mxs.c
+++ b/drivers/gpio/gpio-mxs.c
@@ -167,7 +167,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port)
 	ct->regs.ack = PINCTRL_IRQSTAT(port->id) + MXS_CLR;
 	ct->regs.mask = PINCTRL_IRQEN(port->id);
 
-	irq_setup_generic_chip(gc, IRQ_MSK(32), 0, IRQ_NOREQUEST, 0);
+	irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK,
+			       IRQ_NOREQUEST, 0);
 }
 
 static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 342ffb7ec3d2..a83f7acdbe03 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -579,6 +579,14 @@ static const struct dmi_system_id intel_no_crt[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"),
 		},
 	},
+	{
+		.callback = intel_no_crt_dmi_callback,
+		.ident = "DELL XPS 8700",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "XPS 8700"),
+		},
+	},
 	{ }
 };
 
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c540dfff1f81..b253744fc3c8 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1446,9 +1446,9 @@ static void process_deferred_bios(struct pool *pool)
 		 */
 		if (ensure_next_mapping(pool)) {
 			spin_lock_irqsave(&pool->lock, flags);
+			bio_list_add(&pool->deferred_bios, bio);
 			bio_list_merge(&pool->deferred_bios, &bios);
 			spin_unlock_irqrestore(&pool->lock, flags);
-
 			break;
 		}
 
diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c
index 8febe46e1105..9f55d40ec69e 100644
--- a/drivers/mtd/nand/nuc900_nand.c
+++ b/drivers/mtd/nand/nuc900_nand.c
@@ -250,7 +250,7 @@ static void nuc900_nand_enable(struct nuc900_nand *nand)
 	val = __raw_readl(nand->reg + REG_FMICSR);
 
 	if (!(val & NAND_EN))
-		__raw_writel(val | NAND_EN, REG_FMICSR);
+		__raw_writel(val | NAND_EN, nand->reg + REG_FMICSR);
 
 	val = __raw_readl(nand->reg + REG_SMCSR);
 
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 9e2dfd517aa5..539835fabe61 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -59,15 +59,12 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
 	struct attribute_group *attr_group;
 	struct attribute **attributes;
 	struct sm_sysfs_attribute *vendor_attribute;
+	char *vendor;
 
-	int vendor_len = strnlen(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET,
-					SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET);
-
-	char *vendor = kmalloc(vendor_len, GFP_KERNEL);
+	vendor = kstrndup(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET,
+			  SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET, GFP_KERNEL);
 	if (!vendor)
 		goto error1;
-	memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len);
-	vendor[vendor_len] = 0;
 
 	/* Initialize sysfs attributes */
 	vendor_attribute =
@@ -78,7 +75,7 @@ struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
 	sysfs_attr_init(&vendor_attribute->dev_attr.attr);
 
 	vendor_attribute->data = vendor;
-	vendor_attribute->len = vendor_len;
+	vendor_attribute->len = strlen(vendor);
 	vendor_attribute->dev_attr.attr.name = "vendor";
 	vendor_attribute->dev_attr.attr.mode = S_IRUGO;
 	vendor_attribute->dev_attr.show = sm_attr_show;
diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c
index 4ce3e1f036cc..547964dff355 100644
--- a/drivers/net/wireless/b43/phy_n.c
+++ b/drivers/net/wireless/b43/phy_n.c
@@ -4599,22 +4599,22 @@ static void b43_nphy_channel_setup(struct b43_wldev *dev,
 	int ch = new_channel->hw_value;
 
 	u16 old_band_5ghz;
-	u32 tmp32;
+	u16 tmp16;
 
 	old_band_5ghz =
 		b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ;
 	if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) {
-		tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
+		tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
+		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
 		b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000);
-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
+		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
 		b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ);
 	} else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) {
 		b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ);
-		tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR);
-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4);
+		tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR);
+		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4);
 		b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF);
-		b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32);
+		b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16);
 	}
 
 	b43_chantab_phy_upload(dev, e);
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
index 91d2e28db4d7..a4387acbf220 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
@@ -985,6 +985,17 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
 	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
 	int err = 0;
 	static bool iqk_initialized;
+	unsigned long flags;
+
+	/* As this function can take a very long time (up to 350 ms)
+	 * and can be called with irqs disabled, reenable the irqs
+	 * to let the other devices continue being serviced.
+	 *
+	 * It is safe doing so since our own interrupts will only be enabled
+	 * in a subsequent step.
+	 */
+	local_save_flags(flags);
+	local_irq_enable();
 
 	rtlhal->hw_type = HARDWARE_TYPE_RTL8192CU;
 	err = _rtl92cu_init_mac(hw);
@@ -997,7 +1008,7 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
 			 "Failed to download FW. Init HW without FW now..\n");
 		err = 1;
-		return err;
+		goto exit;
 	}
 	rtlhal->last_hmeboxnum = 0; /* h2c */
 	_rtl92cu_phy_param_tab_init(hw);
@@ -1034,6 +1045,8 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw)
 	_InitPABias(hw);
 	_update_mac_setting(hw);
 	rtl92c_dm_init(hw);
+exit:
+	local_irq_restore(flags);
 	return err;
 }
 
diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
index b141c35bf926..f90eb0cd7ae5 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c
@@ -922,7 +922,7 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 	struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
 	u8 tmp_byte = 0;
-
+	unsigned long flags;
 	bool rtstatus = true;
 	u8 tmp_u1b;
 	int err = false;
@@ -934,6 +934,16 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 
 	rtlpci->being_init_adapter = true;
 
+	/* As this function can take a very long time (up to 350 ms)
+	 * and can be called with irqs disabled, reenable the irqs
+	 * to let the other devices continue being serviced.
+	 *
+	 * It is safe doing so since our own interrupts will only be enabled
+	 * in a subsequent step.
+	 */
+	local_save_flags(flags);
+	local_irq_enable();
+
 	rtlpriv->intf_ops->disable_aspm(hw);
 
 	/* 1. MAC Initialize */
@@ -951,7 +961,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
 			 "Failed to download FW. Init HW without FW now... "
 			 "Please copy FW into /lib/firmware/rtlwifi\n");
-		return 1;
+		err = 1;
+		goto exit;
 	}
 
 	/* After FW download, we have to reset MAC register */
@@ -964,7 +975,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 	/* 3. Initialize MAC/PHY Config by MACPHY_reg.txt */
 	if (!rtl92s_phy_mac_config(hw)) {
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "MAC Config failed\n");
-		return rtstatus;
+		err = rtstatus;
+		goto exit;
 	}
 
 	/* Make sure BB/RF write OK. We should prevent enter IPS. radio off. */
@@ -974,7 +986,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 	/* 4. Initialize BB After MAC Config PHY_reg.txt, AGC_Tab.txt */
 	if (!rtl92s_phy_bb_config(hw)) {
 		RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG, "BB Config failed\n");
-		return rtstatus;
+		err = rtstatus;
+		goto exit;
 	}
 
 	/* 5. Initiailze RF RAIO_A.txt RF RAIO_B.txt */
@@ -1010,7 +1023,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 
 	if (!rtl92s_phy_rf_config(hw)) {
 		RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "RF Config failed\n");
-		return rtstatus;
+		err = rtstatus;
+		goto exit;
 	}
 
 	/* After read predefined TXT, we must set BB/MAC/RF
@@ -1084,8 +1098,9 @@ int rtl92se_hw_init(struct ieee80211_hw *hw)
 
 	rtlpriv->cfg->ops->led_control(hw, LED_CTL_POWER_ON);
 	rtl92s_dm_init(hw);
+exit:
+	local_irq_restore(flags);
 	rtlpci->being_init_adapter = false;
-
 	return err;
 }
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 35a05d1df9cf..9b2fb60d49c4 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -8086,7 +8086,6 @@ _scsih_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	mpt2sas_base_free_resources(ioc);
 	pci_save_state(pdev);
-	pci_disable_device(pdev);
 	pci_set_power_state(pdev, device_state);
 	return 0;
 }
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 1fbffaa9958e..1da617f1c588 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -190,7 +190,7 @@ static struct tty_driver *hvc_console_device(struct console *c, int *index)
 	return hvc_driver;
 }
 
-static int __init hvc_console_setup(struct console *co, char *options)
+static int hvc_console_setup(struct console *co, char *options)
 {	
 	if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES)
 		return -ENODEV;
diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c
index e45833ce975b..182bd680141f 100644
--- a/drivers/video/aty/mach64_accel.c
+++ b/drivers/video/aty/mach64_accel.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/delay.h>
+#include <asm/unaligned.h>
 #include <linux/fb.h>
 #include <video/mach64.h>
 #include "atyfb.h"
@@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image)
 		u32 *pbitmap, dwords = (src_bytes + 3) / 4;
 		for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) {
 			wait_for_fifo(1, par);
-			aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par);
+			aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
 		}
 	}
 
diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c
index 46f72ed53510..4b87318dcf44 100644
--- a/drivers/video/aty/mach64_cursor.c
+++ b/drivers/video/aty/mach64_cursor.c
@@ -5,6 +5,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include "../fb_draw.h"
 
 #include <asm/io.h>
 
@@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 
 	    for (i = 0; i < height; i++) {
 		for (j = 0; j < width; j++) {
+			u16 l = 0xaaaa;
 			b = *src++;
 			m = *msk++;
 			switch (cursor->rop) {
 			case ROP_XOR:
 			    // Upper 4 bits of mask data
-			    fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++);
+			    l = cursor_bits_lookup[(b ^ m) >> 4] |
 			    // Lower 4 bits of mask
-			    fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f],
-				      dst++);
+				    (cursor_bits_lookup[(b ^ m) & 0x0f] << 8);
 			    break;
 			case ROP_COPY:
 			    // Upper 4 bits of mask data
-			    fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++);
+			    l = cursor_bits_lookup[(b & m) >> 4] |
 			    // Lower 4 bits of mask
-			    fb_writeb(cursor_bits_lookup[(b & m) & 0x0f],
-				      dst++);
+				    (cursor_bits_lookup[(b & m) & 0x0f] << 8);
 			    break;
 			}
+			/*
+			 * If cursor size is not a multiple of 8 characters
+			 * we must pad it with transparent pattern (0xaaaa).
+			 */
+			if ((j + 1) * 8 > cursor->image.width) {
+				l = comp(l, 0xaaaa,
+				    (1 << ((cursor->image.width & 7) * 2)) - 1);
+			}
+			fb_writeb(l & 0xff, dst++);
+			fb_writeb(l >> 8, dst++);
 		}
 		dst += offset;
 	    }
diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
index bb5a96b1645d..bcb57235fcc7 100644
--- a/drivers/video/cfbcopyarea.c
+++ b/drivers/video/cfbcopyarea.c
@@ -43,13 +43,22 @@
      */
 
 static void
-bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
-		const unsigned long __iomem *src, int src_idx, int bits,
+bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
+		const unsigned long __iomem *src, unsigned src_idx, int bits,
 		unsigned n, u32 bswapmask)
 {
 	unsigned long first, last;
 	int const shift = dst_idx-src_idx;
-	int left, right;
+
+#if 0
+	/*
+	 * If you suspect bug in this function, compare it with this simple
+	 * memmove implementation.
+	 */
+	fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
+		   (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
+	return;
+#endif
 
 	first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask);
 	last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask);
@@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 		unsigned long d0, d1;
 		int m;
 
-		right = shift & (bits - 1);
-		left = -shift & (bits - 1);
-		bswapmask &= shift;
+		int const left = shift & (bits - 1);
+		int const right = -shift & (bits - 1);
 
 		if (dst_idx+n <= bits) {
 			// Single destination word
@@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			if (shift > 0) {
 				// Single source word
-				d0 >>= right;
+				d0 <<= left;
 			} else if (src_idx+n <= bits) {
 				// Single source word
-				d0 <<= left;
+				d0 >>= right;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src + 1);
 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
-				d0 = d0<<left | d1>>right;
+				d0 = d0 >> right | d1 << left;
 			}
 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
@@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 			if (shift > 0) {
 				// Single source word
 				d1 = d0;
-				d0 >>= right;
-				dst++;
+				d0 <<= left;
 				n -= bits - dst_idx;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src++);
 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
 
-				d0 = d0<<left | d1>>right;
-				dst++;
+				d0 = d0 >> right | d1 << left;
 				n -= bits - dst_idx;
 			}
 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
 			d0 = d1;
+			dst++;
 
 			// Main chunk
 			m = n % bits;
 			n /= bits;
 			while ((n >= 4) && !bswapmask) {
 				d1 = FB_READL(src++);
-				FB_WRITEL(d0 << left | d1 >> right, dst++);
+				FB_WRITEL(d0 >> right | d1 << left, dst++);
 				d0 = d1;
 				d1 = FB_READL(src++);
-				FB_WRITEL(d0 << left | d1 >> right, dst++);
+				FB_WRITEL(d0 >> right | d1 << left, dst++);
 				d0 = d1;
 				d1 = FB_READL(src++);
-				FB_WRITEL(d0 << left | d1 >> right, dst++);
+				FB_WRITEL(d0 >> right | d1 << left, dst++);
 				d0 = d1;
 				d1 = FB_READL(src++);
-				FB_WRITEL(d0 << left | d1 >> right, dst++);
+				FB_WRITEL(d0 >> right | d1 << left, dst++);
 				d0 = d1;
 				n -= 4;
 			}
 			while (n--) {
 				d1 = FB_READL(src++);
 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
-				d0 = d0 << left | d1 >> right;
+				d0 = d0 >> right | d1 << left;
 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
 				FB_WRITEL(d0, dst++);
 				d0 = d1;
 			}
 
 			// Trailing bits
-			if (last) {
-				if (m <= right) {
+			if (m) {
+				if (m <= bits - right) {
 					// Single source word
-					d0 <<= left;
+					d0 >>= right;
 				} else {
 					// 2 source words
 					d1 = FB_READL(src);
 					d1 = fb_rev_pixels_in_long(d1,
 								bswapmask);
-					d0 = d0<<left | d1>>right;
+					d0 = d0 >> right | d1 << left;
 				}
 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
 				FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
@@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
      */
 
 static void
-bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
-		const unsigned long __iomem *src, int src_idx, int bits,
+bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx,
+		const unsigned long __iomem *src, unsigned src_idx, int bits,
 		unsigned n, u32 bswapmask)
 {
 	unsigned long first, last;
 	int shift;
 
-	dst += (n-1)/bits;
-	src += (n-1)/bits;
-	if ((n-1) % bits) {
-		dst_idx += (n-1) % bits;
-		dst += dst_idx >> (ffs(bits) - 1);
-		dst_idx &= bits - 1;
-		src_idx += (n-1) % bits;
-		src += src_idx >> (ffs(bits) - 1);
-		src_idx &= bits - 1;
-	}
+#if 0
+	/*
+	 * If you suspect bug in this function, compare it with this simple
+	 * memmove implementation.
+	 */
+	fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8,
+		   (char *)src + ((src_idx & (bits - 1))) / 8, n / 8);
+	return;
+#endif
+
+	dst += (dst_idx + n - 1) / bits;
+	src += (src_idx + n - 1) / bits;
+	dst_idx = (dst_idx + n - 1) % bits;
+	src_idx = (src_idx + n - 1) % bits;
 
 	shift = dst_idx-src_idx;
 
-	first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask);
-	last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits),
-					    bswapmask);
+	first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask);
+	last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask);
 
 	if (!shift) {
 		// Same alignment for source and dest
 
 		if ((unsigned long)dst_idx+1 >= n) {
 			// Single word
-			if (last)
-				first &= last;
-			FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
+			if (first)
+				last &= first;
+			FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
 		} else {
 			// Multiple destination words
 
 			// Leading bits
-			if (first != ~0UL) {
+			if (first) {
 				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
 				dst--;
 				src--;
@@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 				FB_WRITEL(FB_READL(src--), dst--);
 
 			// Trailing bits
-			if (last)
+			if (last != -1UL)
 				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
 		}
 	} else {
@@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 		unsigned long d0, d1;
 		int m;
 
-		int const left = -shift & (bits-1);
-		int const right = shift & (bits-1);
-		bswapmask &= shift;
+		int const left = shift & (bits-1);
+		int const right = -shift & (bits-1);
 
 		if ((unsigned long)dst_idx+1 >= n) {
 			// Single destination word
-			if (last)
-				first &= last;
+			if (first)
+				last &= first;
 			d0 = FB_READL(src);
 			if (shift < 0) {
 				// Single source word
-				d0 <<= left;
+				d0 >>= right;
 			} else if (1+(unsigned long)src_idx >= n) {
 				// Single source word
-				d0 >>= right;
+				d0 <<= left;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src - 1);
 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
-				d0 = d0>>right | d1<<left;
+				d0 = d0 << left | d1 >> right;
 			}
 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
-			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+			FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
 		} else {
 			// Multiple destination words
 			/** We must always remember the last value read, because in case
@@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 			if (shift < 0) {
 				// Single source word
 				d1 = d0;
-				d0 <<= left;
+				d0 >>= right;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src--);
 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
-				d0 = d0>>right | d1<<left;
+				d0 = d0 << left | d1 >> right;
 			}
 			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
@@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
 			n /= bits;
 			while ((n >= 4) && !bswapmask) {
 				d1 = FB_READL(src--);
-				FB_WRITEL(d0 >> right | d1 << left, dst--);
+				FB_WRITEL(d0 << left | d1 >> right, dst--);
 				d0 = d1;
 				d1 = FB_READL(src--);
-				FB_WRITEL(d0 >> right | d1 << left, dst--);
+				FB_WRITEL(d0 << left | d1 >> right, dst--);
 				d0 = d1;
 				d1 = FB_READL(src--);
-				FB_WRITEL(d0 >> right | d1 << left, dst--);
+				FB_WRITEL(d0 << left | d1 >> right, dst--);
 				d0 = d1;
 				d1 = FB_READL(src--);
-				FB_WRITEL(d0 >> right | d1 << left, dst--);
+				FB_WRITEL(d0 << left | d1 >> right, dst--);
 				d0 = d1;
 				n -= 4;
 			}
 			while (n--) {
 				d1 = FB_READL(src--);
 				d1 = fb_rev_pixels_in_long(d1, bswapmask);
-				d0 = d0 >> right | d1 << left;
+				d0 = d0 << left | d1 >> right;
 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
 				FB_WRITEL(d0, dst--);
 				d0 = d1;
 			}
 
 			// Trailing bits
-			if (last) {
-				if (m <= left) {
+			if (m) {
+				if (m <= bits - left) {
 					// Single source word
-					d0 >>= right;
+					d0 <<= left;
 				} else {
 					// 2 source words
 					d1 = FB_READL(src);
 					d1 = fb_rev_pixels_in_long(d1,
 								bswapmask);
-					d0 = d0>>right | d1<<left;
+					d0 = d0 << left | d1 >> right;
 				}
 				d0 = fb_rev_pixels_in_long(d0, bswapmask);
 				FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
@@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 	u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
 	u32 height = area->height, width = area->width;
 	unsigned long const bits_per_line = p->fix.line_length*8u;
-	unsigned long __iomem *dst = NULL, *src = NULL;
+	unsigned long __iomem *base = NULL;
 	int bits = BITS_PER_LONG, bytes = bits >> 3;
-	int dst_idx = 0, src_idx = 0, rev_copy = 0;
+	unsigned dst_idx = 0, src_idx = 0, rev_copy = 0;
 	u32 bswapmask = fb_compute_bswapmask(p);
 
 	if (p->state != FBINFO_STATE_RUNNING)
@@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 
 	// split the base of the framebuffer into a long-aligned address and the
 	// index of the first bit
-	dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
+	base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
 	dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1));
 	// add offset of source and target area
 	dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
@@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 		while (height--) {
 			dst_idx -= bits_per_line;
 			src_idx -= bits_per_line;
-			dst += dst_idx >> (ffs(bits) - 1);
-			dst_idx &= (bytes - 1);
-			src += src_idx >> (ffs(bits) - 1);
-			src_idx &= (bytes - 1);
-			bitcpy_rev(p, dst, dst_idx, src, src_idx, bits,
+			bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits,
+				base + (src_idx / bits), src_idx % bits, bits,
 				width*p->var.bits_per_pixel, bswapmask);
 		}
 	} else {
 		while (height--) {
-			dst += dst_idx >> (ffs(bits) - 1);
-			dst_idx &= (bytes - 1);
-			src += src_idx >> (ffs(bits) - 1);
-			src_idx &= (bytes - 1);
-			bitcpy(p, dst, dst_idx, src, src_idx, bits,
+			bitcpy(p, base + (dst_idx / bits), dst_idx % bits,
+				base + (src_idx / bits), src_idx % bits, bits,
 				width*p->var.bits_per_pixel, bswapmask);
 			dst_idx += bits_per_line;
 			src_idx += bits_per_line;
diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c
index 8335a6fe303e..0d5cb85d071a 100644
--- a/drivers/video/matrox/matroxfb_accel.c
+++ b/drivers/video/matrox/matroxfb_accel.c
@@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_info *minfo)
 	minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO;
 	if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC;
 	minfo->accel.m_opmode = mopmode;
+	minfo->accel.m_access = maccess;
+	minfo->accel.m_pitch = mpitch;
 }
 
 EXPORT_SYMBOL(matrox_cfbX_init);
 
+static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo)
+{
+	mga_outl(M_MACCESS, minfo->accel.m_access);
+	mga_outl(M_PITCH, minfo->accel.m_pitch);
+}
+
 static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
 			       int sx, int dy, int dx, int height, int width)
 {
@@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
 	CRITBEGIN
 
 	if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
-		mga_fifo(2);
+		mga_fifo(4);
+		matrox_accel_restore_maccess(minfo);
 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
 			 M_DWG_BFCOL | M_DWG_REPLACE);
 		mga_outl(M_AR5, vxres);
@@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
 		start = sy*vxres+sx+curr_ydstorg(minfo);
 		end = start+width;
 	} else {
-		mga_fifo(3);
+		mga_fifo(5);
+		matrox_accel_restore_maccess(minfo);
 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
 		mga_outl(M_SGN, 5);
 		mga_outl(M_AR5, -vxres);
@@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy,
 		start = end+width;
 		dy += height-1;
 	}
-	mga_fifo(4);
+	mga_fifo(6);
+	matrox_accel_restore_maccess(minfo);
 	mga_outl(M_AR0, end);
 	mga_outl(M_AR3, start);
 	mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
@@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres,
 	CRITBEGIN
 
 	if ((dy < sy) || ((dy == sy) && (dx <= sx))) {
-		mga_fifo(2);
+		mga_fifo(4);
+		matrox_accel_restore_maccess(minfo);
 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO |
 			M_DWG_BFCOL | M_DWG_REPLACE);
 		mga_outl(M_AR5, vxres);
@@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres,
 		start = sy*vxres+sx+curr_ydstorg(minfo);
 		end = start+width;
 	} else {
-		mga_fifo(3);
+		mga_fifo(5);
+		matrox_accel_restore_maccess(minfo);
 		mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE);
 		mga_outl(M_SGN, 5);
 		mga_outl(M_AR5, -vxres);
@@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres,
 		start = end+width;
 		dy += height-1;
 	}
-	mga_fifo(5);
+	mga_fifo(7);
+	matrox_accel_restore_maccess(minfo);
 	mga_outl(M_AR0, end);
 	mga_outl(M_AR3, start);
 	mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx);
@@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct matrox_fb_info *minfo, u_int32_t color,
 
 	CRITBEGIN
 
-	mga_fifo(5);
+	mga_fifo(7);
+	matrox_accel_restore_maccess(minfo);
 	mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE);
 	mga_outl(M_FCOL, color);
 	mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
@@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct matrox_fb_info *minfo, u_int32_t bgx,
 	width >>= 1;
 	sx >>= 1;
 	if (width) {
-		mga_fifo(5);
+		mga_fifo(7);
+		matrox_accel_restore_maccess(minfo);
 		mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2);
 		mga_outl(M_FCOL, bgx);
 		mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx);
@@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx,
 
 	CRITBEGIN
 
-	mga_fifo(3);
+	mga_fifo(5);
+	matrox_accel_restore_maccess(minfo);
 	if (easy)
 		mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE);
 	else
@@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx,
 	fxbndry = ((xx + width - 1) << 16) | xx;
 	mmio = minfo->mmio.vbase;
 
-	mga_fifo(6);
+	mga_fifo(8);
+	matrox_accel_restore_maccess(minfo);
 	mga_writel(mmio, M_FXBNDRY, fxbndry);
 	mga_writel(mmio, M_AR0, ar0);
 	mga_writel(mmio, M_AR3, 0);
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index 11ed57bb704e..556d96ce40bf 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -307,6 +307,8 @@ struct matrox_accel_data {
 #endif
 	u_int32_t	m_dwg_rect;
 	u_int32_t	m_opmode;
+	u_int32_t	m_access;
+	u_int32_t	m_pitch;
 };
 
 struct v4l2_queryctrl;
diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c
index aba7686b1a32..ac2cf6dcc598 100644
--- a/drivers/video/tgafb.c
+++ b/drivers/video/tgafb.c
@@ -1146,222 +1146,57 @@ copyarea_line_32bpp(struct fb_info *info, u32 dy, u32 sy,
 	__raw_writel(TGA_MODE_SBM_24BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG);
 }
 
-/* The general case of forward copy in 8bpp mode.  */
+/* The (almost) general case of backward copy in 8bpp mode.  */
 static inline void
-copyarea_foreward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
-		       u32 height, u32 width, u32 line_length)
+copyarea_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
+	      u32 height, u32 width, u32 line_length,
+	      const struct fb_copyarea *area)
 {
 	struct tga_par *par = (struct tga_par *) info->par;
-	unsigned long i, copied, left;
-	unsigned long dpos, spos, dalign, salign, yincr;
-	u32 smask_first, dmask_first, dmask_last;
-	int pixel_shift, need_prime, need_second;
-	unsigned long n64, n32, xincr_first;
+	unsigned i, yincr;
+	int depos, sepos, backward, last_step, step;
+	u32 mask_last;
+	unsigned n32;
 	void __iomem *tga_regs;
 	void __iomem *tga_fb;
 
-	yincr = line_length;
-	if (dy > sy) {
-		dy += height - 1;
-		sy += height - 1;
-		yincr = -yincr;
-	}
-
-	/* Compute the offsets and alignments in the frame buffer.
-	   More than anything else, these control how we do copies.  */
-	dpos = dy * line_length + dx;
-	spos = sy * line_length + sx;
-	dalign = dpos & 7;
-	salign = spos & 7;
-	dpos &= -8;
-	spos &= -8;
-
-	/* Compute the value for the PIXELSHIFT register.  This controls
-	   both non-co-aligned source and destination and copy direction.  */
-	if (dalign >= salign)
-		pixel_shift = dalign - salign;
-	else
-		pixel_shift = 8 - (salign - dalign);
-
-	/* Figure out if we need an additional priming step for the
-	   residue register.  */
-	need_prime = (salign > dalign);
-	if (need_prime)
-		dpos -= 8;
-
-	/* Begin by copying the leading unaligned destination.  Copy enough
-	   to make the next destination address 32-byte aligned.  */
-	copied = 32 - (dalign + (dpos & 31));
-	if (copied == 32)
-		copied = 0;
-	xincr_first = (copied + 7) & -8;
-	smask_first = dmask_first = (1ul << copied) - 1;
-	smask_first <<= salign;
-	dmask_first <<= dalign + need_prime*8;
-	if (need_prime && copied > 24)
-		copied -= 8;
-	left = width - copied;
-
-	/* Care for small copies.  */
-	if (copied > width) {
-		u32 t;
-		t = (1ul << width) - 1;
-		t <<= dalign + need_prime*8;
-		dmask_first &= t;
-		left = 0;
-	}
-
-	/* Attempt to use 64-byte copies.  This is only possible if the
-	   source and destination are co-aligned at 64 bytes.  */
-	n64 = need_second = 0;
-	if ((dpos & 63) == (spos & 63)
-	    && (height == 1 || line_length % 64 == 0)) {
-		/* We may need a 32-byte copy to ensure 64 byte alignment.  */
-		need_second = (dpos + xincr_first) & 63;
-		if ((need_second & 32) != need_second)
-			printk(KERN_ERR "tgafb: need_second wrong\n");
-		if (left >= need_second + 64) {
-			left -= need_second;
-			n64 = left / 64;
-			left %= 64;
-		} else
-			need_second = 0;
-	}
-
-	/* Copy trailing full 32-byte sections.  This will be the main
-	   loop if the 64 byte loop can't be used.  */
-	n32 = left / 32;
-	left %= 32;
-
-	/* Copy the trailing unaligned destination.  */
-	dmask_last = (1ul << left) - 1;
-
-	tga_regs = par->tga_regs_base;
-	tga_fb = par->tga_fb_base;
-
-	/* Set up the MODE and PIXELSHIFT registers.  */
-	__raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_COPY, tga_regs+TGA_MODE_REG);
-	__raw_writel(pixel_shift, tga_regs+TGA_PIXELSHIFT_REG);
-	wmb();
-
-	for (i = 0; i < height; ++i) {
-		unsigned long j;
-		void __iomem *sfb;
-		void __iomem *dfb;
-
-		sfb = tga_fb + spos;
-		dfb = tga_fb + dpos;
-		if (dmask_first) {
-			__raw_writel(smask_first, sfb);
-			wmb();
-			__raw_writel(dmask_first, dfb);
-			wmb();
-			sfb += xincr_first;
-			dfb += xincr_first;
-		}
-
-		if (need_second) {
-			__raw_writel(0xffffffff, sfb);
-			wmb();
-			__raw_writel(0xffffffff, dfb);
-			wmb();
-			sfb += 32;
-			dfb += 32;
-		}
-
-		if (n64 && (((unsigned long)sfb | (unsigned long)dfb) & 63))
-			printk(KERN_ERR
-			       "tgafb: misaligned copy64 (s:%p, d:%p)\n",
-			       sfb, dfb);
-
-		for (j = 0; j < n64; ++j) {
-			__raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC);
-			wmb();
-			__raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST);
-			wmb();
-			sfb += 64;
-			dfb += 64;
-		}
-
-		for (j = 0; j < n32; ++j) {
-			__raw_writel(0xffffffff, sfb);
-			wmb();
-			__raw_writel(0xffffffff, dfb);
-			wmb();
-			sfb += 32;
-			dfb += 32;
-		}
-
-		if (dmask_last) {
-			__raw_writel(0xffffffff, sfb);
-			wmb();
-			__raw_writel(dmask_last, dfb);
-			wmb();
-		}
-
-		spos += yincr;
-		dpos += yincr;
+	/* Do acceleration only if we are aligned on 8 pixels */
+	if ((dx | sx | width) & 7) {
+		cfb_copyarea(info, area);
+		return;
 	}
 
-	/* Reset the MODE register to normal.  */
-	__raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG);
-}
-
-/* The (almost) general case of backward copy in 8bpp mode.  */
-static inline void
-copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
-		       u32 height, u32 width, u32 line_length,
-		       const struct fb_copyarea *area)
-{
-	struct tga_par *par = (struct tga_par *) info->par;
-	unsigned long i, left, yincr;
-	unsigned long depos, sepos, dealign, sealign;
-	u32 mask_first, mask_last;
-	unsigned long n32;
-	void __iomem *tga_regs;
-	void __iomem *tga_fb;
-
 	yincr = line_length;
 	if (dy > sy) {
 		dy += height - 1;
 		sy += height - 1;
 		yincr = -yincr;
 	}
+	backward = dy == sy && dx > sx && dx < sx + width;
 
 	/* Compute the offsets and alignments in the frame buffer.
 	   More than anything else, these control how we do copies.  */
-	depos = dy * line_length + dx + width;
-	sepos = sy * line_length + sx + width;
-	dealign = depos & 7;
-	sealign = sepos & 7;
-
-	/* ??? The documentation appears to be incorrect (or very
-	   misleading) wrt how pixel shifting works in backward copy
-	   mode, i.e. when PIXELSHIFT is negative.  I give up for now.
-	   Do handle the common case of co-aligned backward copies,
-	   but frob everything else back on generic code.  */
-	if (dealign != sealign) {
-		cfb_copyarea(info, area);
-		return;
-	}
-
-	/* We begin the copy with the trailing pixels of the
-	   unaligned destination.  */
-	mask_first = (1ul << dealign) - 1;
-	left = width - dealign;
-
-	/* Care for small copies.  */
-	if (dealign > width) {
-		mask_first ^= (1ul << (dealign - width)) - 1;
-		left = 0;
-	}
+	depos = dy * line_length + dx;
+	sepos = sy * line_length + sx;
+	if (backward)
+		depos += width, sepos += width;
 
 	/* Next copy full words at a time.  */
-	n32 = left / 32;
-	left %= 32;
+	n32 = width / 32;
+	last_step = width % 32;
 
 	/* Finally copy the unaligned head of the span.  */
-	mask_last = -1 << (32 - left);
+	mask_last = (1ul << last_step) - 1;
+
+	if (!backward) {
+		step = 32;
+		last_step = 32;
+	} else {
+		step = -32;
+		last_step = -last_step;
+		sepos -= 32;
+		depos -= 32;
+	}
 
 	tga_regs = par->tga_regs_base;
 	tga_fb = par->tga_fb_base;
@@ -1378,25 +1213,33 @@ copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy,
 
 		sfb = tga_fb + sepos;
 		dfb = tga_fb + depos;
-		if (mask_first) {
-			__raw_writel(mask_first, sfb);
-			wmb();
-			__raw_writel(mask_first, dfb);
-			wmb();
-		}
 
-		for (j = 0; j < n32; ++j) {
-			sfb -= 32;
-			dfb -= 32;
+		for (j = 0; j < n32; j++) {
+			if (j < 2 && j + 1 < n32 && !backward &&
+			    !(((unsigned long)sfb | (unsigned long)dfb) & 63)) {
+				do {
+					__raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC);
+					wmb();
+					__raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST);
+					wmb();
+					sfb += 64;
+					dfb += 64;
+					j += 2;
+				} while (j + 1 < n32);
+				j--;
+				continue;
+			}
 			__raw_writel(0xffffffff, sfb);
 			wmb();
 			__raw_writel(0xffffffff, dfb);
 			wmb();
+			sfb += step;
+			dfb += step;
 		}
 
 		if (mask_last) {
-			sfb -= 32;
-			dfb -= 32;
+			sfb += last_step - step;
+			dfb += last_step - step;
 			__raw_writel(mask_last, sfb);
 			wmb();
 			__raw_writel(mask_last, dfb);
@@ -1457,14 +1300,9 @@ tgafb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
 	else if (bpp == 32)
 		cfb_copyarea(info, area);
 
-	/* Detect overlapping source and destination that requires
-	   a backward copy.  */
-	else if (dy == sy && dx > sx && dx < sx + width)
-		copyarea_backward_8bpp(info, dx, dy, sx, sy, height,
-				       width, line_length, area);
 	else
-		copyarea_foreward_8bpp(info, dx, dy, sx, sy, height,
-				       width, line_length);
+		copyarea_8bpp(info, dx, dy, sx, sy, height,
+			      width, line_length, area);
 }
 
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 8807fe501d20..436625457311 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -305,6 +305,12 @@ static int balloon(void *_vballoon)
 		else if (diff < 0)
 			leak_balloon(vb, -diff);
 		update_balloon_size(vb);
+
+		/*
+		 * For large balloon changes, we could spend a lot of time
+		 * and always have work to do.  Be nice if preempt disabled.
+		 */
+		cond_resched();
 	}
 	return 0;
 }
diff --git a/fs/locks.c b/fs/locks.c
index fcc50ab71cc6..d4f1d89d9bc6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1253,11 +1253,10 @@ int __break_lease(struct inode *inode, unsigned int mode)
 
 restart:
 	break_time = flock->fl_break_time;
-	if (break_time != 0) {
+	if (break_time != 0)
 		break_time -= jiffies;
-		if (break_time == 0)
-			break_time++;
-	}
+	if (break_time == 0)
+		break_time++;
 	locks_insert_block(flock, new_fl);
 	unlock_flocks();
 	error = wait_event_interruptible_timeout(new_fl->fl_wait,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7e13eb428cb2..50d7cb1ee947 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -762,6 +762,7 @@ struct ata_port {
 	unsigned long		qc_allocated;
 	unsigned int		qc_active;
 	int			nr_active_links; /* #links with active qcs */
+	unsigned int		last_tag;	/* track next tag hw expects */
 
 	struct ata_link		link;		/* host default link */
 	struct ata_link		*slave_link;	/* see ata_slave_link_init() */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ