@@ -20,7 +20,6 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
{
volatile unsigned int *a;
- mb();
a = __ldcw_align(x);
while (__ldcw(a) == 0)
while (*a == 0)
@@ -30,17 +29,16 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *x,
local_irq_disable();
} else
cpu_relax();
- mb();
}
#define arch_spin_lock_flags arch_spin_lock_flags
static inline void arch_spin_unlock(arch_spinlock_t *x)
{
volatile unsigned int *a;
- mb();
+
a = __ldcw_align(x);
- *a = 1;
mb();
+ *a = 1;
}
static inline int arch_spin_trylock(arch_spinlock_t *x)
@@ -48,10 +46,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *x)
volatile unsigned int *a;
int ret;
- mb();
a = __ldcw_align(x);
ret = __ldcw(a) != 0;
- mb();
return ret;
}
Now that mb() is an instruction barrier, issuing unnecessary barriers slows performance. The spinlock routines contain a number of unnecessary barriers. The __ldcw() define is both a hardware and a compiler barrier, so the mb() barriers in the routines that use __ldcw() serve no purpose. The only barrier needed is the one in arch_spin_unlock(), where we must ensure that all accesses are complete before the lock is released.

Signed-off-by: John David Anglin <dave.anglin@bell.net>