/* * bakery_lock.c - Lamport's bakery algorithm * * Copyright (C) 2015 ARM Limited. All rights reserved. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE.txt file. * * * Simplest implementation of Lamport's bakery lock [1]. Applies only to device * memory with attributes non-gathering and non-reordering. * * This algorithm's strength resides in the fact that it doesn't rely on * hardware synchronisation mechanisms and as such, doesn't require normal * cacheable memory on ARMv8. CPUs write only to their own memory locations, * and read from all other CPUs' ones, in order to decide whose turn it is to * have the lock. * * The algorithm correctness is based on the following assumptions: * * 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically. * In other words, simultaneous read and write to choosing[k] do not occur. * In this implementation, it is guaranteed by single-copy atomicity, for * accesses of type Device with non-gathering attributes. The algorithm * doesn't require accesses to number[k] to be atomic, even though this * implementation guarantees that as well. * * 2) Storage of number[k] allows it to become large enough for practical use of * the lock. Indeed, if the lock is contended all of the time, the value of * max(number[1..N]) will keep increasing, and this algorithm doesn't handle * wrapping of the ticket number. In this implementation, we assume that we * will never reach 32766 (0x7fff) overlapping calls to bakery_lock. * * [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem" */ #include #include /* * Return the result of (number_a, cpu_a) < (number_b, cpu_b) */ static unsigned int less_than(unsigned long cpu_a, unsigned long number_a, unsigned long cpu_b, unsigned long number_b) { if (number_a == number_b) return cpu_a < cpu_b; return number_a < number_b; } static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self) { int cpu; unsigned int max_number = 0; bakery_ticket_t ticket; for (cpu = 0; cpu < NR_CPUS; cpu++) { if (cpu == self) continue; ticket = read_ticket_once(tickets[cpu]); if (max_number < ticket.number) max_number = ticket.number; } return 1 + max_number; } /** * Wait for our turn to enter a critical section * * @tickets: array of size NR_CPUS, indexed by logical IDs. * @self: logical ID of the current CPU * * Note: since this implementation assumes that all loads and stores to tickets * are of Device type with non-gathering and non-reordering attributes, we * expect all of them to be performed, in program order. As a result, the * following function is pretty relaxed in terms of barriers: we only * synchronize before sev(), and introduce system-wide memory barriers around * the critical section. */ void bakery_lock(bakery_ticket_t *tickets, unsigned self) { int cpu, number_self; bakery_ticket_t ticket; /* Doorway */ write_ticket_once(tickets[self], 1, 0); number_self = choose_number(tickets, self); write_ticket_once(tickets[self], 0, number_self); dsb(st); sev(); /* Bakery */ for (cpu = 0; cpu < NR_CPUS; cpu++) { uint16_t number_cpu; if (cpu == self) continue; ticket = read_ticket_once(tickets[cpu]); while (ticket.choosing) { wfe(); ticket = read_ticket_once(tickets[cpu]); } number_cpu = ticket.number; /* * Wait until that CPU updates its ticket. We only need to do * the comparison once, since any update to tickets[cpu].number * will be to a value greater than ours, or zero. */ if (number_cpu != 0 && less_than(cpu, number_cpu, self, number_self)) { do { wfe(); ticket = read_ticket_once(tickets[cpu]); } while (number_cpu == ticket.number); } } dmb(sy); } void bakery_unlock(bakery_ticket_t *tickets, unsigned self) { dmb(sy); write_ticket_once(tickets[self], 0, 0); dsb(st); sev(); }