@@ -260,13 +260,20 @@ struct swap_cluster_info {
#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */
+/*
+ * The first page in the swap file is the swap header, which is always marked
+ * bad to prevent it from being allocated as an entry. This also prevents the
+ * cluster to which it belongs from being marked free. Therefore 0 is safe to
+ * use as a sentinel to indicate that next is not valid in percpu_cluster.
+ */
+#define SWAP_NEXT_INVALID 0
+
/*
* We assign a cluster to each CPU, so each CPU can allocate swap entry from
* its own cluster and swapout sequentially. The purpose is to optimize swapout
* throughput.
*/
struct percpu_cluster {
- struct swap_cluster_info index; /* Current cluster index */
unsigned int next; /* Likely next allocation offset */
};
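
A quick illustration of why 0 works as the sentinel: offset 0 holds the swap
header and is always marked bad, so the cluster containing it never reaches the
free list, and every offset that can end up cached in ->next belongs to a later
cluster. The following is a minimal userspace sketch of that invariant
(illustrative only, not kernel code; SWAPFILE_CLUSTER is assumed to be 256
here):

    /*
     * Simplified model of the invariant behind SWAP_NEXT_INVALID.  Not kernel
     * code: the names mirror the patch, but the cluster bookkeeping is reduced
     * to plain integers and SWAPFILE_CLUSTER is assumed to be 256.
     */
    #include <assert.h>

    #define SWAP_NEXT_INVALID   0
    #define SWAPFILE_CLUSTER    256
    #define NR_CLUSTERS         8

    struct percpu_cluster {
        unsigned int next;      /* likely next allocation offset */
    };

    int main(void)
    {
        struct percpu_cluster cluster = { .next = SWAP_NEXT_INVALID };

        /*
         * Cluster 0 holds the swap header (offset 0, always marked bad), so
         * it never reaches the free list.  Any cluster the per-CPU cache can
         * hold therefore has index >= 1, and every offset it could cache in
         * ->next is >= SWAPFILE_CLUSTER, i.e. never equal to the sentinel.
         */
        for (unsigned int off = 1 * SWAPFILE_CLUSTER;
             off < NR_CLUSTERS * SWAPFILE_CLUSTER; off++)
            assert(off != SWAP_NEXT_INVALID);

        /* An untouched (or exhausted) per-CPU cache reads back as invalid. */
        assert(cluster.next == SWAP_NEXT_INVALID);
        return 0;
    }
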
@@ -609,7 +609,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
return false;
percpu_cluster = this_cpu_ptr(si->percpu_cluster);
- cluster_set_null(&percpu_cluster->index);
+ percpu_cluster->next = SWAP_NEXT_INVALID;
return true;
}
@@ -622,14 +622,14 @@ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
{
struct percpu_cluster *cluster;
struct swap_cluster_info *ci;
- unsigned long tmp, max;
+ unsigned int tmp, max;
new_cluster:
cluster = this_cpu_ptr(si->percpu_cluster);
- if (cluster_is_null(&cluster->index)) {
+ tmp = cluster->next;
+ if (tmp == SWAP_NEXT_INVALID) {
if (!cluster_list_empty(&si->free_clusters)) {
- cluster->index = si->free_clusters.head;
- cluster->next = cluster_next(&cluster->index) *
+ tmp = cluster_next(&si->free_clusters.head) *
SWAPFILE_CLUSTER;
} else if (!cluster_list_empty(&si->discard_clusters)) {
/*
@@ -649,9 +649,7 @@ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
* Other CPUs can use our cluster if they can't find a free cluster,
* check if there is still free entry in the cluster
*/
- tmp = cluster->next;
- max = min_t(unsigned long, si->max,
- (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
+ max = min_t(unsigned long, si->max, ALIGN(tmp + 1, SWAPFILE_CLUSTER));
if (tmp < max) {
ci = lock_cluster(si, tmp);
while (tmp < max) {
@@ -662,12 +660,13 @@ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
unlock_cluster(ci);
}
if (tmp >= max) {
- cluster_set_null(&cluster->index);
+ cluster->next = SWAP_NEXT_INVALID;
goto new_cluster;
}
- cluster->next = tmp + 1;
*offset = tmp;
*scan_base = tmp;
+ tmp += 1;
+ cluster->next = tmp < max ? tmp : SWAP_NEXT_INVALID;
return true;
}
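
The ALIGN() form computes the same cluster-end bound that the old code derived
from the stored cluster index: for any tmp inside cluster k,
ALIGN(tmp + 1, SWAPFILE_CLUSTER) is (k + 1) * SWAPFILE_CLUSTER, which is exactly
the old (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER term. A small
userspace check of that equivalence (ALIGN() open-coded with the usual
power-of-two rounding; SWAPFILE_CLUSTER assumed to be 256):

    /*
     * Sanity check that ALIGN(tmp + 1, SWAPFILE_CLUSTER) matches the bound
     * the removed code derived from the per-CPU cluster index.  Userspace
     * sketch only.
     */
    #include <assert.h>

    #define SWAPFILE_CLUSTER    256
    #define ALIGN(x, a)         (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        for (unsigned int tmp = 0; tmp < 8 * SWAPFILE_CLUSTER; tmp++) {
            unsigned int idx = tmp / SWAPFILE_CLUSTER;  /* old cluster index */
            unsigned int old_max = (idx + 1) * SWAPFILE_CLUSTER;
            unsigned int new_max = ALIGN(tmp + 1, SWAPFILE_CLUSTER);

            assert(new_max == old_max);
        }
        /* e.g. tmp = 1234 lies in cluster 4, and both forms give 1280. */
        assert(ALIGN(1234 + 1, SWAPFILE_CLUSTER) == 1280);
        return 0;
    }
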
@@ -3138,8 +3137,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster;
+
cluster = per_cpu_ptr(p->percpu_cluster, cpu);
- cluster_set_null(&cluster->index);
+ cluster->next = SWAP_NEXT_INVALID;
}
} else {
atomic_inc(&nr_rotate_swap);
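
Taken together, the per-CPU state is now just one offset with a single invalid
value: swapon() starts every CPU at SWAP_NEXT_INVALID,
scan_swap_map_try_ssd_cluster() caches the next offset to try after a hit (or
invalidates when the cached cluster is used up), and a cluster conflict resets
it so the next allocation takes a fresh cluster. Below is a condensed,
single-CPU, userspace-only model of that flow; the helper name and the
bump-index free list are hypothetical simplifications, and the entry is claimed
in swap_map inline to keep the model self-contained:

    /*
     * Condensed single-CPU model of the allocation flow after this patch.
     * Purely illustrative: no locking, no discard path, the free-cluster list
     * is a bump index, and SWAPFILE_CLUSTER is assumed to be 256.
     */
    #include <assert.h>
    #include <stdbool.h>

    #define SWAP_NEXT_INVALID   0
    #define SWAPFILE_CLUSTER    256
    #define NR_CLUSTERS         4
    #define MAX_OFFSET          (NR_CLUSTERS * SWAPFILE_CLUSTER)
    #define ALIGN(x, a)         (((x) + (a) - 1) & ~((a) - 1))

    static unsigned char swap_map[MAX_OFFSET];      /* 0 == free entry */
    static unsigned int next_free_cluster = 1;      /* cluster 0 holds the header */
    static struct { unsigned int next; } percpu_cluster = { SWAP_NEXT_INVALID };

    static bool try_ssd_cluster(unsigned int *offset)
    {
        unsigned int tmp, max;

    new_cluster:
        tmp = percpu_cluster.next;
        if (tmp == SWAP_NEXT_INVALID) {
            if (next_free_cluster >= NR_CLUSTERS)
                return false;                   /* no free cluster left */
            tmp = next_free_cluster++ * SWAPFILE_CLUSTER;
        }

        max = ALIGN(tmp + 1, SWAPFILE_CLUSTER);
        while (tmp < max && swap_map[tmp])
            tmp++;                              /* skip entries already taken */
        if (tmp >= max) {
            percpu_cluster.next = SWAP_NEXT_INVALID;
            goto new_cluster;
        }

        swap_map[tmp] = 1;                      /* claim the entry (simplified) */
        *offset = tmp;
        tmp += 1;
        percpu_cluster.next = tmp < max ? tmp : SWAP_NEXT_INVALID;
        return true;
    }

    int main(void)
    {
        unsigned int offset;

        swap_map[0] = 1;                        /* header page, "always bad" */

        /* First allocation takes cluster 1 and hands out its first slot. */
        assert(try_ssd_cluster(&offset) && offset == SWAPFILE_CLUSTER);
        /* Subsequent allocations walk the cached cluster sequentially. */
        assert(try_ssd_cluster(&offset) && offset == SWAPFILE_CLUSTER + 1);
        return 0;
    }
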