1 /*
2 * In-kernel transcendent memory (generic implementation)
3 *
4 * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
5 *
 * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
 * "handles" (triples containing a pool id, an object id, and an index), to
8 * pages in a page-accessible memory (PAM). Tmem references the PAM pages via
9 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
10 * set of functions (pamops). Each pampd contains some representation of
11 * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
12 * pages and must be able to insert, find, and delete these pages at a
13 * potential frequency of thousands per second concurrently across many CPUs,
14 * (and, if used with KVM, across many vcpus across many guests).
15 * Tmem is tracked with a hierarchy of data structures, organized by
16 * the elements in a handle-tuple: pool_id, object_id, and page index.
17 * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
 * Each pool contains a hash table of rb_trees of tmem_objs. Each
19 * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
20 * nodes called tmem_objnodes. Each leaf pointer in this tree points to
21 * a pampd, which is accessible only through a small set of callbacks
22 * registered by the PAM implementation (see tmem_register_pamops). Tmem
23 * does all memory allocation via a set of callbacks registered by the tmem
24 * host implementation (e.g. see tmem_register_hostops).
25 */
26
27 #include <linux/list.h>
28 #include <linux/spinlock.h>
29 #include <linux/atomic.h>
30
31 #include "tmem.h"
32
33 /* data structure sentinels used for debugging... see tmem.h */
34 #define POOL_SENTINEL 0x87658765
35 #define OBJ_SENTINEL 0x12345678
36 #define OBJNODE_SENTINEL 0xfedcba09
37
38 /*
39 * A tmem host implementation must use this function to register callbacks
40 * for memory allocation.
41 */
42 static struct tmem_hostops tmem_hostops;
43
44 static void tmem_objnode_tree_init(void);
45
tmem_register_hostops(struct tmem_hostops * m)46 void tmem_register_hostops(struct tmem_hostops *m)
47 {
48 tmem_objnode_tree_init();
49 tmem_hostops = *m;
50 }
51
52 /*
53 * A tmem host implementation must use this function to register
54 * callbacks for a page-accessible memory (PAM) implementation
55 */
56 static struct tmem_pamops tmem_pamops;
57
tmem_register_pamops(struct tmem_pamops * m)58 void tmem_register_pamops(struct tmem_pamops *m)
59 {
60 tmem_pamops = *m;
61 }
62
63 /*
64 * Oid's are potentially very sparse and tmem_objs may have an indeterminately
65 * short life, being added and deleted at a relatively high frequency.
66 * So an rb_tree is an ideal data structure to manage tmem_objs. But because
67 * of the potentially huge number of tmem_objs, each pool manages a hashtable
68 * of rb_trees to reduce search, insert, delete, and rebalancing time.
69 * Each hashbucket also has a lock to manage concurrent access.
70 *
71 * The following routines manage tmem_objs. When any tmem_obj is accessed,
72 * the hashbucket lock must be held.
73 */
74
75 /* searches for object==oid in pool, returns locked object if found */
tmem_obj_find(struct tmem_hashbucket * hb,struct tmem_oid * oidp)76 static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
77 struct tmem_oid *oidp)
78 {
79 struct rb_node *rbnode;
80 struct tmem_obj *obj;
81
82 rbnode = hb->obj_rb_root.rb_node;
83 while (rbnode) {
84 BUG_ON(RB_EMPTY_NODE(rbnode));
85 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
86 switch (tmem_oid_compare(oidp, &obj->oid)) {
87 case 0: /* equal */
88 goto out;
89 case -1:
90 rbnode = rbnode->rb_left;
91 break;
92 case 1:
93 rbnode = rbnode->rb_right;
94 break;
95 }
96 }
97 obj = NULL;
98 out:
99 return obj;
100 }
101
102 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
103
104 /* free an object that has no more pampds in it */
tmem_obj_free(struct tmem_obj * obj,struct tmem_hashbucket * hb)105 static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
106 {
107 struct tmem_pool *pool;
108
109 BUG_ON(obj == NULL);
110 ASSERT_SENTINEL(obj, OBJ);
111 BUG_ON(obj->pampd_count > 0);
112 pool = obj->pool;
113 BUG_ON(pool == NULL);
114 if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
115 tmem_pampd_destroy_all_in_obj(obj);
116 BUG_ON(obj->objnode_tree_root != NULL);
117 BUG_ON((long)obj->objnode_count != 0);
118 atomic_dec(&pool->obj_count);
119 BUG_ON(atomic_read(&pool->obj_count) < 0);
120 INVERT_SENTINEL(obj, OBJ);
121 obj->pool = NULL;
122 tmem_oid_set_invalid(&obj->oid);
123 rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
124 }
125
126 /*
127 * initialize, and insert an tmem_object_root (called only if find failed)
128 */
tmem_obj_init(struct tmem_obj * obj,struct tmem_hashbucket * hb,struct tmem_pool * pool,struct tmem_oid * oidp)129 static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
130 struct tmem_pool *pool,
131 struct tmem_oid *oidp)
132 {
133 struct rb_root *root = &hb->obj_rb_root;
134 struct rb_node **new = &(root->rb_node), *parent = NULL;
135 struct tmem_obj *this;
136
137 BUG_ON(pool == NULL);
138 atomic_inc(&pool->obj_count);
139 obj->objnode_tree_height = 0;
140 obj->objnode_tree_root = NULL;
141 obj->pool = pool;
142 obj->oid = *oidp;
143 obj->objnode_count = 0;
144 obj->pampd_count = 0;
145 SET_SENTINEL(obj, OBJ);
146 while (*new) {
147 BUG_ON(RB_EMPTY_NODE(*new));
148 this = rb_entry(*new, struct tmem_obj, rb_tree_node);
149 parent = *new;
150 switch (tmem_oid_compare(oidp, &this->oid)) {
151 case 0:
152 BUG(); /* already present; should never happen! */
153 break;
154 case -1:
155 new = &(*new)->rb_left;
156 break;
157 case 1:
158 new = &(*new)->rb_right;
159 break;
160 }
161 }
162 rb_link_node(&obj->rb_tree_node, parent, new);
163 rb_insert_color(&obj->rb_tree_node, root);
164 }
165
166 /*
167 * Tmem is managed as a set of tmem_pools with certain attributes, such as
168 * "ephemeral" vs "persistent". These attributes apply to all tmem_objs
169 * and all pampds that belong to a tmem_pool. A tmem_pool is created
170 * or deleted relatively rarely (for example, when a filesystem is
 * mounted or unmounted).
172 */
173
174 /* flush all data from a pool and, optionally, free it */
tmem_pool_flush(struct tmem_pool * pool,bool destroy)175 static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
176 {
177 struct rb_node *rbnode;
178 struct tmem_obj *obj;
179 struct tmem_hashbucket *hb = &pool->hashbucket[0];
180 int i;
181
182 BUG_ON(pool == NULL);
183 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
184 spin_lock(&hb->lock);
185 rbnode = rb_first(&hb->obj_rb_root);
186 while (rbnode != NULL) {
187 obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
188 rbnode = rb_next(rbnode);
189 tmem_pampd_destroy_all_in_obj(obj);
190 tmem_obj_free(obj, hb);
191 (*tmem_hostops.obj_free)(obj, pool);
192 }
193 spin_unlock(&hb->lock);
194 }
195 if (destroy)
196 list_del(&pool->pool_list);
197 }
198
199 /*
200 * A tmem_obj contains a radix-tree-like tree in which the intermediate
201 * nodes are called tmem_objnodes. (The kernel lib/radix-tree.c implementation
202 * is very specialized and tuned for specific uses and is not particularly
203 * suited for use from this code, though some code from the core algorithms has
204 * been reused, thus the copyright notices below). Each tmem_objnode contains
205 * a set of pointers which point to either a set of intermediate tmem_objnodes
 * or a set of pampds.
207 *
208 * Portions Copyright (C) 2001 Momchil Velikov
209 * Portions Copyright (C) 2001 Christoph Hellwig
210 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
211 */
212
/*
 * One step of a root-to-leaf walk through an objnode tree, recorded so
 * that a delete can unwind the walk and prune emptied nodes.
 */
struct tmem_objnode_tree_path {
	struct tmem_objnode *objnode;	/* node visited at this level */
	int offset;			/* slot followed within objnode */
};
217
218 /* objnode height_to_maxindex translation */
219 static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
220
tmem_objnode_tree_init(void)221 static void tmem_objnode_tree_init(void)
222 {
223 unsigned int ht, tmp;
224
225 for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
226 tmp = ht * OBJNODE_TREE_MAP_SHIFT;
227 if (tmp >= OBJNODE_TREE_INDEX_BITS)
228 tmem_objnode_tree_h2max[ht] = ~0UL;
229 else
230 tmem_objnode_tree_h2max[ht] =
231 (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
232 }
233 }
234
tmem_objnode_alloc(struct tmem_obj * obj)235 static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
236 {
237 struct tmem_objnode *objnode;
238
239 ASSERT_SENTINEL(obj, OBJ);
240 BUG_ON(obj->pool == NULL);
241 ASSERT_SENTINEL(obj->pool, POOL);
242 objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
243 if (unlikely(objnode == NULL))
244 goto out;
245 objnode->obj = obj;
246 SET_SENTINEL(objnode, OBJNODE);
247 memset(&objnode->slots, 0, sizeof(objnode->slots));
248 objnode->slots_in_use = 0;
249 obj->objnode_count++;
250 out:
251 return objnode;
252 }
253
tmem_objnode_free(struct tmem_objnode * objnode)254 static void tmem_objnode_free(struct tmem_objnode *objnode)
255 {
256 struct tmem_pool *pool;
257 int i;
258
259 BUG_ON(objnode == NULL);
260 for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
261 BUG_ON(objnode->slots[i] != NULL);
262 ASSERT_SENTINEL(objnode, OBJNODE);
263 INVERT_SENTINEL(objnode, OBJNODE);
264 BUG_ON(objnode->obj == NULL);
265 ASSERT_SENTINEL(objnode->obj, OBJ);
266 pool = objnode->obj->pool;
267 BUG_ON(pool == NULL);
268 ASSERT_SENTINEL(pool, POOL);
269 objnode->obj->objnode_count--;
270 objnode->obj = NULL;
271 (*tmem_hostops.objnode_free)(objnode, pool);
272 }
273
274 /*
275 * lookup index in object and return associated pampd (or NULL if not found)
276 */
tmem_pampd_lookup_in_obj(struct tmem_obj * obj,uint32_t index)277 static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
278 {
279 unsigned int height, shift;
280 struct tmem_objnode **slot = NULL;
281
282 BUG_ON(obj == NULL);
283 ASSERT_SENTINEL(obj, OBJ);
284 BUG_ON(obj->pool == NULL);
285 ASSERT_SENTINEL(obj->pool, POOL);
286
287 height = obj->objnode_tree_height;
288 if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
289 goto out;
290 if (height == 0 && obj->objnode_tree_root) {
291 slot = &obj->objnode_tree_root;
292 goto out;
293 }
294 shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
295 slot = &obj->objnode_tree_root;
296 while (height > 0) {
297 if (*slot == NULL)
298 goto out;
299 slot = (struct tmem_objnode **)
300 ((*slot)->slots +
301 ((index >> shift) & OBJNODE_TREE_MAP_MASK));
302 shift -= OBJNODE_TREE_MAP_SHIFT;
303 height--;
304 }
305 out:
306 return slot != NULL ? *slot : NULL;
307 }
308
tmem_pampd_add_to_obj(struct tmem_obj * obj,uint32_t index,void * pampd)309 static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
310 void *pampd)
311 {
312 int ret = 0;
313 struct tmem_objnode *objnode = NULL, *newnode, *slot;
314 unsigned int height, shift;
315 int offset = 0;
316
317 /* if necessary, extend the tree to be higher */
318 if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
319 height = obj->objnode_tree_height + 1;
320 if (index > tmem_objnode_tree_h2max[height])
321 while (index > tmem_objnode_tree_h2max[height])
322 height++;
323 if (obj->objnode_tree_root == NULL) {
324 obj->objnode_tree_height = height;
325 goto insert;
326 }
327 do {
328 newnode = tmem_objnode_alloc(obj);
329 if (!newnode) {
330 ret = -ENOMEM;
331 goto out;
332 }
333 newnode->slots[0] = obj->objnode_tree_root;
334 newnode->slots_in_use = 1;
335 obj->objnode_tree_root = newnode;
336 obj->objnode_tree_height++;
337 } while (height > obj->objnode_tree_height);
338 }
339 insert:
340 slot = obj->objnode_tree_root;
341 height = obj->objnode_tree_height;
342 shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
343 while (height > 0) {
344 if (slot == NULL) {
345 /* add a child objnode. */
346 slot = tmem_objnode_alloc(obj);
347 if (!slot) {
348 ret = -ENOMEM;
349 goto out;
350 }
351 if (objnode) {
352
353 objnode->slots[offset] = slot;
354 objnode->slots_in_use++;
355 } else
356 obj->objnode_tree_root = slot;
357 }
358 /* go down a level */
359 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
360 objnode = slot;
361 slot = objnode->slots[offset];
362 shift -= OBJNODE_TREE_MAP_SHIFT;
363 height--;
364 }
365 BUG_ON(slot != NULL);
366 if (objnode) {
367 objnode->slots_in_use++;
368 objnode->slots[offset] = pampd;
369 } else
370 obj->objnode_tree_root = pampd;
371 obj->pampd_count++;
372 out:
373 return ret;
374 }
375
tmem_pampd_delete_from_obj(struct tmem_obj * obj,uint32_t index)376 static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
377 {
378 struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
379 struct tmem_objnode_tree_path *pathp = path;
380 struct tmem_objnode *slot = NULL;
381 unsigned int height, shift;
382 int offset;
383
384 BUG_ON(obj == NULL);
385 ASSERT_SENTINEL(obj, OBJ);
386 BUG_ON(obj->pool == NULL);
387 ASSERT_SENTINEL(obj->pool, POOL);
388 height = obj->objnode_tree_height;
389 if (index > tmem_objnode_tree_h2max[height])
390 goto out;
391 slot = obj->objnode_tree_root;
392 if (height == 0 && obj->objnode_tree_root) {
393 obj->objnode_tree_root = NULL;
394 goto out;
395 }
396 shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
397 pathp->objnode = NULL;
398 do {
399 if (slot == NULL)
400 goto out;
401 pathp++;
402 offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
403 pathp->offset = offset;
404 pathp->objnode = slot;
405 slot = slot->slots[offset];
406 shift -= OBJNODE_TREE_MAP_SHIFT;
407 height--;
408 } while (height > 0);
409 if (slot == NULL)
410 goto out;
411 while (pathp->objnode) {
412 pathp->objnode->slots[pathp->offset] = NULL;
413 pathp->objnode->slots_in_use--;
414 if (pathp->objnode->slots_in_use) {
415 if (pathp->objnode == obj->objnode_tree_root) {
416 while (obj->objnode_tree_height > 0 &&
417 obj->objnode_tree_root->slots_in_use == 1 &&
418 obj->objnode_tree_root->slots[0]) {
419 struct tmem_objnode *to_free =
420 obj->objnode_tree_root;
421
422 obj->objnode_tree_root =
423 to_free->slots[0];
424 obj->objnode_tree_height--;
425 to_free->slots[0] = NULL;
426 to_free->slots_in_use = 0;
427 tmem_objnode_free(to_free);
428 }
429 }
430 goto out;
431 }
432 tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
433 pathp--;
434 }
435 obj->objnode_tree_height = 0;
436 obj->objnode_tree_root = NULL;
437
438 out:
439 if (slot != NULL)
440 obj->pampd_count--;
441 BUG_ON(obj->pampd_count < 0);
442 return slot;
443 }
444
445 /* recursively walk the objnode_tree destroying pampds and objnodes */
tmem_objnode_node_destroy(struct tmem_obj * obj,struct tmem_objnode * objnode,unsigned int ht)446 static void tmem_objnode_node_destroy(struct tmem_obj *obj,
447 struct tmem_objnode *objnode,
448 unsigned int ht)
449 {
450 int i;
451
452 if (ht == 0)
453 return;
454 for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
455 if (objnode->slots[i]) {
456 if (ht == 1) {
457 obj->pampd_count--;
458 (*tmem_pamops.free)(objnode->slots[i],
459 obj->pool);
460 objnode->slots[i] = NULL;
461 continue;
462 }
463 tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
464 tmem_objnode_free(objnode->slots[i]);
465 objnode->slots[i] = NULL;
466 }
467 }
468 }
469
tmem_pampd_destroy_all_in_obj(struct tmem_obj * obj)470 static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
471 {
472 if (obj->objnode_tree_root == NULL)
473 return;
474 if (obj->objnode_tree_height == 0) {
475 obj->pampd_count--;
476 (*tmem_pamops.free)(obj->objnode_tree_root, obj->pool);
477 } else {
478 tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
479 obj->objnode_tree_height);
480 tmem_objnode_free(obj->objnode_tree_root);
481 obj->objnode_tree_height = 0;
482 }
483 obj->objnode_tree_root = NULL;
484 }
485
486 /*
487 * Tmem is operated on by a set of well-defined actions:
488 * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
489 * (The tmem ABI allows for subpages and exchanges but these operations
490 * are not included in this implementation.)
491 *
492 * These "tmem core" operations are implemented in the following functions.
493 */
494
495 /*
496 * "Put" a page, e.g. copy a page from the kernel into newly allocated
497 * PAM space (if such space is available). Tmem_put is complicated by
498 * a corner case: What if a page with matching handle already exists in
499 * tmem? To guarantee coherency, one of two actions is necessary: Either
500 * the data for the page must be overwritten, or the page must be
501 * "flushed" so that the data is not accessible to a subsequent "get".
502 * Since these "duplicate puts" are relatively rare, this implementation
503 * always flushes for simplicity.
504 */
tmem_put(struct tmem_pool * pool,struct tmem_oid * oidp,uint32_t index,struct page * page)505 int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
506 struct page *page)
507 {
508 struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
509 void *pampd = NULL, *pampd_del = NULL;
510 int ret = -ENOMEM;
511 bool ephemeral;
512 struct tmem_hashbucket *hb;
513
514 ephemeral = is_ephemeral(pool);
515 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
516 spin_lock(&hb->lock);
517 obj = objfound = tmem_obj_find(hb, oidp);
518 if (obj != NULL) {
519 pampd = tmem_pampd_lookup_in_obj(objfound, index);
520 if (pampd != NULL) {
521 /* if found, is a dup put, flush the old one */
522 pampd_del = tmem_pampd_delete_from_obj(obj, index);
523 BUG_ON(pampd_del != pampd);
524 (*tmem_pamops.free)(pampd, pool);
525 if (obj->pampd_count == 0) {
526 objnew = obj;
527 objfound = NULL;
528 }
529 pampd = NULL;
530 }
531 } else {
532 obj = objnew = (*tmem_hostops.obj_alloc)(pool);
533 if (unlikely(obj == NULL)) {
534 ret = -ENOMEM;
535 goto out;
536 }
537 tmem_obj_init(obj, hb, pool, oidp);
538 }
539 BUG_ON(obj == NULL);
540 BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
541 pampd = (*tmem_pamops.create)(obj->pool, &obj->oid, index, page);
542 if (unlikely(pampd == NULL))
543 goto free;
544 ret = tmem_pampd_add_to_obj(obj, index, pampd);
545 if (unlikely(ret == -ENOMEM))
546 /* may have partially built objnode tree ("stump") */
547 goto delete_and_free;
548 goto out;
549
550 delete_and_free:
551 (void)tmem_pampd_delete_from_obj(obj, index);
552 free:
553 if (pampd)
554 (*tmem_pamops.free)(pampd, pool);
555 if (objnew) {
556 tmem_obj_free(objnew, hb);
557 (*tmem_hostops.obj_free)(objnew, pool);
558 }
559 out:
560 spin_unlock(&hb->lock);
561 return ret;
562 }
563
564 /*
565 * "Get" a page, e.g. if one can be found, copy the tmem page with the
566 * matching handle from PAM space to the kernel. By tmem definition,
567 * when a "get" is successful on an ephemeral page, the page is "flushed",
568 * and when a "get" is successful on a persistent page, the page is retained
569 * in tmem. Note that to preserve
570 * coherency, "get" can never be skipped if tmem contains the data.
571 * That is, if a get is done with a certain handle and fails, any
572 * subsequent "get" must also fail (unless of course there is a
573 * "put" done with the same handle).
574
575 */
tmem_get(struct tmem_pool * pool,struct tmem_oid * oidp,uint32_t index,struct page * page)576 int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp,
577 uint32_t index, struct page *page)
578 {
579 struct tmem_obj *obj;
580 void *pampd;
581 bool ephemeral = is_ephemeral(pool);
582 uint32_t ret = -1;
583 struct tmem_hashbucket *hb;
584
585 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
586 spin_lock(&hb->lock);
587 obj = tmem_obj_find(hb, oidp);
588 if (obj == NULL)
589 goto out;
590 ephemeral = is_ephemeral(pool);
591 if (ephemeral)
592 pampd = tmem_pampd_delete_from_obj(obj, index);
593 else
594 pampd = tmem_pampd_lookup_in_obj(obj, index);
595 if (pampd == NULL)
596 goto out;
597 ret = (*tmem_pamops.get_data)(page, pampd, pool);
598 if (ret < 0)
599 goto out;
600 if (ephemeral) {
601 (*tmem_pamops.free)(pampd, pool);
602 if (obj->pampd_count == 0) {
603 tmem_obj_free(obj, hb);
604 (*tmem_hostops.obj_free)(obj, pool);
605 obj = NULL;
606 }
607 }
608 ret = 0;
609 out:
610 spin_unlock(&hb->lock);
611 return ret;
612 }
613
614 /*
615 * If a page in tmem matches the handle, "flush" this page from tmem such
616 * that any subsequent "get" does not succeed (unless, of course, there
617 * was another "put" with the same handle).
618 */
tmem_flush_page(struct tmem_pool * pool,struct tmem_oid * oidp,uint32_t index)619 int tmem_flush_page(struct tmem_pool *pool,
620 struct tmem_oid *oidp, uint32_t index)
621 {
622 struct tmem_obj *obj;
623 void *pampd;
624 int ret = -1;
625 struct tmem_hashbucket *hb;
626
627 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
628 spin_lock(&hb->lock);
629 obj = tmem_obj_find(hb, oidp);
630 if (obj == NULL)
631 goto out;
632 pampd = tmem_pampd_delete_from_obj(obj, index);
633 if (pampd == NULL)
634 goto out;
635 (*tmem_pamops.free)(pampd, pool);
636 if (obj->pampd_count == 0) {
637 tmem_obj_free(obj, hb);
638 (*tmem_hostops.obj_free)(obj, pool);
639 }
640 ret = 0;
641
642 out:
643 spin_unlock(&hb->lock);
644 return ret;
645 }
646
647 /*
648 * "Flush" all pages in tmem matching this oid.
649 */
tmem_flush_object(struct tmem_pool * pool,struct tmem_oid * oidp)650 int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
651 {
652 struct tmem_obj *obj;
653 struct tmem_hashbucket *hb;
654 int ret = -1;
655
656 hb = &pool->hashbucket[tmem_oid_hash(oidp)];
657 spin_lock(&hb->lock);
658 obj = tmem_obj_find(hb, oidp);
659 if (obj == NULL)
660 goto out;
661 tmem_pampd_destroy_all_in_obj(obj);
662 tmem_obj_free(obj, hb);
663 (*tmem_hostops.obj_free)(obj, pool);
664 ret = 0;
665
666 out:
667 spin_unlock(&hb->lock);
668 return ret;
669 }
670
/*
 * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
 * all subsequent access to this tmem_pool.
 *
 * Returns 0 on success, or -1 if pool is NULL.
 */
int tmem_destroy_pool(struct tmem_pool *pool)
{
	if (pool == NULL)
		return -1;
	tmem_pool_flush(pool, 1);
	return 0;
}
686
687 static LIST_HEAD(tmem_global_pool_list);
688
689 /*
690 * Create a new tmem_pool with the provided flag and return
691 * a pool id provided by the tmem host implementation.
692 */
tmem_new_pool(struct tmem_pool * pool,uint32_t flags)693 void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
694 {
695 int persistent = flags & TMEM_POOL_PERSIST;
696 int shared = flags & TMEM_POOL_SHARED;
697 struct tmem_hashbucket *hb = &pool->hashbucket[0];
698 int i;
699
700 for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
701 hb->obj_rb_root = RB_ROOT;
702 spin_lock_init(&hb->lock);
703 }
704 INIT_LIST_HEAD(&pool->pool_list);
705 atomic_set(&pool->obj_count, 0);
706 SET_SENTINEL(pool, POOL);
707 list_add_tail(&pool->pool_list, &tmem_global_pool_list);
708 pool->persistent = persistent;
709 pool->shared = shared;
710 }
711