1 /*
2  * tmem.h
3  *
4  * Transcendent memory
5  *
6  * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
7  */
8 
9 #ifndef _TMEM_H_
10 #define _TMEM_H_
11 
12 #include <linux/highmem.h>
13 #include <linux/hash.h>
14 #include <linux/atomic.h>
15 
16 /*
17  * These are pre-defined by the Xen<->Linux ABI
18  */
/* NOTE(review): put/get/flush look like request codes -- confirm against the ABI */
#define TMEM_PUT_PAGE			4
#define TMEM_GET_PAGE			5
#define TMEM_FLUSH_PAGE			6
#define TMEM_FLUSH_OBJECT		7
/* three distinct single-bit values (1, 2, 4); presumably pool attribute flags */
#define TMEM_POOL_PERSIST		1
#define TMEM_POOL_SHARED		2
#define TMEM_POOL_PRECOMPRESSED		4
/* a shift count and a 4-bit mask (0xf); how they are combined is not shown here */
#define TMEM_POOL_PAGESIZE_SHIFT	4
#define TMEM_POOL_PAGESIZE_MASK		0xf
/* bits 8..23 set */
#define TMEM_POOL_RESERVED_BITS		0x00ffff00
29 
/*
 * Sentinels have proven very useful for debugging but can be removed
 * or disabled before final merge.
 *
 * Every macro below takes a pointer expression _x to a structure that
 * embeds DECL_SENTINEL, and a name prefix _y which is token-pasted with
 * _SENTINEL to select the expected constant (_y = FOO means FOO_SENTINEL,
 * which the user of these macros must define).
 *
 * _x is parenthesized in each expansion so that any pointer expression
 * (for example &obj or base + 1) binds correctly to the -> operator.
 */
#define SENTINELS
#ifdef SENTINELS
/* field declaration; it carries its own ';' so it is used without one */
#define DECL_SENTINEL uint32_t sentinel;
/* store the expected constant */
#define SET_SENTINEL(_x, _y) ((_x)->sentinel = (_y##_SENTINEL))
/* store the bitwise complement of the expected constant */
#define INVERT_SENTINEL(_x, _y) ((_x)->sentinel = ~(_y##_SENTINEL))
/* warn if the field does not hold the expected constant */
#define ASSERT_SENTINEL(_x, _y) WARN_ON((_x)->sentinel != (_y##_SENTINEL))
/* warn if the field does not hold the complemented constant */
#define ASSERT_INVERTED_SENTINEL(_x, _y) \
	WARN_ON((_x)->sentinel != ~(_y##_SENTINEL))
#else
/* sentinels disabled: no field is declared and every check is a no-op */
#define DECL_SENTINEL
#define SET_SENTINEL(_x, _y) do { } while (0)
#define INVERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
#endif
48 
/*
 * Emit a WARN_ON when spin_is_locked() reports that _l is not locked.
 * NOTE(review): this checks that the lock is held by someone, not that the
 * caller holds it, and spin_is_locked() may not be meaningful on every
 * kernel configuration -- confirm before relying on it.
 */
#define ASSERT_SPINLOCK(_l)	WARN_ON(!spin_is_locked(_l))
50 
51 /*
52  * A pool is the highest-level data structure managed by tmem and
53  * usually corresponds to a large independent set of pages such as
54  * a filesystem.  Each pool has an id, and certain attributes and counters.
55  * It also contains a set of hash buckets, each of which contains an rbtree
56  * of objects and a lock to manage concurrency within the pool.
57  */
58 
/* width, in bits, of a bucket number; also passed to hash_long() */
#define TMEM_HASH_BUCKET_BITS	8
/* number of hash buckets per pool: two to the power of the width above */
#define TMEM_HASH_BUCKETS	(1 << TMEM_HASH_BUCKET_BITS)
61 
/* One hash bucket of a pool: an rbtree root paired with a spinlock. */
struct tmem_hashbucket {
	/* root of this bucket's rbtree of objects */
	struct rb_root obj_rb_root;
	/* NOTE(review): presumably guards obj_rb_root -- confirm in the implementation */
	spinlock_t lock;
};
66 
/*
 * Per-pool state: an id, attribute flags, counters and the fixed-size
 * array of hash buckets.
 */
struct tmem_pool {
	void *client; /* "up" for some clients, avoids table lookup */
	/* list linkage; the list this joins is not visible in this header */
	struct list_head pool_list;
	uint32_t pool_id;
	/* read by is_persistent() / is_ephemeral() */
	bool persistent;
	bool shared;
	atomic_t obj_count;
	atomic_t refcount;
	/* NOTE(review): presumably indexed by tmem_oid_hash() -- confirm */
	struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
	/* debug sentinel field; present only while SENTINELS is defined */
	DECL_SENTINEL
};
78 
/*
 * Pool persistence predicates.  _p is a pointer expression to a structure
 * with a 'persistent' member (struct tmem_pool); a pool is ephemeral
 * exactly when it is not persistent.  The argument is parenthesized so
 * that expressions such as &pool expand correctly.
 */
#define is_persistent(_p)  ((_p)->persistent)
#define is_ephemeral(_p)   (!((_p)->persistent))
81 
82 /*
83  * An object id ("oid") is large: 192-bits (to ensure, for example, files
84  * in a modern filesystem can be uniquely identified).
85  */
86 
struct tmem_oid {
	/*
	 * three 64-bit words = 192 bits; tmem_oid_compare() treats oid[2]
	 * as the most significant word and oid[0] as the least
	 */
	uint64_t oid[3];
};
90 
/*
 * Handle naming one page: client, pool, object id and index, plus
 * checksum/size fields and an opaque pointer.  Built by
 * tmem_xhandle_fill().
 * NOTE(review): field order and widths may matter to users outside this
 * header -- confirm before rearranging.
 */
struct tmem_xhandle {
	/* 8 bits wide, although tmem_xhandle_fill() takes a uint16_t client_id */
	uint8_t client_id;
	/* tmem_xhandle_fill() stores (uint8_t)-1 here */
	uint8_t xh_data_cksum;
	/* tmem_xhandle_fill() stores (uint16_t)-1 here */
	uint16_t xh_data_size;
	/* 16 bits wide, although struct tmem_pool.pool_id is uint32_t */
	uint16_t pool_id;
	struct tmem_oid oid;
	uint32_t index;
	/* opaque pointer; its meaning is not visible in this header */
	void *extra;
};
100 
tmem_xhandle_fill(uint16_t client_id,struct tmem_pool * pool,struct tmem_oid * oidp,uint32_t index)101 static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id,
102 					struct tmem_pool *pool,
103 					struct tmem_oid *oidp,
104 					uint32_t index)
105 {
106 	struct tmem_xhandle xh;
107 	xh.client_id = client_id;
108 	xh.xh_data_cksum = (uint8_t)-1;
109 	xh.xh_data_size = (uint16_t)-1;
110 	xh.pool_id = pool->pool_id;
111 	xh.oid = *oidp;
112 	xh.index = index;
113 	return xh;
114 }
115 
tmem_oid_set_invalid(struct tmem_oid * oidp)116 static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
117 {
118 	oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
119 }
120 
tmem_oid_valid(struct tmem_oid * oidp)121 static inline bool tmem_oid_valid(struct tmem_oid *oidp)
122 {
123 	return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
124 		oidp->oid[2] != -1UL;
125 }
126 
tmem_oid_compare(struct tmem_oid * left,struct tmem_oid * right)127 static inline int tmem_oid_compare(struct tmem_oid *left,
128 					struct tmem_oid *right)
129 {
130 	int ret;
131 
132 	if (left->oid[2] == right->oid[2]) {
133 		if (left->oid[1] == right->oid[1]) {
134 			if (left->oid[0] == right->oid[0])
135 				ret = 0;
136 			else if (left->oid[0] < right->oid[0])
137 				ret = -1;
138 			else
139 				return 1;
140 		} else if (left->oid[1] < right->oid[1])
141 			ret = -1;
142 		else
143 			ret = 1;
144 	} else if (left->oid[2] < right->oid[2])
145 		ret = -1;
146 	else
147 		ret = 1;
148 	return ret;
149 }
150 
tmem_oid_hash(struct tmem_oid * oidp)151 static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
152 {
153 	return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
154 				TMEM_HASH_BUCKET_BITS);
155 }
156 
157 /*
158  * A tmem_obj contains an identifier (oid), pointers to the parent
159  * pool and the rb_tree to which it belongs, counters, and an ordered
160  * set of pampds, structured in a radix-tree-like tree.  The intermediate
161  * nodes of the tree are called tmem_objnodes.
162  */
163 
/* forward declaration: tmem_obj only needs a pointer to this type */
struct tmem_objnode;

struct tmem_obj {
	/* identifier of this object */
	struct tmem_oid oid;
	/* parent pool */
	struct tmem_pool *pool;
	/* NOTE(review): presumably linked under a hashbucket's obj_rb_root -- confirm */
	struct rb_node rb_tree_node;
	/* root of this object's tree of tmem_objnodes */
	struct tmem_objnode *objnode_tree_root;
	unsigned int objnode_tree_height;
	unsigned long objnode_count;
	/* note: signed, unlike objnode_count */
	long pampd_count;
	/* for current design of ramster, all pages belonging to
	 * an object reside on the same remotenode and extra is
	 * used to record the number of the remotenode so a
	 * flush-object operation can specify it */
	void *extra; /* for use by pampd implementation */
	/* debug sentinel field; present only while SENTINELS is defined */
	DECL_SENTINEL
};
181 
/* log2 of the number of slots held by one tmem_objnode */
#define OBJNODE_TREE_MAP_SHIFT	6
/* slots per tmem_objnode (dimension of its slots[] array) */
#define OBJNODE_TREE_MAP_SIZE	(1UL << OBJNODE_TREE_MAP_SHIFT)
/* SIZE is a power of two, so SIZE - 1 has exactly the low SHIFT bits set */
#define OBJNODE_TREE_MAP_MASK	(OBJNODE_TREE_MAP_SIZE - 1)
/* width of an unsigned long in bits; the literal 8 stands in for CHAR_BIT */
#define OBJNODE_TREE_INDEX_BITS	(sizeof(unsigned long) * 8 /* CHAR_BIT */)
/* index bits divided by bits per level, plus two */
#define OBJNODE_TREE_MAX_PATH \
	(OBJNODE_TREE_INDEX_BITS / OBJNODE_TREE_MAP_SHIFT + 2)
188 
/* Intermediate node of the tree that hangs off a tmem_obj. */
struct tmem_objnode {
	/* NOTE(review): presumably the object this node belongs to -- confirm */
	struct tmem_obj *obj;
	/* debug sentinel field; present only while SENTINELS is defined */
	DECL_SENTINEL
	/* OBJNODE_TREE_MAP_SIZE (64) untyped slots */
	void *slots[OBJNODE_TREE_MAP_SIZE];
	/* NOTE(review): presumably the number of occupied slots[] entries -- confirm */
	unsigned int slots_in_use;
};
195 
196 /* pampd abstract datatype methods provided by the PAM implementation */
/*
 * Callback table for pampd handling.  Parameters are unnamed in this
 * header, so the role of each char *, size_t, bool, int and void *
 * argument has to be taken from the implementation that fills the table.
 */
struct tmem_pamops {
	/* returns a void *; NOTE(review): presumably the new pampd -- confirm */
	void *(*create)(char *, size_t, bool, int,
			struct tmem_pool *, struct tmem_oid *, uint32_t);
	/* takes a size_t * (not size_t), so the size may be written back */
	int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
				struct tmem_oid *, uint32_t);
	/* same parameter list as get_data */
	int (*get_data_and_free)(char *, size_t *, bool, void *,
				struct tmem_pool *, struct tmem_oid *,
				uint32_t);
	void (*free)(void *, struct tmem_pool *,
				struct tmem_oid *, uint32_t, bool);
	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
	bool (*is_remote)(void *);
	void *(*repatriate_preload)(void *, struct tmem_pool *,
					struct tmem_oid *, uint32_t, bool *);
	int (*repatriate)(void *, void *, struct tmem_pool *,
				struct tmem_oid *, uint32_t, bool, void *);
	void (*new_obj)(struct tmem_obj *);
	int (*replace_in_obj)(void *, struct tmem_obj *);
};
/* hands a pamops table to tmem; defined outside this header */
extern void tmem_register_pamops(struct tmem_pamops *m);
217 
218 /* memory allocation methods provided by the host implementation */
/*
 * Callback table for allocating and freeing tmem_obj and tmem_objnode
 * structures; each callback also receives the pool concerned.
 */
struct tmem_hostops {
	struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
	void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
	void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
};
/* hands a hostops table to tmem; defined outside this header */
extern void tmem_register_hostops(struct tmem_hostops *m);
226 
227 /* core tmem accessor functions */
/*
 * Only the uint32_t index parameter is named in these prototypes; the
 * meaning of the remaining char *, size_t, bool, int and void * arguments,
 * and of the int return values, is defined by the implementation file.
 * Every operation that addresses a page takes (pool, oid, index).
 */
extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			char *, size_t, bool, int);
/* takes size_t * where tmem_put takes size_t */
extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			char *, size_t *, bool, int);
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
			void *);
/* NOTE(review): the struct tmem_obj ** and void ** look like out-parameters -- confirm */
extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,
				   uint32_t index, struct tmem_obj **,
				   void **);
extern void tmem_localify_finish(struct tmem_obj *, uint32_t index,
				 void *, void *, bool);
extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
			uint32_t index);
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
extern int tmem_destroy_pool(struct tmem_pool *);
/* NOTE(review): the uint32_t is unnamed; its meaning is not visible here */
extern void tmem_new_pool(struct tmem_pool *, uint32_t);
#endif /* _TMEM_H_ */
245