1 /*
2  * Copyright 2010 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  */
14 
15 /**
16  * @file drivers/xgbe/impl.h
17  * Implementation details for the NetIO library.
18  */
19 
20 #ifndef __DRV_XGBE_IMPL_H__
21 #define __DRV_XGBE_IMPL_H__
22 
23 #include <hv/netio_errors.h>
24 #include <hv/netio_intf.h>
25 #include <hv/drv_xgbe_intf.h>
26 
27 
/** Base-2 logarithm of the number of groups. */
#define LOG2_NUM_GROUPS (12)
/** Number of groups, i.e. 2^LOG2_NUM_GROUPS. */
#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
32 
/** Number of output requests buffered per tile. */
#define EPP_REQS_PER_TILE (32)
35 
/** Words consumed by one eDMA command when checksum acceleration is off. */
#define EDMA_WDS_NO_CSUM      8
/** Words consumed by one eDMA command when checksum acceleration is on. */
#define EDMA_WDS_CSUM        10
/** Total number of words available in the eDMA command FIFO. */
#define EDMA_WDS_TOTAL      128
42 
43 
/*
 * Packet size classes.
 *
 * FIXME: These definitions are internal and should have underscores!
 *
 * NOTE: The numeric values are intentional.  They let us optimize the
 * pattern "if small ... else if large ... else ..." by first testing
 * whether the low bit is set (small, 1), then testing for non-zero
 * (large, 2), which leaves zero for jumbo.
 * The values are also used as array indices, so they must be (0, 1, 2)
 * in some order.
 */
#define SIZE_SMALL (1)       /**< Small packet queue. */
#define SIZE_LARGE (2)       /**< Large packet queue. */
#define SIZE_JUMBO (0)       /**< Jumbo packet queue. */

/** The number of "SIZE_xxx" values. */
#define NETIO_NUM_SIZES 3
58 
59 
/*
 * Default packet counts for IPP drivers.  The values are chosen so that
 * CIPP1 will not overflow its L2 cache.
 */

/** Default number of small packets. */
#define NETIO_DEFAULT_SMALL_PACKETS 2750
/** Default number of large packets. */
#define NETIO_DEFAULT_LARGE_PACKETS 2500
/** Default number of jumbo packets. */
#define NETIO_DEFAULT_JUMBO_PACKETS 250
71 
72 
/** Base-2 logarithm of the size of a memory arena. */
#define NETIO_ARENA_SHIFT      24      /* 16 MB */
/** Size of a memory arena, in bytes (2^NETIO_ARENA_SHIFT). */
#define NETIO_ARENA_SIZE       (1 << NETIO_ARENA_SHIFT)
77 
78 
/** A queue of packets.
 *
 * This structure partially defines a queue of packets waiting to be
 * processed.  The queue as a whole is written to by an interrupt handler and
 * read by non-interrupt code; this data structure is the part touched by the
 * interrupt handler.  The other part of the queue state, the read offset, is
 * kept in user space, not in hypervisor space, so it lives in a separate data
 * structure.
 *
 * The read offset (__packet_receive_read in the user part of the queue
 * structure) points to the next packet to be read.  When the read offset is
 * equal to the write offset, the queue is empty; therefore the queue must
 * contain one more slot than the required maximum queue size.
 *
 * Here's an example of all 3 state variables and what they mean.  All
 * pointers move left to right.  (The example is drawn in slot numbers for
 * readability; the members below are documented as packet-scaled offsets.)
 *
 * @code
 *   I   I   V   V   V   V   I   I   I   I
 *   0   1   2   3   4   5   6   7   8   9  10
 *           ^               ^               ^
 *           |               |               |
 *           |               |               __last_packet_plus_one
 *           |               __packet_write
 *           __packet_receive_read
 * @endcode
 *
 * This queue has 10 slots, and thus can hold 9 packets
 * (__last_packet_plus_one = 10).  The read pointer is at 2, and the write
 * pointer is at 6; thus, there are valid, unread packets in slots 2, 3, 4,
 * and 5.  The remaining slots are invalid (do not contain a packet).
 */
typedef struct
{
  /** Byte offset of the next notify packet to be written: zero for the first
   *  packet on the queue, sizeof (netio_pkt_t) for the second packet on the
   *  queue, etc. */
  volatile uint32_t __packet_write;

  /** Offset of the packet after the last valid packet (i.e., when any
   *  pointer is incremented to this value, it wraps back to zero). */
  uint32_t __last_packet_plus_one;
}
__netio_packet_queue_t;
122 
123 
/** A queue of buffers.
 *
 * This structure partially defines a queue of empty buffers which have been
 * obtained via requests to the IPP.  (The elements of the queue are packet
 * handles, which are transformed into a full netio_pkt_t when the buffer is
 * retrieved.)  The queue as a whole is written to by an interrupt handler and
 * read by non-interrupt code; this data structure is the part touched by the
 * interrupt handler.  The other parts of the queue state, the read offset and
 * requested write offset, are kept in user space, not in hypervisor space, so
 * they live in a separate data structure.
 *
 * The read offset (__buffer_read in the user part of the queue structure)
 * points to the next buffer to be read.  When the read offset is equal to the
 * write offset, the queue is empty; therefore the queue must contain one more
 * slot than the required maximum queue size.
 *
 * The requested write offset (__buffer_requested_write in the user part of
 * the queue structure) points to the slot which will hold the next buffer we
 * request from the IPP, once we get around to sending such a request.  When
 * the requested write offset is equal to the write offset, no requests for
 * new buffers are outstanding; when the requested write offset is one greater
 * than the read offset, no more requests may be sent.
 *
 * Note that, unlike the packet_queue, the buffer_queue places incoming
 * buffers at decreasing addresses.  This makes the check for "is it time to
 * wrap the buffer pointer" cheaper in the assembly code which receives new
 * buffers, and means that the value which defines the queue size,
 * __last_buffer, is different than in the packet queue.  Also, the offset
 * used in the packet_queue is already scaled by the size of a packet; here we
 * use unscaled slot indices for the offsets.  (These differences are
 * historical, and in the future it's possible that the packet_queue will look
 * more like this queue.)
 *
 * Here's an example of all 4 state variables and what they mean.  Remember:
 * all pointers move right to left.
 *
 * @code
 *   V   V   V   I   I   R   R   V   V   V
 *   0   1   2   3   4   5   6   7   8   9
 *           ^       ^       ^           ^
 *           |       |       |           |
 *           |       |       |           __last_buffer
 *           |       |       __buffer_write
 *           |       __buffer_requested_write
 *           __buffer_read
 * @endcode
 *
 * This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9).
 * The read pointer is at 2, and the write pointer is at 6; thus, there are
 * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7.  The requested write
 * pointer is at 4; thus, requests have been made to the IPP for buffers which
 * will be placed in slots 6 and 5 when they arrive.  Finally, the remaining
 * slots are invalid (do not contain a buffer).
 */
typedef struct
{
  /** Ordinal number of the next buffer to be written: 0 for the first slot in
   *  the queue, 1 for the second slot in the queue, etc. */
  volatile uint32_t __buffer_write;

  /** Ordinal number of the last buffer (i.e., when any pointer is decremented
   *  below zero, it is reloaded with this value). */
  uint32_t __last_buffer;
}
__netio_buffer_queue_t;
189 
190 
191 /**
192  * An object for providing Ethernet packets to a process.
193  */
194 typedef struct __netio_queue_impl_t
195 {
196   /** The queue of packets waiting to be received. */
197   __netio_packet_queue_t __packet_receive_queue;
198   /** The intr bit mask that IDs this device. */
199   unsigned int __intr_id;
200   /** Offset to queues of empty buffers, one per size. */
201   uint32_t __buffer_queue[NETIO_NUM_SIZES];
202   /** The address of the first EPP tile, or -1 if no EPP. */
203   /* ISSUE: Actually this is always "0" or "~0". */
204   uint32_t __epp_location;
205   /** The queue ID that this queue represents. */
206   unsigned int __queue_id;
207   /** Number of acknowledgements received. */
208   volatile uint32_t __acks_received;
209   /** Last completion number received for packet_sendv. */
210   volatile uint32_t __last_completion_rcv;
211   /** Number of packets allowed to be outstanding. */
212   uint32_t __max_outstanding;
213   /** First VA available for packets. */
214   void* __va_0;
215   /** First VA in second range available for packets. */
216   void* __va_1;
217   /** Padding to align the "__packets" field to the size of a netio_pkt_t. */
218   uint32_t __padding[3];
219   /** The packets themselves. */
220   netio_pkt_t __packets[0];
221 }
222 netio_queue_impl_t;
223 
224 
225 /**
226  * An object for managing the user end of a NetIO queue.
227  */
228 typedef struct __netio_queue_user_impl_t
229 {
230   /** The next incoming packet to be read. */
231   uint32_t __packet_receive_read;
232   /** The next empty buffers to be read, one index per size. */
233   uint8_t __buffer_read[NETIO_NUM_SIZES];
234   /** Where the empty buffer we next request from the IPP will go, one index
235    * per size. */
236   uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
237   /** PCIe interface flag. */
238   uint8_t __pcie;
239   /** Number of packets left to be received before we send a credit update. */
240   uint32_t __receive_credit_remaining;
241   /** Value placed in __receive_credit_remaining when it reaches zero. */
242   uint32_t __receive_credit_interval;
243   /** First fast I/O routine index. */
244   uint32_t __fastio_index;
245   /** Number of acknowledgements expected. */
246   uint32_t __acks_outstanding;
247   /** Last completion number requested. */
248   uint32_t __last_completion_req;
249   /** File descriptor for driver. */
250   int __fd;
251 }
252 netio_queue_user_impl_t;
253 
254 
#define NETIO_GROUP_CHUNK_SIZE   64   /**< Max # groups in one IPP request */
#define NETIO_BUCKET_CHUNK_SIZE  64   /**< Max # buckets in one IPP request */
257 
258 
259 /** Internal structure used to convey packet send information to the
260  * hypervisor.  FIXME: Actually, it's not used for that anymore, but
261  * netio_packet_send() still uses it internally.
262  */
263 typedef struct
264 {
265   uint16_t flags;              /**< Packet flags (__NETIO_SEND_FLG_xxx) */
266   uint16_t transfer_size;      /**< Size of packet */
267   uint32_t va;                 /**< VA of start of packet */
268   __netio_pkt_handle_t handle; /**< Packet handle */
269   uint32_t csum0;              /**< First checksum word */
270   uint32_t csum1;              /**< Second checksum word */
271 }
272 __netio_send_cmd_t;
273 
274 
/*
 * Send flags.  These are used in two contexts:
 *  - As the "flags" member in the __netio_send_cmd_t; used only
 *    for netio_pkt_send_{prepare,commit}.
 *  - As part of the flags passed to the various send packet fast I/O calls.
 */

/** Need acknowledgement on this packet.  Note that some code in the
 *  normal send_pkt fast I/O handler assumes that this is equal to 1. */
#define __NETIO_SEND_FLG_ACK    0x1

/** Do checksum on this packet.  (Only used with the __netio_send_cmd_t;
 *  normal packet sends use a special fast I/O index to denote checksumming,
 *  and multi-segment sends test the checksum descriptor.) */
#define __NETIO_SEND_FLG_CSUM   0x2

/** Get a completion on this packet.  Only used with multi-segment sends. */
#define __NETIO_SEND_FLG_COMPLETION 0x4

/** Position of the number-of-extra-segments value in the flags word.
 *  Only used with multi-segment sends. */
#define __NETIO_SEND_FLG_XSEG_SHIFT 3

/** Width of the number-of-extra-segments value in the flags word. */
#define __NETIO_SEND_FLG_XSEG_WIDTH 2
299 
300 #endif /* __DRV_XGBE_IMPL_H__ */
301