/*******************************************************************************
*
* NetFPGA-10G http://www.netfpga.org
*
* File:
* nf10_lbuf.c
*
* Project:
*
*
* Author:
* Hwanju Kim
*
* Description:
* This is the large-buffer (lbuf) DMA-dependent module, an implementation
* of the hw_ops (lbuf_hw_ops) of nf10_adapter. Lbuf DMA uses a large
* buffer as its transport unit. For RX, the lbuf size is determined by the
* DMA hardware, though it could be made flexible (the DMA could be changed
* to support variable sizes). For TX, there is no such dependency; currently
* a single large TX buffer is allocated and used permanently for transmission.
*
* This code is initially developed for the Network-as-a-Service (NaaS) project.
* (under development in https://github.com/NetFPGA-NewNIC/linux-driver)
*
* Copyright notice:
* Copyright (C) 2014 University of Cambridge
*
* Licence:
* This file is part of the NetFPGA 10G development base package.
*
* This file is free code: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License version 2.1 as
* published by the Free Software Foundation.
*
* This package is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with the NetFPGA source package. If not, see
* http://www.gnu.org/licenses/.
*
*/
#include <linux/etherdevice.h>
#include "nf10.h"
#include "nf10_lbuf.h"
#include "nf10_lbuf_api.h"
#include "nf10_user.h"
static struct kmem_cache *desc_cache;
struct lbuf_stats {
u64 tx_lbufs;
u64 tx_bytes;
u32 tx_stops;
u64 rx_mac_timeout;
};
/**
* desc - lbuf descriptor
**/
struct desc {
void *kern_addr;
dma_addr_t dma_addr;
u32 size;
unsigned int tx_prod;
unsigned int tx_prod_pvt;
unsigned int tx_cons;
spinlock_t lock;
};
/**
* lbuf_info - core data structure for lbuf DMA
**/
static struct lbuf_info {
/* associated adapter set by nf10_lbuf_init */
struct nf10_adapter *adapter;
/* NR_SLOT rx lbufs' descriptors */
struct desc *rx_desc[NR_SLOT];
/* kernel tx lbuf descriptor: currently using one tx lbuf for kernel */
struct desc *tx_kern_desc;
/* user tx lbufs' descriptors: their number can be configured by the user */
struct desc *tx_user_desc[MAX_TX_USER_LBUF];
/* shared single-page metadata structure between kernel and user */
struct lbuf_user *u;
/* kernel gc address updated in nf10_clean_tx_irq on IRQ:
* last address of tx lbuf that is drained to DMA for flow control */
unsigned long long last_gc_addr;
/* tx completion buffer: slot availability and hw gc address */
void *tx_completion_kern_addr; /* for sw use */
dma_addr_t tx_completion_dma_addr; /* for hw use */
/* lbuf stats */
struct lbuf_stats stats;
struct device_attribute stat_attr;
} lbuf_info;
#define LBUF_TX_ORDER 10 /* default 4MB */
#define LBUF_TX_SIZE (1UL << (PAGE_SHIFT + LBUF_TX_ORDER))
#define DEFAULT_INTR_PERIOD_USECS 30
#define TX_CLEAN_BUDGET 64
/**
* Accessor/updater macros for the primary pointers/stats in lbuf_info:
* use the following macros instead of accessing/manipulating the values
* directly, which eases debugging (see the note following these macros).
* - idx: slot index
* - rx_cons: rx cons pointer in rx lbuf[idx]
* - tx_prod/prod_pvt/cons: tx pointers in kernel tx lbuf
**/
#define rx_idx() (lbuf_info.u->rx_idx)
#define tx_idx() (lbuf_info.u->tx_idx)
#define inc_rx_idx() inc_idx(rx_idx())
#define inc_tx_idx() inc_idx(tx_idx())
#define get_rx_desc(idx) (lbuf_info.rx_desc[idx])
#define set_rx_desc(idx, d) do { lbuf_info.rx_desc[idx] = d; } while(0)
#define cur_rx_desc() get_rx_desc(rx_idx())
#define get_tx_completion(idx) LBUF_TX_COMPLETION(lbuf_info.tx_completion_kern_addr, idx)
#define get_tx_avail(idx) (get_tx_completion(idx) == TX_AVAIL)
#define set_tx_avail(idx) do { get_tx_completion(idx) = TX_AVAIL; } while(0)
#define set_tx_used(idx) do { get_tx_completion(idx) = TX_USED; } while(0)
#define get_rx_cons() (lbuf_info.u->rx_cons)
#define set_rx_cons(v) do { lbuf_info.u->rx_cons = v; } while(0)
#define tx_kern_desc() (lbuf_info.tx_kern_desc)
#define tx_user_desc(ref) (lbuf_info.tx_user_desc[ref])
#define get_tx_prod(d) (d->tx_prod)
#define set_tx_prod(d, v) do { d->tx_prod = v; } while(0)
#define get_tx_prod_pvt(d) (d->tx_prod_pvt)
#define set_tx_prod_pvt(d, v) do { d->tx_prod_pvt = v; } while(0)
#define get_tx_cons(d) (d->tx_cons)
#define set_tx_cons(d, v) do { d->tx_cons = v; } while(0)
#define init_tx_pointers(d) do { set_tx_prod(d, 0); set_tx_prod_pvt(d, 0); set_tx_cons(d, 0); } while(0)
#define tx_clean_completed(d) (get_tx_prod(d) == get_tx_cons(d))
#define tx_pending(d) (get_tx_prod_pvt(d) - get_tx_prod(d) > 0)
#define set_tx_dma_addr(i, v) do { lbuf_info.u->tx_dma_addr[i] = v; } while(0)
#define set_rx_dma_addr(i, v) do { lbuf_info.u->rx_dma_addr[i] = v; } while(0)
#define get_sw_gc_addr() (lbuf_info.last_gc_addr)
#define set_sw_gc_addr(v) do { lbuf_info.last_gc_addr = (unsigned long)v; } while(0)
#define get_sw_user_gc_addr() (lbuf_info.u->last_gc_addr)
#define get_hw_gc_addr() LBUF_GC_ADDR(lbuf_info.tx_completion_kern_addr)
#define addr_in_lbuf(d, addr) (addr > d->dma_addr && addr <= d->dma_addr + d->size)
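/*
* Illustrative note (a sketch, not code used below): for a tx lbuf the three
* pointers are expected to keep the order cons <= prod <= prod_pvt (modulo
* wrap-around). The tx path stages data by advancing prod_pvt, lbuf_xmit
* flushes the [prod, prod_pvt) range to hardware and sets prod = prod_pvt,
* and the clean path presumably advances cons as the hardware drains the
* lbuf:
*
*	|<--- cleaned --->|<--- sent, being drained --->|<--- pending --->|
*	0               cons                          prod           prod_pvt
*
* Hence tx_pending(d) tests prod_pvt - prod > 0 and tx_clean_completed(d)
* tests prod == cons.
*/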
/**
* Simple profiling macros with rdtsc:
* if you want to use it, adjust NR_TIMESTAMPS depending on the number of
* events to be profiled and enclose the code where you want to measure time
* with START_TIMESTAMP(i) and STOP_TIMESTAMP(i). Each can be reported using
* ELAPSED_CYCLES(i), which is currently included in show_lbuf_stat().
**/
#ifdef CONFIG_PROFILE
/* WARN: note that no bounds checking is done, for performance */
#define DEFINE_TIMESTAMP(n) u64 _t1, _t2, _total[n] = {0}
#define START_TIMESTAMP(i) rdtscll(_t1)
#define STOP_TIMESTAMP(i) \
do { \
rdtscll(_t2); \
_total[i] += (_t2 - _t1); \
} while(0)
#define ELAPSED_CYCLES(i) (_total[i])
#else
#define DEFINE_TIMESTAMP(n)
#define START_TIMESTAMP(i)
#define STOP_TIMESTAMP(i)
#define ELAPSED_CYCLES(i) (0ULL)
#endif
#define NR_TIMESTAMPS 4
DEFINE_TIMESTAMP(NR_TIMESTAMPS);
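/*
* Usage sketch for the profiling macros above (illustrative only; the event
* index 0 is arbitrary and must be less than NR_TIMESTAMPS):
*
*	START_TIMESTAMP(0);
*	... code to be measured ...
*	STOP_TIMESTAMP(0);
*
* ELAPSED_CYCLES(0) then yields the accumulated cycle count, which is how
* show_lbuf_stat() reports the rx cost breakdown.
*/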
static inline void *__alloc_lbuf(struct nf10_adapter *adapter,
struct desc *desc, u32 size)
{
desc->kern_addr = pci_alloc_consistent(adapter->pdev, size,
&desc->dma_addr);
desc->size = size;
init_tx_pointers(desc);
spin_lock_init(&desc->lock);
return desc->kern_addr;
}
static inline void __free_lbuf(struct nf10_adapter *adapter, struct desc *desc)
{
pci_free_consistent(adapter->pdev, desc->size,
desc->kern_addr, desc->dma_addr);
}
static struct desc *alloc_desc(void)
{
return kmem_cache_alloc(desc_cache, GFP_ATOMIC);
}
static void __free_desc(struct desc *desc)
{
kmem_cache_free(desc_cache, desc);
}
/**
* alloc_lbuf - allocate DMA-coherent lbuf with size
* @adapter: associated adapter structure
* @size: requested size
*
* This is the top-level function for allocating an lbuf of the given size.
*
* Returns desc if allocation succeeds, NULL otherwise.
**/
static struct desc *alloc_lbuf(struct nf10_adapter *adapter, unsigned long size)
{
struct desc *desc = alloc_desc();
if (unlikely(!desc))
return NULL;
if (unlikely(!__alloc_lbuf(adapter, desc, size))) {
__free_desc(desc);
return NULL;
}
netif_dbg(adapter, drv, default_netdev(adapter),
"%s: addr=(kern=%p:dma=%p)\n", __func__,
desc->kern_addr, (void *)desc->dma_addr);
return desc;
}
/**
* free_lbuf - free desc's lbuf
* @adapter: associated adapter structure
* @desc: lbuf descriptor to be freed
*
* This is the top-level function for freeing a lbuf.
**/
static void free_lbuf(struct nf10_adapter *adapter, struct desc *desc)
{
/* this function is safe with NULL desc */
if (unlikely(desc == NULL))
return;
netif_dbg(adapter, drv, default_netdev(adapter),
"%s: addr=(kern=%p:dma=%p)\n", __func__,
desc->kern_addr, (void *)desc->dma_addr);
__free_lbuf(adapter, desc);
__free_desc(desc);
}
/**
* __enable_irq - do synchronization with hw and enable IRQ
* @adapter: associated adapter structure
*
* This function first synchronizes the current rx and tx status with the DMA
* and then enables the IRQ. For rx, it lets the DMA know the current rx_cons
* address, and for tx, it writes the gc address last seen by the kernel. This
* synchronization is needed to tickle the DMA into generating an IRQ if
* software has not yet seen up-to-date information (i.e., new rx packets and
* newly drained tx packets).
*
* It is also invoked from a user process via poll/select(). When the user
* process goes to sleep because no event is pending, it enables the IRQ so it
* can be woken up when an IRQ is delivered. In this case, UF_GC_ADDR_SYNC is
* set to synchronize the user gc address.
**/
static void __enable_irq(struct nf10_adapter *adapter)
{
u64 last_rx_dma_addr =
(u64)&DWORD_GET(cur_rx_desc()->dma_addr, get_rx_cons());
/* if requested, user gc address is synchronized */
if (unlikely(adapter->user_flags & UF_GC_ADDR_SYNC)) {
set_sw_gc_addr(get_sw_user_gc_addr());
adapter->user_flags &= ~UF_GC_ADDR_SYNC;
}
/* tx: sync gc address if non-zero */
if (get_sw_gc_addr())
nf10_writeq(adapter, TX_SYNC_REG, get_sw_gc_addr());
/* rx: sync rx_cons address */
nf10_writeq(adapter, RX_SYNC_REG, last_rx_dma_addr);
wmb();
nf10_writel(adapter, IRQ_ENABLE_REG, IRQ_CTRL_VAL);
netif_dbg(adapter, intr, default_netdev(adapter),
"enable_irq (wb=[tx:%p,rx:%p])\n",
(void *)get_sw_gc_addr(), (void *)last_rx_dma_addr);
}
static void __disable_irq(struct nf10_adapter *adapter)
{
nf10_writel(adapter, IRQ_DISABLE_REG, IRQ_CTRL_VAL);
netif_dbg(adapter, intr, default_netdev(adapter), "disable_irq\n");
}
/**
* This handler table is extensible for additional IRQ control features.
* To add a new control, define an IRQ_CTRL_* value in nf10.h and add its
* handler function to the following array (see the sketch after the array).
**/
void (*irq_ctrl_handlers[NR_IRQ_CTRL])(struct nf10_adapter *adapter) = {
[IRQ_CTRL_ENABLE] = __enable_irq,
[IRQ_CTRL_DISABLE] = __disable_irq,
};
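/*
* Extension sketch (hypothetical names): a new control, say IRQ_CTRL_FOO
* defined in nf10.h next to the existing IRQ_CTRL_* values, would get its
* own handler and an entry in the table above, e.g.:
*
*	static void __foo_irq(struct nf10_adapter *adapter) { ... }
*
*	[IRQ_CTRL_FOO]	= __foo_irq,
*
* with NR_IRQ_CTRL sized to cover the new index.
*/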
static int init_tx_lbufs(struct nf10_adapter *adapter)
{
int i;
/* allocate kernel tx lbuf: currently a single tx lbuf is used
* for kernel-level tx (but it can be extended) */
BUG_ON(tx_kern_desc());
if (!(tx_kern_desc() = alloc_lbuf(adapter, LBUF_TX_SIZE)))
return -ENOMEM;
netif_info(adapter, probe, default_netdev(adapter),
"TX kern lbuf allocated at kern_addr=%p/dma_addr=%p"
" (size=%u bytes)\n", tx_kern_desc()->kern_addr,
(void *)tx_kern_desc()->dma_addr, tx_kern_desc()->size);
/* tx completion DMA-coherent buffer: it's for the availability of
* each slot and gc address */
lbuf_info.tx_completion_kern_addr =
pci_alloc_consistent(adapter->pdev, TX_COMPLETION_SIZE,
&lbuf_info.tx_completion_dma_addr);
if (lbuf_info.tx_completion_kern_addr == NULL) {
free_lbuf(adapter, tx_kern_desc());
return -ENOMEM;
}
/* make all slots available. This is the only place where software
* marks slots available, done once at initialization.
* Afterwards, a slot is made available by the DMA once all of the
* slot's PCIe read requests have been sent. */
for (i = 0; i < NR_SLOT; i++)
set_tx_avail(i);
/* let DMA know where the tx completion area is allocated */
nf10_writeq(adapter, TX_COMPLETION_ADDR,
lbuf_info.tx_completion_dma_addr);
return 0;
}
/**
* get_tx_user_lbuf - get a tx user lbuf based on ref and size
* @adapter: associated adapter structure
* @ref: reference (index) to tx user lbuf array
* @size: requested size via mmap
*
* This function checks whether a user tx lbuf with the requested size is
* available. If so, it returns its PFN; otherwise, it allocates a new tx lbuf.
*
* Returns the PFN of the tx lbuf for ref and size, or 0 on failure.
**/
static unsigned long get_tx_user_lbuf(struct nf10_adapter *adapter,
int ref, unsigned long size)
{
struct desc *desc;
if (unlikely(ref >= MAX_TX_USER_LBUF)) {
pr_err("%s: ref(=%d) >= %d\n", __func__, ref, MAX_TX_USER_LBUF);
return 0;
}
desc = tx_user_desc(ref);
/* reuse the existing tx lbuf if it is at least the requested size; otherwise (re)allocate */
if (!desc || desc->size < size) {
free_lbuf(adapter, desc);
if ((desc = alloc_lbuf(adapter, size)) == NULL) {
pr_err("%s: failed to allocate tx_user_desc[%d]\n",
__func__, ref);
return 0;
}
tx_user_desc(ref) = desc;
}
set_tx_dma_addr(ref, desc->dma_addr);
return virt_to_phys(desc->kern_addr) >> PAGE_SHIFT;
}
static void put_tx_user_lbuf(struct nf10_adapter *adapter, int ref)
{
if (ref >= MAX_TX_USER_LBUF || !tx_user_desc(ref))
return;
free_lbuf(adapter, tx_user_desc(ref));
tx_user_desc(ref) = NULL;
}
static void free_tx_lbufs(struct nf10_adapter *adapter)
{
int i;
free_lbuf(adapter, tx_kern_desc());
tx_kern_desc() = NULL;
pci_free_consistent(adapter->pdev, TX_COMPLETION_SIZE,
lbuf_info.tx_completion_kern_addr,
lbuf_info.tx_completion_dma_addr);
for (i = 0; i < MAX_TX_USER_LBUF; i++)
put_tx_user_lbuf(adapter, i);
}
/**
* nf10_lbuf_prepare_rx - [user_ops] prepare a rx lbuf to DMA (low-level)
* @adapter: associated adapter structure
* @idx: rx lbuf slot index
*
* This function prepares a rx lbuf to DMA by writing address and readiness of
* the lbuf to the registers of the slot indicated by idx.
**/
static void nf10_lbuf_prepare_rx(struct nf10_adapter *adapter, unsigned long idx)
{
void *kern_addr;
dma_addr_t dma_addr;
struct desc *desc;
/* sanity check due to malicious user-driven preparation */
if (unlikely(idx >= NR_SLOT)) {
pr_err("%s: invalid desc index(=%lu)\n", __func__, idx);
return;
}
desc = get_rx_desc(idx);
if (unlikely(desc->kern_addr == NULL)) {
pr_err("%s: desc->kern_addr is NULL\n", __func__);
return;
}
kern_addr = desc->kern_addr;
dma_addr = desc->dma_addr;
nf10_writeq(adapter, rx_addr_off(idx), dma_addr);
nf10_writel(adapter, rx_stat_off(idx), RX_READY);
netif_dbg(adapter, rx_status, default_netdev(adapter),
"RX lbuf[%lu] is prepared to nf10\n", idx);
}
static void nf10_lbuf_prepare_rx_all(struct nf10_adapter *adapter)
{
unsigned long i;
netif_dbg(adapter, drv, default_netdev(adapter),
"init to prepare all rx descriptors\n");
for (i = 0; i < NR_SLOT; i++)
nf10_lbuf_prepare_rx(adapter, i);
/* initialize rx_cons to LBUF_RX_RESERVED_DWORDS, a start point for data */
set_rx_cons(LBUF_RX_RESERVED_DWORDS);
}
static void free_rx_lbufs(struct nf10_adapter *adapter)
{
int i;
for (i = 0; i < NR_SLOT; i++) {
struct desc *desc = get_rx_desc(i);
if (desc) {
netif_info(adapter, drv, default_netdev(adapter),
"RX lbuf[%d] is freed from kern_addr=%p",
i, desc->kern_addr);
free_lbuf(adapter, desc);
set_rx_desc(i, NULL);
}
}
}
/**
* init_rx_lbufs - allocate and prepare rx lbufs
* @adapter: associated adapter structure
*
* This function allocates NR_SLOT rx lbufs and prepares them for DMA.
**/
static int init_rx_lbufs(struct nf10_adapter *adapter)
{
int i;
for (i = 0; i < NR_SLOT; i++) {
/* RX desc is normally allocated once and used permanently
* unlike RX lbuf */
struct desc *desc;
BUG_ON(get_rx_desc(i)); /* ensure unused desc is NULL */
desc = alloc_lbuf(adapter, LBUF_RX_SIZE);
if (unlikely(!desc))
goto alloc_fail;
set_rx_desc(i, desc);
set_rx_dma_addr(i, desc->dma_addr);
netif_info(adapter, probe, default_netdev(adapter),
"RX lbuf[%d] allocated at kern_addr=%p/dma_addr=%p"
" (size=%u bytes)\n", i,
desc->kern_addr, (void *)desc->dma_addr, desc->size);
}
nf10_lbuf_prepare_rx_all(adapter);
return 0;
alloc_fail:
free_rx_lbufs(adapter);
return -ENOMEM;
}
static ssize_t show_lbuf_stat(struct device *dev,
struct device_attribute *attr, char *buf)
{
int i;
struct lbuf_info *info = container_of(attr, struct lbuf_info,
stat_attr);
struct lbuf_stats *stats = &info->stats;
unsigned long rx_bytes = 0;
sprintf(buf, "tx_lbufs=%llu\ntx_bytes=%llu\ntx_avg_bytes=%llu\n",
stats->tx_lbufs, stats->tx_bytes,
stats->tx_lbufs ? stats->tx_bytes / stats->tx_lbufs : 0);
sprintf(buf + strlen(buf), "tx_stops=%u\n", stats->tx_stops);
sprintf(buf + strlen(buf), "rx_mac_timeout=%llu\n",
stats->rx_mac_timeout);
for (i = 0; i < CONFIG_NR_PORTS; i++)
rx_bytes += info->adapter->netdev[i]->stats.rx_bytes;
if (rx_bytes > 0) {
sprintf(buf + strlen(buf), "rx_cycles_per_KB rx_alloc=%llu"
" copy=%llu zero=%llu stack=%llu\n",
(ELAPSED_CYCLES(0) << 10) / rx_bytes,
(ELAPSED_CYCLES(1) << 10) / rx_bytes,
(ELAPSED_CYCLES(2) << 10) / rx_bytes,
(ELAPSED_CYCLES(3) << 10) / rx_bytes);
}
return strlen(buf);
}
static ssize_t init_lbuf_stat(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct lbuf_info *info = container_of(attr, struct lbuf_info,
stat_attr);
memset(&info->stats, 0, sizeof(info->stats));
return count;
}
/**
* nf10_lbuf_get_pfn - [user_ops] return PFN of kernel page by mmap index
* @adapter: associated adapter structure
* @size: lbuf size requested by user process via mmap
*
* This function is called by mmap from a user process to map the metadata and
* lbuf pages allocated in the kernel. size is used for a sanity check.
* The corresponding page is selected by adapter->nr_user_mmap, which is
* incremented on each successful mmap. The current mapping is
* nr_user_mmap == 0: general metadata page
* == 1: metadata for tx completion (slot availability, gc address)
* == 2-(2+NR_SLOT-1): rx lbufs
* == (2+NR_SLOT)- : tx lbufs (variable and guided by user process)
**/
static unsigned long nf10_lbuf_get_pfn(struct nf10_adapter *adapter,
unsigned long size)
{
unsigned int idx = adapter->nr_user_mmap;
unsigned long pfn = 0; /* 0 means error */
if (idx == 0) { /* general metadata page */
pfn = virt_to_phys(lbuf_info.u) >> PAGE_SHIFT;
netif_info(adapter, drv, default_netdev(adapter),
"%s: [%u] DMA metadata page (pfn=%lx)\n",
__func__, idx, pfn);
}
else if (idx == 1) { /* metadata for tx completion page */
void *addr = lbuf_info.tx_completion_kern_addr;
pfn = virt_to_phys(addr) >> PAGE_SHIFT;
netif_info(adapter, drv, default_netdev(adapter),
"%s: [%u] DMA tx completion area (pfn=%lx)\n",
__func__, idx, pfn);
}
else { /* rx/tx data pages */
idx -= 2; /* adjust index to data */
if (idx < NR_SLOT && size == LBUF_RX_SIZE) /* rx */
pfn = get_rx_desc(idx)->dma_addr >> PAGE_SHIFT;
else if (idx >= NR_SLOT && size >= MIN_TX_USER_LBUF_SIZE &&
size <= MAX_TX_USER_LBUF_SIZE)
pfn = get_tx_user_lbuf(adapter, idx - NR_SLOT, size);
netif_info(adapter, drv, default_netdev(adapter),
"%s: [%u] data page (pfn=%lx size=%lu)\n",
__func__, adapter->nr_user_mmap, pfn, size);
}
return pfn;
}
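/*
* User-space mapping sketch (illustrative; the device node path and the mmap
* flags/sizes shown are assumptions, not defined in this file). A user
* process is expected to mmap in the order tracked by adapter->nr_user_mmap:
*
*	fd = open("/dev/nf10", O_RDWR);                  // hypothetical node
*	meta   = mmap(..., page_size, ..., fd, 0);       // nr_user_mmap == 0
*	tx_cpl = mmap(..., page_size, ..., fd, 0);       // nr_user_mmap == 1
*	for (i = 0; i < NR_SLOT; i++)                    // rx lbufs
*		rx_lbuf[i] = mmap(..., LBUF_RX_SIZE, ..., fd, 0);
*	tx_lbuf = mmap(..., tx_size, ..., fd, 0);        // user tx lbufs
*/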
/**
* nf10_lbuf_user_xmit - [user_ops] transmit user tx lbuf
* @adapter: associated adapter structure
* @arg: metadata including reference id and length of user tx lbuf
*
* This function is called by user process via ioctl (in nf10_user.c)
**/
static u32 lbuf_xmit(struct nf10_adapter *adapter, struct desc *desc);
static int nf10_lbuf_user_xmit(struct nf10_adapter *adapter, unsigned long arg)
{
struct desc *desc;
u32 ref = XMIT_REF(arg);
u32 len = XMIT_LEN(arg);
netif_dbg(adapter, drv, default_netdev(adapter),
"user_xmit: ref=%u len=%u arg=%lx\n", ref, len, arg);
if (unlikely(ref >= MAX_TX_USER_LBUF)) {
pr_err("%s: Error invalid ref %u >= %d\n",
__func__, ref, MAX_TX_USER_LBUF);
return -EINVAL;
}
desc = tx_user_desc(ref);
if (unlikely(desc == NULL)) {
pr_err("%s: Error tx_user_lbufs[%d] is NULL\n", __func__, ref);
return -EINVAL;
}
/* no need to acquire desc->lock, since this lbuf is ensured not to
* have been transmitted to hardware. user tx lbuf is used one-shot,
* so prod (=0) to prod_pvt (=len) represents pending data before
* lbuf_xmit, while cons is 0, since it's not consumed */
set_tx_prod(desc, 0);
set_tx_prod_pvt(desc, len);
set_tx_cons(desc, 0);
lbuf_xmit(adapter, desc);
return 0;
}
static struct nf10_user_ops lbuf_user_ops = {
.get_pfn = nf10_lbuf_get_pfn,
.prepare_rx_buffer = nf10_lbuf_prepare_rx,
.start_xmit = nf10_lbuf_user_xmit,
};
/**
* nf10_lbuf_set_irq_period - [hw_ops] set IRQ period to DMA
* @adapter: associated adapter structure
*
* The period to be set should be updated first in adapter->irq_period_usecs
**/
static int nf10_lbuf_set_irq_period(struct nf10_adapter *adapter)
{
nf10_writel(adapter, IRQ_PERIOD_REG,
adapter->irq_period_usecs * 1000 /* ns */);
netif_info(adapter, probe, default_netdev(adapter),
"%u us is set as irq period\n", adapter->irq_period_usecs);
return 0;
}
/**
* nf10_lbuf_init - [hw_ops] init lbuf DMA
* @adapter: associated adapter structure
**/
static int nf10_lbuf_init(struct nf10_adapter *adapter)
{
int err;
/* create desc pool */
desc_cache = kmem_cache_create("lbuf_desc",
sizeof(struct desc),
__alignof__(struct desc),
0, NULL);
if (desc_cache == NULL) {
pr_err("failed to alloc desc_cache\n");
return -ENOMEM;
}
/* init lbuf user-visible single-page space for metadata */
if ((lbuf_info.u =
(struct lbuf_user *)get_zeroed_page(GFP_KERNEL)) == NULL) {
netif_err(adapter, rx_err, default_netdev(adapter),
"failed to alloc lbuf user page\n");
kmem_cache_destroy(desc_cache);
return -ENOMEM;
}
lbuf_info.adapter = adapter;
adapter->user_ops = &lbuf_user_ops;
adapter->irq_period_usecs = DEFAULT_INTR_PERIOD_USECS;
nf10_lbuf_set_irq_period(adapter);
/* create a device file to show lbuf stats */
lbuf_info.stat_attr.attr.name = "lbuf_stat";
lbuf_info.stat_attr.attr.mode = S_IRUGO | S_IWUSR;
lbuf_info.stat_attr.show = show_lbuf_stat;
lbuf_info.stat_attr.store = init_lbuf_stat;
sysfs_attr_init(&lbuf_info.stat_attr.attr);
err = device_create_file(&adapter->pdev->dev, &lbuf_info.stat_attr);
if (err)
pr_warn("failed to create file for lbuf_stat\n");
return 0;
}
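/*
* Usage sketch for the lbuf_stat attribute created above (the exact sysfs
* location depends on the PCI device; the path below is illustrative):
*
*	cat /sys/bus/pci/devices/<BDF>/lbuf_stat       -> show_lbuf_stat()
*	echo 1 > /sys/bus/pci/devices/<BDF>/lbuf_stat  -> init_lbuf_stat() resets stats
*/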
/**
* nf10_lbuf_free - [hw_ops] free lbuf DMA
* @adapter: associated adapter structure
**/
static void nf10_lbuf_free(struct nf10_adapter *adapter)
{
kmem_cache_destroy(desc_cache);
device_remove_file(&adapter->pdev->dev, &lbuf_info.stat_attr);
}
/**
* nf10_lbuf_init_buffers - [hw_ops] init tx and rx lbufs
* @adapter: associated adapter structure
**/
static int nf10_lbuf_init_buffers(struct nf10_adapter *adapter)
{
int err = 0;
if ((err = init_tx_lbufs(adapter)))
return err;
if ((err = init_rx_lbufs(adapter)))
free_tx_lbufs(adapter);
return err;
}
/**
* nf10_lbuf_free_buffers - [hw_ops] free tx and rx lbufs
* @adapter: associated adapter structure
**/
static void nf10_lbuf_free_buffers(struct nf10_adapter *adapter)
{
free_tx_lbufs(adapter);
free_rx_lbufs(adapter);
}
/**
* move_to_next_lbuf - re-prepare current lbuf and switch to next lbuf
* @adapter: associated adapter structure
*
* This function re-prepares the current (closed) lbuf and increments the rx
* lbuf slot index. All of the consumed area has already been zeroed in
* deliver_packet, so only the lbuf header needs to be initialized before
* preparation. rx_cons is reset to LBUF_RX_RESERVED_DWORDS, the start point
* for packet data.
**/
static void move_to_next_lbuf(struct nf10_adapter *adapter)
{
netif_dbg(adapter, rx_status, default_netdev(adapter),
"%s: rx_idx=%u\n", __func__, rx_idx());
LBUF_RX_INIT_HEADER(cur_rx_desc()->kern_addr);
wmb();
nf10_lbuf_prepare_rx(adapter, (unsigned long)rx_idx());
inc_rx_idx();
set_rx_cons(LBUF_RX_RESERVED_DWORDS);
}
/**
* deliver_packet - deliver a packet from lbuf to kernel protocol layer
* @netdev: net device passed from a packet
* @pkt_addr: address of the packet to be delivered from lbuf
* @pkt_len: length of the packet
* @pskb: allocated skb to convey the packet (*pskb becomes NULL after delivery)
* @work_done: # of delivered packets for NAPI (incremented after delivery)
*
* Since the rx lbuf is used permanently, it must be zeroed before being
* prepared for DMA again. Currently, zeroing is done right after a packet is
* copied to the skb data; an alternative would be to zero it just before
* preparation.
**/
static void deliver_packet(struct net_device *netdev, void *pkt_addr,
unsigned int pkt_len, struct sk_buff **pskb, int *work_done)
{
struct nf10_adapter *adapter = netdev_adapter(netdev);
struct sk_buff *skb = *pskb;
/* interface is down, skip it */
if (unlikely(netdev_port_up(netdev) == 0))
return;
START_TIMESTAMP(1);
skb_copy_to_linear_data(skb, pkt_addr, pkt_len);
STOP_TIMESTAMP(1);
START_TIMESTAMP(2);
memset(pkt_addr - LBUF_TX_METADATA_SIZE, 0,
ALIGN(pkt_len, 8) + LBUF_TX_METADATA_SIZE);
STOP_TIMESTAMP(2);
START_TIMESTAMP(3);
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, netdev);
skb->ip_summed = CHECKSUM_NONE;
napi_gro_receive(&adapter->napi, skb);
STOP_TIMESTAMP(3);
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += pkt_len;
(*work_done)++;
(*pskb) = NULL;
}
/**
* nf10_lbuf_process_rx_irq - [hw_ops] process received packets from rx lbuf
* @adapter: associated adapter structure
* @work_done: # of packets handled in the function and returned to NAPI loop
* @budget: NAPI budget of packets that are allowed to be consumed in one call
*
* This function scans the current rx lbuf, extracts each received packet, and
* passes it to the upper layer using an skb. In lbuf DMA, packets received
* from the MAC core are written to an rx lbuf back-to-back, so each packet
* occupies a variable-sized area. To deal with this variable size, the lbuf is
* prepared zeroed, which lets software decide when each packet has been
* completely received so that it can be safely passed to the upper layer.
* See README for the details.
**/
static void nf10_lbuf_process_rx_irq(struct nf10_adapter *adapter,
int *work_done, int budget)
{
void *buf_addr;
unsigned int dword_idx, next_dword_idx;
struct sk_buff *skb;
int port_num;
void *pkt_addr;
unsigned int pkt_len, next_pkt_len;
struct net_device *netdev = NULL;
union lbuf_header lh;
do {
skb = NULL;
buf_addr = cur_rx_desc()->kern_addr;
/* rx cons pointer is maintained in dword unit */
dword_idx = get_rx_cons();
pkt_len = LBUF_RX_PKT_LEN(buf_addr, dword_idx);
port_num = LBUF_PKT_PORT_NUM(buf_addr, dword_idx);
/* if the current packet length is zero, two cases are possible:
* 1) no more packet has arrived
* 2) this lbuf has no space to receive (so-called lbuf closed)
*/
if (pkt_len == 0) {
/* if this lbuf is closed, move to next lbuf */
LBUF_RX_GET_HEADER(buf_addr, lh);
if (LBUF_RX_CLOSED(dword_idx, lh)) {
move_to_next_lbuf(adapter);
continue;
}
/* Now make sure no packet has arrived, exit the loop */
break;
}
/* BUG if the fetched port number or packet length is invalid.
* if so, sync between sw and hw is likely to be lost */
if (unlikely(!LBUF_IS_PKT_VALID(port_num, pkt_len))) {
netdev = LBUF_IS_PORT_VALID(port_num) ?
adapter->netdev[port_num] :
default_netdev(adapter);
netif_err(adapter, rx_err, netdev,
"Error: invalid packet "
"(port_num=%d, len=%u at rx_idx=%d lbuf[%u])",
port_num, pkt_len, rx_idx(), dword_idx);
/* For DMA hardware debugging, some contents of previous
* and next packets are dumped */
print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_NONE, 16,1,
(u32 *)buf_addr + (dword_idx - 32), 128, true);
printk("-this packet ------------------------------\n");
print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_NONE, 16,1,
(u32 *)buf_addr + dword_idx, 128, true);
/* XXX: user_flags is not meant for this, but in this
* exceptional case (RX hang), we disable the IRQ for good
* so that it does not keep generating IRQs and this report.
* In any case, this is an unrecoverable situation */
adapter->user_flags |= UF_IRQ_DISABLED;
break;
}
/* Now, pkt_len > 0,
* meaning the current packet starts being received.
* First, check if user process is running for rx */
if (nf10_user_callback(adapter, 1)) {
/* if user process takes it, work_done 0 lets
* NAPI loop stop */
*work_done = 0;
return;
}
netdev = adapter->netdev[port_num];
if (unlikely(!skb)) { /* skb becomes NULL if delivered */
START_TIMESTAMP(0);
skb = netdev_alloc_skb_ip_align(netdev, pkt_len);
STOP_TIMESTAMP(0);
if (unlikely(!skb)) {
netif_err(adapter, rx_err, netdev,
"failed to alloc skb (l=%u)", pkt_len);
break;
}
}
pkt_addr = LBUF_RX_PKT_ADDR(buf_addr, dword_idx);
next_dword_idx = LBUF_RX_NEXT_DWORD_IDX(dword_idx, pkt_len);
wait_to_end_recv:
/* lbuf rx engine uses the value of the length of next packet
* to determine if the current packet is completely received */
next_pkt_len = LBUF_RX_PKT_LEN(buf_addr, next_dword_idx);
if (next_pkt_len > 0) {
/* if next packet length is non-zero, the current packet
* is received entirely, so deliver this to kernel */
deliver_packet(netdev, pkt_addr, pkt_len,
&skb, work_done);
set_rx_cons(next_dword_idx);
}
else { /* next_pkt_len == 0 */
/* if the next packet length is zero, there are three cases:
* 1) the current packet reception is in progress
* 2) this lbuf is closed due to insufficient space
* 3) a MAC timeout occurred, so DMA jumped to the next
*    128B-aligned address
*/
LBUF_RX_GET_HEADER(buf_addr, lh);
/* lazy update: rx_dropped is eventually accurate */
netdev->stats.rx_dropped = lh.nr_drops;
/* use nr_qwords in the lbuf header to check whether 1) holds:
* if nr_qwords < the next qword index, 1) is the case,
* so keep waiting for the current packet */
if ((lh.nr_qwords << 1) <
next_dword_idx - LBUF_RX_RESERVED_DWORDS)
goto wait_to_end_recv;
/* if nr_qwords >= the next qword index,
* the entire packet has been received, so consume it */
deliver_packet(netdev, pkt_addr, pkt_len,
&skb, work_done);
/* check if the lbuf is closed -> 2) is true */
if (LBUF_RX_CLOSED(next_dword_idx, lh)) {
move_to_next_lbuf(adapter);
continue;
}
/* now only 3) should remain, but the next packet length
* may have become non-zero in the meantime, meaning a
* following packet has arrived. So, before asserting that
* a MAC timeout occurred, check the length again */
next_pkt_len = LBUF_RX_PKT_LEN(buf_addr, next_dword_idx);
if (next_pkt_len == 0) {
/* MAC timeout: DMA has jumped to a 128B-aligned
* address for the next packet */
next_dword_idx = LBUF_RX_128B_ALIGN(next_dword_idx);
lbuf_info.stats.rx_mac_timeout++;
}
set_rx_cons(next_dword_idx);
}
/* check if next_dword_idx exceeds lbuf */
if (get_rx_cons() >= (cur_rx_desc()->size >> 2))
move_to_next_lbuf(adapter);
} while(*work_done < budget);
netif_dbg(adapter, rx_status, default_netdev(adapter),
"loop exit: i=%u n=%d rx=%lu\n", dword_idx, *work_done,
likely(netdev) ? netdev->stats.rx_packets : 0);
}
/**
* lbuf_xmit - send pending data in lbuf to hardware
* @adapter: associated adapter structure
* @desc: lbuf descriptor to be transmitted
*
* This function flushes pending data (from prod to prod_pvt) from the lbuf
* to hardware by writing the address and length to a tx doorbell register.
* Since there are multiple doorbell slots, a slot index is claimed first and
* the doorbell registers of the claimed slot are then written.
* Before touching the doorbell registers, some pointer manipulation is needed
* with desc->lock held:
* 1) Once any pending data is sent, the next prod must be 4KB-aligned.
* 2) If the next prod reaches the end of the lbuf, it wraps around to 0.
* 3) Since pending data up to prod_pvt is sent, prod becomes equal to prod_pvt.
*
* Returns the number of bytes sent.
**/
static u32 lbuf_xmit(struct nf10_adapter *adapter, struct desc *desc)
{
u32 idx;
u32 nr_qwords;
u32 prod, next_prod;
u32 prod_pvt;
u32 bytes_to_send;
dma_addr_t dma_addr;
spin_lock_bh(&desc->lock);
/* get the current slot index, prod, prod_pvt */
idx = tx_idx();
prod = get_tx_prod(desc);
prod_pvt = get_tx_prod_pvt(desc);
/* if the current slot is unavailable or no pending data exists
* return zero byte */
if (!get_tx_avail(idx) || prod == prod_pvt) {
spin_unlock_bh(&desc->lock);
return 0;
}
/* before making prod and prod_pvt the same, let prod_pvt be aligned
* with 4KB, which is required by hardware */
next_prod = ALIGN(prod_pvt, 4096);
/* wrap around if reaching the end of lbuf */
if (unlikely(next_prod == desc->size))
next_prod = 0;
/* synchronize prod and prod_pvt with the same next pointer */
set_tx_prod(desc, next_prod);
set_tx_prod_pvt(desc, next_prod);
/* claim the current slot */
set_tx_used(idx);