forked from tempesta-tech/tempesta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
linux-5.10.35.patch
2665 lines (2522 loc) · 76.5 KB
/
linux-5.10.35.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 26bfe7ae7..35390651e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5259,6 +5259,12 @@
tdfx= [HW,DRM]
+ tempesta_dbmem= [KNL]
+ Order of 2MB memory blocks reserved on each NUMA node
+ for Tempesta database. Huge pages are used if
+ possible. Minimum value to start Tempesta is 4 (32MB).
+ Default is 8, i.e. 512MB is reserved.
+
test_suspend= [SUSPEND][,N]
Specify "mem" (for Suspend-to-RAM) or "standby" (for
standby suspend) or "freeze" (for suspend type freeze)
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 38f493604..4c244d605 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -24,6 +24,10 @@
#define KFPU_387 _BITUL(0) /* 387 state will be initialized */
#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */
+#ifdef CONFIG_SECURITY_TEMPESTA
+extern void __kernel_fpu_begin_mask(unsigned int kfpu_mask);
+extern void __kernel_fpu_end_bh(void);
+#endif
extern void kernel_fpu_begin_mask(unsigned int kfpu_mask);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 571220ac8..a7ce7c357 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -76,6 +76,10 @@ static bool interrupted_user_mode(void)
*/
bool irq_fpu_usable(void)
{
+#ifdef CONFIG_SECURITY_TEMPESTA
+ if (likely(in_serving_softirq()))
+ return true;
+#endif
return !in_interrupt() ||
interrupted_user_mode() ||
interrupted_kernel_fpu_idle();
@@ -121,10 +125,8 @@ int copy_fpregs_to_fpstate(struct fpu *fpu)
}
EXPORT_SYMBOL(copy_fpregs_to_fpstate);
-void kernel_fpu_begin_mask(unsigned int kfpu_mask)
+void __kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
- preempt_disable();
-
WARN_ON_FPU(!irq_fpu_usable());
WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
@@ -148,14 +150,46 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask)
if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU))
asm volatile ("fninit");
}
+
+void kernel_fpu_begin_mask(unsigned int kfpu_mask)
+{
+#ifdef CONFIG_SECURITY_TEMPESTA
+ /* SoftIRQ in the Tempesta kernel always enables FPU. */
+ if (likely(in_serving_softirq()))
+ return;
+
+ /*
+ * We don't know in which context the function is called, but we know
+ * precisely that softirq uses FPU, so we have to disable softirq as
+ * well as task preemption.
+ */
+ local_bh_disable();
+#endif
+ preempt_disable();
+
+ __kernel_fpu_begin_mask(kfpu_mask);
+}
EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
-void kernel_fpu_end(void)
+void __kernel_fpu_end_bh(void)
{
WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
this_cpu_write(in_kernel_fpu, false);
+}
+
+void kernel_fpu_end(void)
+{
+#ifdef CONFIG_SECURITY_TEMPESTA
+ if (likely(in_serving_softirq()))
+ return;
+#endif
+ __kernel_fpu_end_bh();
+
preempt_enable();
+#ifdef CONFIG_SECURITY_TEMPESTA
+ local_bh_enable();
+#endif
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
diff --git a/crypto/aead.c b/crypto/aead.c
index 169910952..a3f0aeca1 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -217,6 +217,24 @@ struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
}
EXPORT_SYMBOL_GPL(crypto_alloc_aead);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *
+crypto_find_aead(const char *alg_name, u32 type, u32 mask)
+{
+ return crypto_find_alg(alg_name, &crypto_aead_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_find_aead);
+
+struct crypto_aead *
+crypto_alloc_aead_atomic(struct crypto_alg *alg)
+{
+ alg = crypto_mod_get(alg);
+ BUG_ON(!alg);
+ return crypto_create_tfm(alg, &crypto_aead_type);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_aead_atomic);
+#endif
+
static int aead_prepare_alg(struct aead_alg *alg)
{
struct crypto_alg *base = &alg->base;
diff --git a/crypto/ahash.c b/crypto/ahash.c
index c2ca631a1..c49313c1a 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -559,6 +559,25 @@ struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type,
}
EXPORT_SYMBOL_GPL(crypto_alloc_ahash);
+#ifdef CONFIG_SECURITY_TEMPESTA
+/* Asynch hash is required by GHASH used in GCM. */
+struct crypto_alg *
+crypto_find_ahash(const char *alg_name, u32 type, u32 mask)
+{
+ return crypto_find_alg(alg_name, &crypto_ahash_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_find_ahash);
+
+struct crypto_ahash *
+crypto_alloc_ahash_atomic(struct crypto_alg *alg)
+{
+ alg = crypto_mod_get(alg);
+ BUG_ON(!alg);
+ return crypto_create_tfm(alg, &crypto_ahash_type);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_ahash_atomic);
+#endif
+
int crypto_has_ahash(const char *alg_name, u32 type, u32 mask)
{
return crypto_type_has_alg(alg_name, &crypto_ahash_type, type, mask);
diff --git a/crypto/api.c b/crypto/api.c
index ed08cbd5b..17b5789d4 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -446,7 +446,11 @@ void *crypto_create_tfm_node(struct crypto_alg *alg,
tfmsize = frontend->tfmsize;
total = tfmsize + sizeof(*tfm) + frontend->extsize(alg);
+#ifdef CONFIG_SECURITY_TEMPESTA
+ mem = kzalloc_node(total, GFP_ATOMIC, node);
+#else
mem = kzalloc_node(total, GFP_KERNEL, node);
+#endif
if (mem == NULL)
goto out_err;
@@ -480,6 +484,9 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
const struct crypto_type *frontend,
u32 type, u32 mask)
{
+ /* The function is too slow and preemptible to be called in softirq. */
+ WARN_ON_ONCE(in_serving_softirq());
+
if (frontend) {
type &= frontend->maskclear;
mask &= frontend->maskclear;
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index a1bea0f4b..29515a891 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -27,6 +27,8 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
+#include "internal.h"
+
static unsigned int cryptd_max_cpu_qlen = 1000;
module_param(cryptd_max_cpu_qlen, uint, 0);
MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth");
@@ -901,6 +903,75 @@ static struct crypto_template cryptd_tmpl = {
.module = THIS_MODULE,
};
+#ifdef CONFIG_SECURITY_TEMPESTA
+
+#define MAX_CACHED_ALG_COUNT 8
+struct alg_cache {
+ int n;
+ spinlock_t lock;
+ struct {
+ u32 type;
+ u32 mask;
+ struct crypto_alg *alg;
+ char alg_name[CRYPTO_MAX_ALG_NAME];
+ } a[MAX_CACHED_ALG_COUNT];
+};
+
+static struct alg_cache skcipher_alg_cache;
+static struct alg_cache ahash_alg_cache;
+static struct alg_cache aead_alg_cache;
+
+/*
+ * Finds a previously allocated algorithm or allocates a new one. In any case,
+ * returned alg holds at least one reference to its module.
+ */
+static struct crypto_alg *
+cryptd_find_alg_cached(const char *cryptd_alg_name, u32 type, u32 mask,
+ struct crypto_alg *(*find_alg)(const char *, u32, u32),
+ struct alg_cache *__restrict ac)
+{
+ struct crypto_alg *alg;
+ int k;
+
+ spin_lock(&ac->lock);
+ for (k = 0; k < ac->n; k++) {
+ if (strcmp(ac->a[k].alg_name, cryptd_alg_name) == 0
+ && ac->a[k].type == type && ac->a[k].mask == mask)
+ {
+ spin_unlock(&ac->lock);
+ return ac->a[k].alg;
+ }
+ }
+ spin_unlock(&ac->lock);
+
+ /* Searching for the algorithm may sleep, so warn about it. */
+ WARN_ON_ONCE(in_serving_softirq());
+
+ alg = find_alg(cryptd_alg_name, type, mask);
+ if (IS_ERR(alg))
+ return alg;
+
+ spin_lock(&ac->lock);
+ if (ac->n >= MAX_CACHED_ALG_COUNT) {
+ spin_unlock(&ac->lock);
+ BUG();
+ return ERR_PTR(-ENOMEM);
+ }
+
+ snprintf(ac->a[ac->n].alg_name, sizeof(ac->a[ac->n].alg_name), "%s",
+ cryptd_alg_name);
+
+ ac->a[ac->n].type = type;
+ ac->a[ac->n].mask = mask;
+ ac->a[ac->n].alg = alg;
+
+ ac->n += 1;
+ spin_unlock(&ac->lock);
+
+ return alg;
+}
+#endif /* CONFIG_SECURITY_TEMPESTA */
+
struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
u32 type, u32 mask)
{
@@ -912,7 +983,20 @@ struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
"cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
return ERR_PTR(-EINVAL);
+#ifdef CONFIG_SECURITY_TEMPESTA
+ {
+ struct crypto_alg *alg =
+ cryptd_find_alg_cached(cryptd_alg_name, type, mask,
+ crypto_find_skcipher,
+ &skcipher_alg_cache);
+ if (IS_ERR(alg))
+ return (struct cryptd_skcipher *)alg;
+
+ tfm = crypto_alloc_skcipher_atomic(alg);
+ }
+#else
tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask);
+#endif
if (IS_ERR(tfm))
return ERR_CAST(tfm);
@@ -963,7 +1047,21 @@ struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name,
if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
"cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
return ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_SECURITY_TEMPESTA
+ {
+ struct crypto_alg *alg =
+ cryptd_find_alg_cached(cryptd_alg_name, type, mask,
+ crypto_find_ahash,
+ &ahash_alg_cache);
+ if (IS_ERR(alg))
+ return (struct cryptd_ahash *)alg;
+
+ tfm = crypto_alloc_ahash_atomic(alg);
+ }
+#else
tfm = crypto_alloc_ahash(cryptd_alg_name, type, mask);
+#endif
if (IS_ERR(tfm))
return ERR_CAST(tfm);
if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
@@ -1020,7 +1118,21 @@ struct cryptd_aead *cryptd_alloc_aead(const char *alg_name,
if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
"cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
return ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_SECURITY_TEMPESTA
+ {
+ struct crypto_alg *alg =
+ cryptd_find_alg_cached(cryptd_alg_name, type, mask,
+ crypto_find_aead,
+ &aead_alg_cache);
+ if (IS_ERR(alg))
+ return (struct cryptd_aead *)alg;
+
+ tfm = crypto_alloc_aead_atomic(alg);
+ }
+#else
tfm = crypto_alloc_aead(cryptd_alg_name, type, mask);
+#endif
if (IS_ERR(tfm))
return ERR_CAST(tfm);
if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
diff --git a/crypto/shash.c b/crypto/shash.c
index 2e3433ad9..bc9c26dfe 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -509,6 +509,24 @@ struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
}
EXPORT_SYMBOL_GPL(crypto_alloc_shash);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *
+crypto_find_shash(const char *alg_name, u32 type, u32 mask)
+{
+ return crypto_find_alg(alg_name, &crypto_shash_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_find_shash);
+
+struct crypto_shash *
+crypto_alloc_shash_atomic(struct crypto_alg *alg)
+{
+ alg = crypto_mod_get(alg);
+ BUG_ON(!alg);
+ return crypto_create_tfm(alg, &crypto_shash_type);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_shash_atomic);
+#endif
+
static int shash_prepare_alg(struct shash_alg *alg)
{
struct crypto_alg *base = &alg->base;
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index b4dae640d..1b6d4a669 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -762,6 +762,24 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
}
EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *
+crypto_find_skcipher(const char *alg_name, u32 type, u32 mask)
+{
+ return crypto_find_alg(alg_name, &crypto_skcipher_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_find_skcipher);
+
+struct crypto_skcipher *
+crypto_alloc_skcipher_atomic(struct crypto_alg *alg)
+{
+ alg = crypto_mod_get(alg);
+ BUG_ON(!alg);
+ return crypto_create_tfm(alg, &crypto_skcipher_type);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_skcipher_atomic);
+#endif
+
struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
const char *alg_name, u32 type, u32 mask)
{
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index c32a6f566..5fe1addcc 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -177,6 +177,11 @@ static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm)
*/
struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *crypto_find_aead(const char *alg_name, u32 type, u32 mask);
+struct crypto_aead *crypto_alloc_aead_atomic(struct crypto_alg *alg);
+#endif
+
static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm)
{
return &tfm->base;
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 13f8a6a54..bba113f6d 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -273,6 +273,11 @@ static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm)
struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type,
u32 mask);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *crypto_find_ahash(const char *alg_name, u32 type, u32 mask);
+struct crypto_ahash *crypto_alloc_ahash_atomic(struct crypto_alg *alg);
+#endif
+
static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm)
{
return &tfm->base;
@@ -716,6 +721,11 @@ static inline void ahash_request_set_crypt(struct ahash_request *req,
struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
u32 mask);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *crypto_find_shash(const char *alg_name, u32 type, u32 mask);
+struct crypto_shash *crypto_alloc_shash_atomic(struct crypto_alg *alg);
+#endif
+
static inline struct crypto_tfm *crypto_shash_tfm(struct crypto_shash *tfm)
{
return &tfm->base;
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 6a733b171..d7e354ab2 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -187,6 +187,12 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name,
u32 type, u32 mask);
+#ifdef CONFIG_SECURITY_TEMPESTA
+struct crypto_alg *crypto_find_skcipher(const char *alg_name, u32 type,
+ u32 mask);
+struct crypto_skcipher *crypto_alloc_skcipher_atomic(struct crypto_alg *alg);
+#endif
+
static inline struct crypto_tfm *crypto_skcipher_tfm(
struct crypto_skcipher *tfm)
{
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ee8299eb1..da02a1d9d 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -524,13 +524,13 @@ extern bool force_irqthreads;
tasklets are more than enough. F.e. all serial device BHs et
al. should be converted to tasklets, not to softirqs.
*/
-
+/* Tempesta: process RX before TX to proxy traffic in one softirq shot. */
enum
{
HI_SOFTIRQ=0,
TIMER_SOFTIRQ,
- NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
+ NET_TX_SOFTIRQ,
BLOCK_SOFTIRQ,
IRQ_POLL_SOFTIRQ,
TASKLET_SOFTIRQ,
@@ -574,7 +574,7 @@ extern void softirq_init(void);
extern void __raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq_irqoff(unsigned int nr);
-extern void raise_softirq(unsigned int nr);
+void raise_softirq(unsigned int nr);
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
diff --git a/include/linux/net.h b/include/linux/net.h
index 0dcd51fee..9a09576a8 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -215,6 +215,8 @@ struct net_proto_family {
struct module *owner;
};
+extern const struct net_proto_family *get_proto_family(int family);
+
struct iovec;
struct kvec;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e37480b5f..617f4e76b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -154,11 +154,32 @@ static inline bool dev_xmit_complete(int rc)
# define LL_MAX_HEADER 32
#endif
+#ifdef CONFIG_SECURITY_TEMPESTA
+/*
+ * In the Tempesta case most traffic is TLS encrypted, so we need extra room
+ * for the TLS record header and explicit IV on skb allocation to avoid data
+ * movement in tcp_write_xmit(). Not all skbs have TLS headers, but it's not a
+ * big deal to allocate 16 more bytes (5 - TLS header, 8 - IV, 3 - alignment).
+ */
+#define TLS_MAX_HDR 16
+/*
+ * For fast transformation of HTTP/1.1 responses into HTTP/2 format, Tempesta
+ * uses zero-copy in-place rewriting of the response data, right in original
+ * skb. HTTP/2 data is almost always smaller than its source HTTP/1.1 data, but
+ * for the sake of robustness we use 32-byte initial offset in front of skb
+ * data. Thus, in order to guarantee the stack headers to fit, we should
+ * increase the total space for them.
+ */
+#define HTTP2_MAX_OFFSET 32
+#else
+#define TLS_MAX_HDR 0
+#define HTTP2_MAX_OFFSET 0
+#endif
#if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
!IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
-#define MAX_HEADER LL_MAX_HEADER
+#define MAX_HEADER (LL_MAX_HEADER + TLS_MAX_HDR + HTTP2_MAX_OFFSET)
#else
-#define MAX_HEADER (LL_MAX_HEADER + 48)
+#define MAX_HEADER (LL_MAX_HEADER + 48 + TLS_MAX_HDR + HTTP2_MAX_OFFSET)
#endif
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a828cf99c..b877eb543 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -232,6 +232,12 @@
SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+#ifdef CONFIG_SECURITY_TEMPESTA
+#define SKB_MAX_HEADER (PAGE_SIZE - MAX_TCP_HEADER \
+ - SKB_DATA_ALIGN(sizeof(struct sk_buff)) \
+ - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ - SKB_DATA_ALIGN(1))
+#endif
/* return minimum truesize of one skb containing X bytes of data */
#define SKB_TRUESIZE(X) ((X) + \
@@ -784,6 +790,9 @@ struct sk_buff {
fclone:2,
peeked:1,
head_frag:1,
+#ifdef CONFIG_SECURITY_TEMPESTA
+ skb_page:1,
+#endif
pfmemalloc:1;
#ifdef CONFIG_SKB_EXTENSIONS
__u8 active_extensions;
@@ -839,6 +848,9 @@ struct sk_buff {
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
+#ifdef CONFIG_SECURITY_TEMPESTA
+ __u8 tail_lock:1;
+#endif
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
@@ -931,6 +943,52 @@ struct sk_buff {
#define SKB_ALLOC_RX 0x02
#define SKB_ALLOC_NAPI 0x04
+#ifdef CONFIG_SECURITY_TEMPESTA
+long __get_skb_count(void);
+
+/**
+ * The skb type is used only between the time @skb is inserted into the TCP
+ * send queue and the time it's first processed in tcp_write_xmit(). During
+ * this period the @skb isn't scheduled yet, so we can use skb->dev for our
+ * needs and avoid extending sk_buff. We set the least significant bit to mark
+ * the value as not being a pointer, so that nothing breaks. TLS message type
+ * << 1 is always smaller than 0xff.
+ */
+static inline void
+tempesta_tls_skb_settype(struct sk_buff *skb, unsigned char type)
+{
+ BUG_ON(type >= 0x80);
+ WARN_ON_ONCE(skb->dev);
+
+ skb->dev = (void *)((type << 1) | 1UL);
+}
+
+static inline unsigned char
+tempesta_tls_skb_type(struct sk_buff *skb)
+{
+ unsigned long d = (unsigned long)skb->dev;
+
+ if (!(d & 1UL))
+ return 0; /* a pointer in skb->dev */
+ return d >> 1;
+}
+
+static inline void
+tempesta_tls_skb_typecp(struct sk_buff *dst, struct sk_buff *src)
+{
+ dst->dev = src->dev;
+}
+
+static inline void
+tempesta_tls_skb_clear(struct sk_buff *skb)
+{
+ unsigned long d = (unsigned long)skb->dev;
+
+ WARN_ON_ONCE(d & ~0xff);
+ skb->dev = NULL;
+}
+#endif
+
/**
* skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves
* @skb: buffer
@@ -1074,6 +1132,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize);
+void *pg_skb_alloc(unsigned int size, gfp_t gfp_mask, int node);
struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
int node);
struct sk_buff *__build_skb(void *data, unsigned int frag_size);
@@ -2104,7 +2163,11 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
static inline bool skb_is_nonlinear(const struct sk_buff *skb)
{
+#ifdef CONFIG_SECURITY_TEMPESTA
+ return skb->tail_lock || skb->data_len;
+#else
return skb->data_len;
+#endif
}
static inline unsigned int skb_headlen(const struct sk_buff *skb)
@@ -2341,6 +2404,20 @@ static inline unsigned int skb_headroom(const struct sk_buff *skb)
return skb->data - skb->head;
}
+#ifdef CONFIG_SECURITY_TEMPESTA
+/**
+ * skb_tailroom_locked - bytes at buffer end
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the tail of an sk_buff with
+ * respect to tail locking only.
+ */
+static inline int skb_tailroom_locked(const struct sk_buff *skb)
+{
+ return skb->tail_lock ? 0 : skb->end - skb->tail;
+}
+#endif
+
/**
* skb_tailroom - bytes at buffer end
* @skb: buffer to check
diff --git a/include/linux/tempesta.h b/include/linux/tempesta.h
new file mode 100644
index 000000000..55049bd32
--- /dev/null
+++ b/include/linux/tempesta.h
@@ -0,0 +1,54 @@
+/**
+ * Linux interface for Tempesta FW.
+ *
+ * Copyright (C) 2014 NatSys Lab. (info@natsys-lab.com).
+ * Copyright (C) 2015-2022 Tempesta Technologies, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef __TEMPESTA_H__
+#define __TEMPESTA_H__
+
+#include <net/sock.h>
+
+typedef void (*TempestaTxAction)(void);
+
+typedef struct {
+ int (*sk_alloc)(struct sock *sk);
+ void (*sk_free)(struct sock *sk);
+ int (*sock_tcp_rcv)(struct sock *sk, struct sk_buff *skb);
+} TempestaOps;
+
+typedef struct {
+ unsigned long addr;
+ unsigned long pages; /* number of 4KB pages */
+} TempestaMapping;
+
+/* Security hooks. */
+int tempesta_new_clntsk(struct sock *newsk);
+void tempesta_register_ops(TempestaOps *tops);
+void tempesta_unregister_ops(TempestaOps *tops);
+
+/* Network hooks. */
+void tempesta_set_tx_action(TempestaTxAction action);
+void tempesta_del_tx_action(void);
+
+/* Memory management. */
+void tempesta_reserve_pages(void);
+void tempesta_reserve_vmpages(void);
+int tempesta_get_mapping(int node, TempestaMapping **tm);
+
+#endif /* __TEMPESTA_H__ */
+
diff --git a/include/net/sock.h b/include/net/sock.h
index 261195598..f88df23d0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -506,6 +506,11 @@ struct sock {
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
+#ifdef CONFIG_SECURITY_TEMPESTA
+ int (*sk_write_xmit)(struct sock *sk,
+ struct sk_buff *skb,
+ unsigned int limit);
+#endif
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
@@ -861,6 +866,9 @@ enum sock_flags {
SOCK_TXTIME,
SOCK_XDP, /* XDP is attached */
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
+#ifdef CONFIG_SECURITY_TEMPESTA
+ SOCK_TEMPESTA, /* The socket is managed by Tempesta FW */
+#endif
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1915,8 +1923,7 @@ static inline bool sk_rethink_txhash(struct sock *sk)
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
- return rcu_dereference_check(sk->sk_dst_cache,
- lockdep_sock_is_held(sk));
+ return rcu_dereference_raw(sk->sk_dst_cache);
}
static inline struct dst_entry *
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7d66c61d2..572f6388f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -307,6 +307,7 @@ bool tcp_check_oom(struct sock *sk, int shift);
extern struct proto tcp_prot;
+extern struct proto tcpv6_prot;
#define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field)
#define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field)
@@ -653,6 +654,17 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
/* tcp.c */
void tcp_get_info(struct sock *, struct tcp_info *);
+/* Routines required by Tempesta FW. */
+void tcp_cleanup_rbuf(struct sock *sk, int copied);
+extern void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+ int size_goal);
+extern int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+extern void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb);
+extern void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags);
+extern void tcp_queue_skb(struct sock *sk, struct sk_buff *skb);
+extern int tcp_close_state(struct sock *sk);
+extern void skb_entail(struct sock *sk, struct sk_buff *skb);
+
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
diff --git a/include/net/tls.h b/include/net/tls.h
index 2bdd80221..356850dda 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -66,6 +66,13 @@
#define MAX_IV_SIZE 16
#define TLS_MAX_REC_SEQ_SIZE 8
+#ifdef CONFIG_SECURITY_TEMPESTA
+#define TLS_MAX_TAG_SZ 16
+/* Maximum size for required skb overhead: header, IV, tag. */
+#define TLS_MAX_OVERHEAD (TLS_HEADER_SIZE + TLS_AAD_SPACE_SIZE \
+ + TLS_MAX_TAG_SZ)
+#endif
+
/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes.
*
* IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3]
diff --git a/init/main.c b/init/main.c
index d9d914111..9a56ca35d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -110,6 +110,8 @@
#include <kunit/test.h>
+#include <linux/tempesta.h>
+
static int kernel_init(void *);
extern void init_IRQ(void);
@@ -828,6 +830,15 @@ static void __init mm_init(void)
init_debug_pagealloc();
report_meminit();
mem_init();
+
+#ifdef CONFIG_SECURITY_TEMPESTA
+ /*
+ * Tempesta: reserve pages just when zones are initialized
+ * to get contiguous address space of huge pages.
+ */
+ tempesta_reserve_pages();
+#endif
+
kmem_cache_init();
kmemleak_init();
pgtable_init();
@@ -838,6 +849,11 @@ static void __init mm_init(void)
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
pti_init();
+
+#ifdef CONFIG_SECURITY_TEMPESTA
+ /* Try vmalloc() if the previous one failed. */
+ tempesta_reserve_vmpages();
+#endif
}
void __init __weak arch_call_rest_init(void)
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index eca83965b..e0ed16db6 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -111,7 +111,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
return true;
#endif /* CONFIG_SMP */
}
-
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
bool irq_work_needs_cpu(void)
{
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 09229ad82..40c4d0682 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,7 @@
#include <linux/smpboot.h>
#include <linux/tick.h>
#include <linux/irq.h>
+#include <asm/fpu/api.h>
#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
@@ -275,6 +276,10 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
in_hardirq = lockdep_softirq_start();
+#ifdef CONFIG_SECURITY_TEMPESTA
+ __kernel_fpu_begin_mask(KFPU_MXCSR);
+#endif
+
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
@@ -320,6 +325,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
wakeup_softirqd();
}
+#ifdef CONFIG_SECURITY_TEMPESTA
+ __kernel_fpu_end_bh();
+#endif
lockdep_softirq_end(in_hardirq);
account_irq_exit_time(current);
__local_bh_enable(SOFTIRQ_OFFSET);
@@ -478,6 +486,7 @@ void raise_softirq(unsigned int nr)
raise_softirq_irqoff(nr);
local_irq_restore(flags);
}
+EXPORT_SYMBOL(raise_softirq);
void __raise_softirq_irqoff(unsigned int nr)
{
diff --git a/mm/Makefile b/mm/Makefile
index d73aed0fc..d19a4ecc1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -120,3 +120,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
+obj-$(CONFIG_SECURITY_TEMPESTA) += tempesta_mm.o
diff --git a/mm/tempesta_mm.c b/mm/tempesta_mm.c
new file mode 100644
index 000000000..9dc507aab
--- /dev/null
+++ b/mm/tempesta_mm.c
@@ -0,0 +1,278 @@
+/**
+ * Tempesta Memory Reservation
+ *
+ * Copyright (C) 2015-2022 Tempesta Technologies, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/gfp.h>
+#include <linux/hugetlb.h>
+#include <linux/tempesta.h>
+#include <linux/topology.h>
+#include <linux/vmalloc.h>
+
+#include "internal.h"
+
+#define MAX_PGORDER 16 /* 128GB per one table */
+#define MIN_PGORDER 4 /* 32MB */
+#define DEFAULT_PGORDER 8 /* 512MB */
+/* Modern processors support up to 1.5TB of RAM, be ready for 2TB. */
+#define GREEDY_ARNUM (1024 * 1024 + 1)
+#define PGNUM (1 << pgorder)
+#define PGNUM4K (PGNUM * (1 << HUGETLB_PAGE_ORDER))
+
+static int pgorder = DEFAULT_PGORDER;
+static gfp_t gfp_f = GFP_HIGHUSER | __GFP_COMP | __GFP_THISNODE | __GFP_ZERO
+ | __GFP_RETRY_MAYFAIL;
+static TempestaMapping map[MAX_NUMNODES];
+/*
+ * Modern x86-64 has not more than 512GB RAM per physical node.
+ * This is very large amount of memory, but it will be freed when
+ * initialization phase ends.
+ */
+static struct page *greedy[GREEDY_ARNUM] __initdata = { 0 };
+
+static int __init
+tempesta_setup_pages(char *str)
+{
+ get_option(&str, &pgorder);
+ if (pgorder < MIN_PGORDER) {
+ pr_err("Tempesta: bad dbmem value %d, must be [%d:%d]\n",
+ pgorder, MIN_PGORDER, MAX_PGORDER);
+ pgorder = MIN_PGORDER;
+ }
+ if (pgorder > MAX_PGORDER) {
+ pr_err("Tempesta: bad dbmem value %d, must be [%d:%d]\n",
+ pgorder, MIN_PGORDER, MAX_PGORDER);
+ pgorder = MAX_PGORDER;
+ }
+
+ return 1;
+}
+__setup("tempesta_dbmem=", tempesta_setup_pages);
+
+/**
+ * The code is somewhat stolen from mm/hugetlb.c.
+ */
+static struct page *