diff -Nru lightzone-4.2.2/debian/changelog lightzone-4.2.3/debian/changelog --- lightzone-4.2.2/debian/changelog 2020-11-24 14:19:18.000000000 +0000 +++ lightzone-4.2.3/debian/changelog 2021-04-28 14:51:21.000000000 +0000 @@ -1,4 +1,10 @@ -lightzone (4.2.2-0ppa2) focal; urgency=medium +lightzone (4.2.3-0ppa1) focal; urgency=medium + + * Upstream sync + + -- Masahiro Kitagawa Sat, 17 Apr 2021 10:19:49 +0900 + +lightzone (4.2.2-0obs2) unstable; urgency=medium * Upstream sync diff -Nru lightzone-4.2.2/debian/install lightzone-4.2.3/debian/install --- lightzone-4.2.2/debian/install 2020-11-24 14:19:18.000000000 +0000 +++ lightzone-4.2.3/debian/install 2021-04-28 14:51:21.000000000 +0000 @@ -5,7 +5,6 @@ lightcrafts/products/LightZone-forkd usr/lib/lightzone lightcrafts/products/dcraw_lz usr/lib/lightzone lightcrafts/products/lightcrafts.jar usr/share/java/lightzone -linux/products/libLinux.so usr/lib/lightzone linux/products/lightcrafts-linux.jar usr/share/java/lightzone linux/products/lightzonehelp.jar usr/share/java/lightzone linux/products/lightzone usr/bin diff -Nru lightzone-4.2.2/debian/lightzone.dsc lightzone-4.2.3/debian/lightzone.dsc --- lightzone-4.2.2/debian/lightzone.dsc 2020-11-24 14:19:18.000000000 +0000 +++ lightzone-4.2.3/debian/lightzone.dsc 2021-04-28 14:51:21.000000000 +0000 @@ -1,8 +1,8 @@ -Format: 3.0 (quilt) +Format: 1.0 Source: lightzone Binary: lightzone Architecture: i386 amd64 armhf arm64 -Version: 4.2.2 +Version: 4.2.3 Maintainer: Masahiro Kitagawa Homepage: http://lightzoneproject.org/ Standards-Version: 4.4.1 @@ -38,4 +38,4 @@ rsync Package-List: lightzone deb graphics optional -DEBTRANSFORM-TAR: lightzone-4.2.2.tar.bz2 +DEBTRANSFORM-TAR: lightzone-4.2.3.tar.bz2 diff -Nru lightzone-4.2.2/debian/source/format lightzone-4.2.3/debian/source/format --- lightzone-4.2.2/debian/source/format 2020-11-24 14:19:18.000000000 +0000 +++ lightzone-4.2.3/debian/source/format 2021-04-28 14:51:21.000000000 +0000 @@ -1 +1 @@ -3.0 (quilt) +1.0 diff -Nru lightzone-4.2.2/debian/source/options lightzone-4.2.3/debian/source/options --- lightzone-4.2.2/debian/source/options 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/debian/source/options 2021-04-28 14:51:21.000000000 +0000 @@ -0,0 +1,7 @@ +--tar-ignore=.idea +--tar-ignore=freebsd-ports +--tar-ignore=macosx +--tar-ignore=windows +--tar-ignore=.appveyor.yml +--tar-ignore=.git* +--tar-ignore=Splash.xcf diff -Nru lightzone-4.2.2/lightcrafts/build.xml lightzone-4.2.3/lightcrafts/build.xml --- lightzone-4.2.2/lightcrafts/build.xml 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/build.xml 2021-04-17 01:19:49.000000000 +0000 @@ -42,7 +42,7 @@ - + diff -Nru lightzone-4.2.2/lightcrafts/coprocesses/GNUmakefile lightzone-4.2.3/lightcrafts/coprocesses/GNUmakefile --- lightzone-4.2.2/lightcrafts/coprocesses/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/coprocesses/GNUmakefile 2021-04-17 01:19:49.000000000 +0000 @@ -1,3 +1,11 @@ +ifndef PLATFORM0 + include ../mk/platform0.mk +endif + +ifeq ($(PLATFORM),Windows) + SUBDIRS:= dcraw/ +endif + include ../mk/recurse.mk # vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/lightcrafts/help/Danish/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/Danish/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/Danish/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/Danish/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -74,20 +74,23 @@
Canon
- EOS 1D, EOS 1D Mark II, EOS 1D Mark II N, EOS 1D Mark III, EOS 1D Mark IV, EOS 1D X, + EOS 1D X Mark II, EOS 1Ds, EOS 1Ds Mark II, EOS 1Ds Mark III, EOS 5D, EOS 5D Mark II, EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, EOS 6D, + EOS 6D Mark II, EOS 7D, EOS 7D Mark II, EOS 10D, @@ -97,31 +100,47 @@ EOS 50D, EOS 60D, EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), EOS D30, EOS D60, EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, Powershot G1 X, + Powershot G1 X Mark III, Powershot G2, Powershot G3, + Powershot G3 X, Powershot G5, + Powershot G5 X, Powershot G6, Powershot G7 X, + Powershot G7 X Mark II, Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, Powershot G10, Powershot G11, Powershot G12, @@ -136,20 +155,22 @@ Powershot S100, Powershot S110, Powershot S120, - Powershot SX50 HS - + Powershot SX50 HS, + Powershot SX60 HS +
+ +
DxO
+
+ ONE
Epson
- R-D1 -
Fuji
- Finepix E550, Finepix E900, Finepix F700, @@ -169,28 +190,33 @@ Finepix S7000, Finepix S9000, Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, XF1, XQ1, XQ2 -
Kodak
- DCS Pro 14N, DCS Pro SLR-C, DCS Pro SLR-N, @@ -198,12 +224,10 @@ P850, P880, Pro Back -
Leica
- AG M8 Digital, AG M9 Digital, AG R9 Digital Back DMR, @@ -212,12 +236,10 @@ Digilux 3, V-Lux 1, X Vario (Typ 107) -
Minolta
- Alpha-5 Digital, Alpha-7 Digital, Dimage A1, @@ -229,16 +251,15 @@ Dynax 5D, Dynax 7D, Maxxum 7D -
Nikon
- 1 AW1, 1 J1, 1 J2, 1 J3, + 1 J5, 1 S1, 1 V1, 1 V2, @@ -260,6 +281,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -270,6 +292,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -277,29 +300,31 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, E8700, E8800, -
Olympus
- C-5050Z, C-5060WZ, C-70Z, @@ -322,8 +347,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -340,15 +369,19 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 -
Panasonic
- + DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -357,11 +390,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -373,6 +409,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -382,57 +420,52 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 - + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
- 645D, + 645Z, + K-1, K-3, + K-3 II, K-5, K-5 II, K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, K100D Super, K200D, - K2000, + K2000D, Km, Q7, *ist D, *ist DL, *ist DS -
Ricoh
- Caplio GX100, GR, + GR II, GXR MOUNT A12 - -
- -
Samsung
-
- - GX10, - GX20 -
Samsung
- EK-GN120, EX2F, GX-1S, @@ -441,24 +474,28 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, NX mini -
Sony
- DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -472,12 +509,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, @@ -498,7 +546,6 @@ SLT-A65V, SLT-A77V, SLT-A99V -

diff -Nru lightzone-4.2.2/lightcrafts/help/Dutch/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/Dutch/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/Dutch/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/Dutch/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -49,20 +49,23 @@
Canon
- EOS 1D, EOS 1D Mark II, EOS 1D Mark II N, EOS 1D Mark III, EOS 1D Mark IV, EOS 1D X, + EOS 1D X Mark II, EOS 1Ds, EOS 1Ds Mark II, EOS 1Ds Mark III, EOS 5D, EOS 5D Mark II, EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, EOS 6D, + EOS 6D Mark II, EOS 7D, EOS 7D Mark II, EOS 10D, @@ -72,31 +75,47 @@ EOS 50D, EOS 60D, EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), EOS D30, EOS D60, EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, Powershot G1 X, + Powershot G1 X Mark III, Powershot G2, Powershot G3, + Powershot G3 X, Powershot G5, + Powershot G5 X, Powershot G6, Powershot G7 X, + Powershot G7 X Mark II, Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, Powershot G10, Powershot G11, Powershot G12, @@ -111,20 +130,22 @@ Powershot S100, Powershot S110, Powershot S120, - Powershot SX50 HS - + Powershot SX50 HS, + Powershot SX60 HS +
+ +
DxO
+
+ ONE
Epson
- R-D1 -
Fuji
- Finepix E550, Finepix E900, Finepix F700, @@ -144,28 +165,33 @@ Finepix S7000, Finepix S9000, Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, XF1, XQ1, XQ2 -
Kodak
- DCS Pro 14N, DCS Pro SLR-C, DCS Pro SLR-N, @@ -173,12 +199,10 @@ P850, P880, Pro Back -
Leica
- AG M8 Digital, AG M9 Digital, AG R9 Digital Back DMR, @@ -187,12 +211,10 @@ Digilux 3, V-Lux 1, X Vario (Typ 107) -
Minolta
- Alpha-5 Digital, Alpha-7 Digital, Dimage A1, @@ -204,16 +226,15 @@ Dynax 5D, Dynax 7D, Maxxum 7D -
Nikon
- 1 AW1, 1 J1, 1 J2, 1 J3, + 1 J5, 1 S1, 1 V1, 1 V2, @@ -235,6 +256,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -245,6 +267,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -252,29 +275,31 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, E8700, E8800, -
Olympus
- C-5050Z, C-5060WZ, C-70Z, @@ -297,8 +322,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -315,15 +344,19 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 -
Panasonic
- + DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -332,11 +365,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -348,6 +384,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -357,57 +395,52 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 - + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
- 645D, + 645Z, + K-1, K-3, + K-3 II, K-5, K-5 II, K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, K100D Super, K200D, - K2000, + K2000D, Km, Q7, *ist D, *ist DL, *ist DS -
Ricoh
- Caplio GX100, GR, + GR II, GXR MOUNT A12 - -
- -
Samsung
-
- - GX10, - GX20 -
Samsung
- EK-GN120, EX2F, GX-1S, @@ -416,24 +449,28 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, NX mini -
Sony
- DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -447,12 +484,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, @@ -473,7 +521,6 @@ SLT-A65V, SLT-A77V, SLT-A99V -

diff -Nru lightzone-4.2.2/lightcrafts/help/English/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/English/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/English/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/English/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -73,13 +73,17 @@ EOS 1D Mark III, EOS 1D Mark IV, EOS 1D X, + EOS 1D X Mark II, EOS 1Ds, EOS 1Ds Mark II, EOS 1Ds Mark III, EOS 5D, EOS 5D Mark II, EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, EOS 6D, + EOS 6D Mark II, EOS 7D, EOS 7D Mark II, EOS 10D, @@ -89,31 +93,47 @@ EOS 50D, EOS 60D, EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), EOS D30, EOS D60, EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, Powershot G1 X, + Powershot G1 X Mark III, Powershot G2, Powershot G3, + Powershot G3 X, Powershot G5, + Powershot G5 X, Powershot G6, Powershot G7 X, + Powershot G7 X Mark II, Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, Powershot G10, Powershot G11, Powershot G12, @@ -128,7 +148,13 @@ Powershot S100, Powershot S110, Powershot S120, - Powershot SX50 HS + Powershot SX50 HS, + Powershot SX60 HS + + +
DxO
+
+ ONE
Epson
@@ -157,16 +183,23 @@ Finepix S7000, Finepix S9000, Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, @@ -219,6 +252,7 @@ 1 J1, 1 J2, 1 J3, + 1 J5, 1 S1, 1 V1, 1 V2, @@ -240,6 +274,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -250,6 +285,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -257,18 +293,22 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, @@ -300,8 +340,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -318,6 +362,7 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 @@ -325,6 +370,11 @@
Panasonic
+ DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -333,11 +383,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -349,6 +402,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -358,27 +413,35 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
645D, + 645Z, + K-1, K-3, + K-3 II, K-5, K-5 II, K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, K100D Super, K200D, - K2000, + K2000D, Km, Q7, *ist D, @@ -390,17 +453,12 @@
Caplio GX100, GR, + GR II, GXR MOUNT A12
Samsung
- GX10, - GX20 -
- -
Samsung
-
EK-GN120, EX2F, GX-1S, @@ -409,9 +467,10 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, NX mini @@ -421,10 +480,15 @@
DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -438,12 +502,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, diff -Nru lightzone-4.2.2/lightcrafts/help/French/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/French/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/French/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/French/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -49,20 +49,23 @@
Canon
- EOS 1D, EOS 1D Mark II, EOS 1D Mark II N, EOS 1D Mark III, EOS 1D Mark IV, EOS 1D X, + EOS 1D X Mark II, EOS 1Ds, EOS 1Ds Mark II, EOS 1Ds Mark III, EOS 5D, EOS 5D Mark II, EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, EOS 6D, + EOS 6D Mark II, EOS 7D, EOS 7D Mark II, EOS 10D, @@ -72,31 +75,47 @@ EOS 50D, EOS 60D, EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), EOS D30, EOS D60, EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, Powershot G1 X, + Powershot G1 X Mark III, Powershot G2, Powershot G3, + Powershot G3 X, Powershot G5, + Powershot G5 X, Powershot G6, Powershot G7 X, + Powershot G7 X Mark II, Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, Powershot G10, Powershot G11, Powershot G12, @@ -111,20 +130,22 @@ Powershot S100, Powershot S110, Powershot S120, - Powershot SX50 HS - + Powershot SX50 HS, + Powershot SX60 HS +
+ +
DxO
+
+ ONE
Epson
- R-D1 -
Fuji
- Finepix E550, Finepix E900, Finepix F700, @@ -144,28 +165,33 @@ Finepix S7000, Finepix S9000, Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, XF1, XQ1, XQ2 -
Kodak
- DCS Pro 14N, DCS Pro SLR-C, DCS Pro SLR-N, @@ -173,12 +199,10 @@ P850, P880, Pro Back -
Leica
- AG M8 Digital, AG M9 Digital, AG R9 Digital Back DMR, @@ -187,12 +211,10 @@ Digilux 3, V-Lux 1, X Vario (Typ 107) -
Minolta
- Alpha-5 Digital, Alpha-7 Digital, Dimage A1, @@ -204,16 +226,15 @@ Dynax 5D, Dynax 7D, Maxxum 7D -
Nikon
- 1 AW1, 1 J1, 1 J2, 1 J3, + 1 J5, 1 S1, 1 V1, 1 V2, @@ -235,6 +256,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -245,6 +267,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -252,29 +275,31 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, E8700, E8800, -
Olympus
- C-5050Z, C-5060WZ, C-70Z, @@ -297,8 +322,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -315,15 +344,19 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 -
Panasonic
- + DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -332,11 +365,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -348,6 +384,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -357,57 +395,52 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 - + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
- 645D, + 645Z, + K-1, K-3, + K-3 II, K-5, K-5 II, K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, K100D Super, K200D, - K2000, + K2000D, Km, Q7, *ist D, *ist DL, *ist DS -
Ricoh
- Caplio GX100, GR, + GR II, GXR MOUNT A12 - -
- -
Samsung
-
- - GX10, - GX20 -
Samsung
- EK-GN120, EX2F, GX-1S, @@ -416,24 +449,28 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, NX mini -
Sony
- DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -447,12 +484,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, @@ -473,7 +521,6 @@ SLT-A65V, SLT-A77V, SLT-A99V -

diff -Nru lightzone-4.2.2/lightcrafts/help/Italian/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/Italian/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/Italian/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/Italian/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -33,72 +33,100 @@

Con LightZone lavorare su file immagine raw delle principali fotocamere digitali è facilmente quanto lavorare su immagini JPEG o TIFF.

Tuttavia per visualizzare correttamente i file raw di alcune fotocamere è sempre necessario applicare di una "curva tonale" di default. Con LightZone, alla pila strumenti di questi file raw, è aggiunto automaticamente uno strumento ZoneMapper contenente la curva tonale di default. Nonostante lo strumento ZoneMapper sia ridotto e bloccato tramite lo strumento Blocca/Sblocca, è possibile espanderlo facendo doppio clic sulla sua barra del titolo ed esaminare la curva. È possibile anche sbloccarla, modificarla o cancellarla.

-

I file raw delle seguenti fotocamere avranno una curva tonale di default:

+

I file raw delle seguenti fotocamere avranno una curva tonale di default: + +

Canon
- EOS 1D, - EOS 1D Mark II, - EOS 1D Mark II N, - EOS 1D Mark III, - EOS 1D Mark IV, - EOS 1D X, - EOS 1Ds, - EOS 1Ds Mark II, - EOS 1Ds Mark III, - EOS 5D, - EOS 5D Mark II, - EOS 5D Mark III, - EOS 6D, - EOS 7D, - EOS 7D Mark II, - EOS 10D, - EOS 20D, - EOS 30D, - EOS 40D, - EOS 50D, - EOS 60D, - EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, - EOS D30, - EOS D60, - EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, - Powershot G1 X, - Powershot G2, - Powershot G3, - Powershot G5, - Powershot G6, - Powershot G7 X, - Powershot G9, - Powershot G10, - Powershot G11, - Powershot G12, - Powershot G15, - Powershot G16, - Powershot S30, - Powershot S40, - Powershot S50, - Powershot S60, - Powershot S90, - Powershot S95, - Powershot S100, - Powershot S110, - Powershot S120, - Powershot SX50 HS + EOS 1D, + EOS 1D Mark II, + EOS 1D Mark II N, + EOS 1D Mark III, + EOS 1D Mark IV, + EOS 1D X, + EOS 1D X Mark II, + EOS 1Ds, + EOS 1Ds Mark II, + EOS 1Ds Mark III, + EOS 5D, + EOS 5D Mark II, + EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, + EOS 6D, + EOS 6D Mark II, + EOS 7D, + EOS 7D Mark II, + EOS 10D, + EOS 20D, + EOS 30D, + EOS 40D, + EOS 50D, + EOS 60D, + EOS 70D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), + EOS D30, + EOS D60, + EOS M, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, + Powershot G1 X, + Powershot G1 X Mark III, + Powershot G2, + Powershot G3, + Powershot G3 X, + Powershot G5, + Powershot G5 X, + Powershot G6, + Powershot G7 X, + Powershot G7 X Mark II, + Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, + Powershot G10, + Powershot G11, + Powershot G12, + Powershot G15, + Powershot G16, + Powershot S30, + Powershot S40, + Powershot S50, + Powershot S60, + Powershot S90, + Powershot S95, + Powershot S100, + Powershot S110, + Powershot S120, + Powershot SX50 HS, + Powershot SX60 HS +
+ +
DxO
+
+ ONE
Epson
@@ -108,35 +136,42 @@
Fuji
- Finepix E550, - Finepix E900, - Finepix F700, - Finepix F710, - Finepix S1, - Finepix S2 Pro, - Finepix S3 Pro, - Finepix S5 Pro, - Finepix S20 Pro, - Finepix S100FS, - Finepix S5000, - Finepix S5100, - Finepix S5200, - Finepix S5500, - Finepix S5600, - Finepix S6000fd, - Finepix S7000, - Finepix S9000, - Finepix S9500, + Finepix E550, + Finepix E900, + Finepix F700, + Finepix F710, + Finepix S1, + Finepix S2 Pro, + Finepix S3 Pro, + Finepix S5 Pro, + Finepix S20 Pro, + Finepix S100FS, + Finepix S5000, + Finepix S5100, + Finepix S5200, + Finepix S5500, + Finepix S5600, + Finepix S6000fd, + Finepix S7000, + Finepix S9000, + Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, @@ -147,56 +182,57 @@
Kodak
- DCS Pro 14N, - DCS Pro SLR-C, - DCS Pro SLR-N, + DCS Pro 14N, + DCS Pro SLR-C, + DCS Pro SLR-N, P712, P850, P880, - Pro Back + Pro Back
Leica
- AG M8 Digital, - AG M9 Digital, - AG R9 Digital Back DMR, - D-Lux 2, - Digilux 2, - Digilux 3, - V-Lux 1, - X Vario (Typ 107) + AG M8 Digital, + AG M9 Digital, + AG R9 Digital Back DMR, + D-Lux 2, + Digilux 2, + Digilux 3, + V-Lux 1, + X Vario (Typ 107)
Minolta
- Alpha-5 Digital, - Alpha-7 Digital, - Dimage A1, - Dimage A2, - Dimage A200, - Dimage 5, - Dimage 7, - Dimage 7 HI, - Dynax 5D, - Dynax 7D, - Maxxum 7D + Alpha-5 Digital, + Alpha-7 Digital, + Dimage A1, + Dimage A2, + Dimage A200, + Dimage 5, + Dimage 7, + Dimage 7 HI, + Dynax 5D, + Dynax 7D, + Maxxum 7D
Nikon
- 1 AW1, - 1 J1, - 1 J2, - 1 J3, - 1 S1, - 1 V1, - 1 V2, - 1 V3, - Coolpix A, - Coolpix P340, - Coolpix P6000, - Coolpix P7800, + 1 AW1, + 1 J1, + 1 J2, + 1 J3, + 1 J5, + 1 S1, + 1 V1, + 1 V2, + 1 V3, + Coolpix A, + Coolpix P340, + Coolpix P6000, + Coolpix P7800, D1H, D1X, D2H, @@ -210,6 +246,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -220,6 +257,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -227,18 +265,22 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, @@ -270,8 +312,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -288,6 +334,7 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 @@ -295,6 +342,11 @@
Panasonic
+ DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -303,11 +355,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -319,6 +374,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -328,45 +385,48 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
645D, + 645Z, + K-1, K-3, + K-3 II, K-5, - K-5 II, - K-5 II s, + K-5 II, + K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, - K100D Super, + K100D Super, K200D, - K2000, + K2000D, Km, Q7, - *ist D, - *ist DL, - *ist DS + *ist D, + *ist DL, + *ist DS
Ricoh
- Caplio GX100, + Caplio GX100, GR, - GXR MOUNT A12 -
- -
Samsung
-
- GX10, - GX20 + GR II, + GXR MOUNT A12
Samsung
@@ -379,22 +439,28 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, - NX mini + NX mini
Sony
DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -408,12 +474,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, @@ -454,4 +531,4 @@ - \ No newline at end of file + diff -Nru lightzone-4.2.2/lightcrafts/help/Japanese/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/Japanese/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/Japanese/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/Japanese/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -42,6 +42,7 @@ 下記のカメラの raw ファイルには既定のトーン カーブが用意されています。
+
Canon
EOS 1D, @@ -50,13 +51,17 @@ EOS 1D Mark III, EOS 1D Mark IV, EOS 1D X, + EOS 1D X Mark II, EOS 1Ds, EOS 1Ds Mark II, EOS 1Ds Mark III, EOS 5D, EOS 5D Mark II, EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, EOS 6D, + EOS 6D Mark II, EOS 7D, EOS 7D Mark II, EOS 10D, @@ -66,31 +71,47 @@ EOS 50D, EOS 60D, EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), EOS D30, EOS D60, EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, Powershot G1 X, + Powershot G1 X Mark III, Powershot G2, Powershot G3, + Powershot G3 X, Powershot G5, + Powershot G5 X, Powershot G6, Powershot G7 X, + Powershot G7 X Mark II, Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, Powershot G10, Powershot G11, Powershot G12, @@ -105,7 +126,13 @@ Powershot S100, Powershot S110, Powershot S120, - Powershot SX50 HS + Powershot SX50 HS, + Powershot SX60 HS +
+ +
DxO
+
+ ONE
Epson
@@ -134,16 +161,23 @@ Finepix S7000, Finepix S9000, Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, @@ -196,6 +230,7 @@ 1 J1, 1 J2, 1 J3, + 1 J5, 1 S1, 1 V1, 1 V2, @@ -217,6 +252,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -227,6 +263,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -234,18 +271,22 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, @@ -277,8 +318,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -295,6 +340,7 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 @@ -302,6 +348,11 @@
Panasonic
+ DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -310,11 +361,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -326,6 +380,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -335,27 +391,35 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
645D, + 645Z, + K-1, K-3, + K-3 II, K-5, K-5 II, K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, K100D Super, K200D, - K2000, + K2000D, Km, Q7, *ist D, @@ -367,17 +431,12 @@
Caplio GX100, GR, + GR II, GXR MOUNT A12
Samsung
- GX10, - GX20 -
- -
Samsung
-
EK-GN120, EX2F, GX-1S, @@ -386,9 +445,10 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, NX mini @@ -398,10 +458,15 @@
DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -415,12 +480,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, diff -Nru lightzone-4.2.2/lightcrafts/help/Spanish/Default_Tone_Curve.html lightzone-4.2.3/lightcrafts/help/Spanish/Default_Tone_Curve.html --- lightzone-4.2.2/lightcrafts/help/Spanish/Default_Tone_Curve.html 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/help/Spanish/Default_Tone_Curve.html 2021-04-17 01:19:49.000000000 +0000 @@ -71,13 +71,17 @@ EOS 1D Mark III, EOS 1D Mark IV, EOS 1D X, + EOS 1D X Mark II, EOS 1Ds, EOS 1Ds Mark II, EOS 1Ds Mark III, EOS 5D, EOS 5D Mark II, EOS 5D Mark III, + EOS 5D Mark IV, + EOS 5Ds, EOS 6D, + EOS 6D Mark II, EOS 7D, EOS 7D Mark II, EOS 10D, @@ -87,31 +91,47 @@ EOS 50D, EOS 60D, EOS 70D, - EOS 100D, - EOS 450D, - EOS 1000D, - EOS 1100D, - EOS 1200D, + EOS 77D, + EOS 80D, + EOS 100D (Digital Rebel SL1, Kiss X7), + EOS 200D (Digital Rebel SL2, Kiss X9), + EOS 300D (Digital Rebel, Kiss Digital), + EOS 350D (Digital Rebel XT, Kiss Digital N), + EOS 400D (Digital Rebel XTi, Kiss Digital X), + EOS 450D (Digital Rebel XSi, Kiss X2), + EOS 500D (Digital Rebel T1i, Kiss X3), + EOS 550D (Digital Rebel T2i, Kiss X4), + EOS 600D (Digital Rebel T3i, Kiss X5), + EOS 650D (Digital Rebel T4i, Kiss X6i), + EOS 700D (Digital Rebel T5i, Kiss X7i), + EOS 750D (Digital Rebel T6i, Kiss X8i), + EOS 760D (Digital Rebel T6s, 8000D), + EOS 800D (Digital Rebel T7i, Kiss X9i), + EOS 1000D (Digital Rebel XS, Kiss F), + EOS 1100D (Digital Rebel T3, Kiss X50), + EOS 1200D (Digital Rebel T5, Kiss X70), + EOS 1300D (Digital Rebel T6, Kiss X80), EOS D30, EOS D60, EOS M, - EOS Digital Rebel (300D), - EOS Digital Rebel XT (350D), - EOS Digital Rebel XTi (400D), - EOS Digital Rebel XSi (450D), - EOS Digital Rebel T1i (500D), - EOS Digital Rebel T2i (550D), - EOS Digital Rebel T3i (600D), - EOS Digital Rebel T4i (650D), - EOS Digital Rebel T5i (700D), - EOS Kiss Digital, + EOS M3, + EOS M5, + EOS M6, + EOS M10, + EOS M100, Powershot G1 X, + Powershot G1 X Mark III, Powershot G2, Powershot G3, + Powershot G3 X, Powershot G5, + Powershot G5 X, Powershot G6, Powershot G7 X, + Powershot G7 X Mark II, Powershot G9, + Powershot G9 X, + Powershot G9 X Mark II, Powershot G10, Powershot G11, Powershot G12, @@ -126,7 +146,13 @@ Powershot S100, Powershot S110, Powershot S120, - Powershot SX50 HS + Powershot SX50 HS, + Powershot SX60 HS +
+ +
DxO
+
+ ONE
Epson
@@ -155,16 +181,23 @@ Finepix S7000, Finepix S9000, Finepix S9500, + GFX 50S, X-A1, + X-A2, + X-A3, + X-A5, X-E1, X-E2, X-M1, X-Pro1, + X-Pro2, X-S1, X-T1, + X-T2, X10, X20, X30, + X70, X100, X100S, X100T, @@ -217,6 +250,7 @@ 1 J1, 1 J2, 1 J3, + 1 J5, 1 S1, 1 V1, 1 V2, @@ -238,6 +272,7 @@ D4S, D40, D40X, + D5, D50, D60, D70, @@ -248,6 +283,7 @@ D200, D300, D300s, + D500, D600, D610, D700, @@ -255,18 +291,22 @@ D800, D800E, D810, + D850, D3000, D3100, D3200, D3300, + D3400, D5000, D5100, D5200, D5300, D5500, + D5600, D7000, D7100, D7200, + D7500, Df, E5400, E8400, @@ -298,8 +338,12 @@ E-520, E-620, E-M1, + E-M1 Mark II, E-M5, + E-M5 Mark II, E-M10, + E-M10 Mark II, + E-M10 Mark III, E-P1, E-P2, E-P3, @@ -316,6 +360,7 @@ SP-350, SP-500UZ, SP-550UZ, + STYLUS 1, XZ-1, XZ-2, XZ-10 @@ -323,6 +368,11 @@
Panasonic
+ DC-FZ80, + DC-G9, + DC-GH5, + DC-ZS200, + DMC-CM1, DMC-FZ18, DMC-FZ28, DMC-FZ30, @@ -331,11 +381,14 @@ DMC-FZ150, DMC-FZ200, DMC-FZ1000, + DMC-FZ2500, DMC-G1, DMC-G2, DMC-G3, DMC-G5, DMC-G6, + DMC-G7, + DMC-G85, DMC-GF1, DMC-GF2, DMC-GF3, @@ -347,6 +400,8 @@ DMC-GM1, DMC-GX1, DMC-GX7, + DMC-GX8, + DMC-GX85, DMC-L1, DMC-L10, DMC-LC1, @@ -356,27 +411,35 @@ DMC-LX3, DMC-LX5, DMC-LX7, - DMC-LX100 + DMC-LX100, + DMC-ZS40, + DMC-ZS50, + DMC-ZS100
Pentax
645D, + 645Z, + K-1, K-3, + K-3 II, K-5, K-5 II, K-5 II s, K-7, K-50, + K-70, K-500, K-r, + K-S2, K-x, K10D, K20D, K100D, K100D Super, K200D, - K2000, + K2000D, Km, Q7, *ist D, @@ -388,17 +451,12 @@
Caplio GX100, GR, + GR II, GXR MOUNT A12
Samsung
- GX10, - GX20 -
- -
Samsung
-
EK-GN120, EX2F, GX-1S, @@ -407,9 +465,10 @@ NX1, NX10, NX20, - NX30 NX100, NX200, + NX300, + NX500, NX1000, NX2000, NX mini @@ -419,10 +478,15 @@
DSC-R1, DSC-RX1R, + DSC-RX1RM2, DSC-RX10, + DSC-RX10M2, + DSC-RX10M3, DSC-RX100, DSC-RX100M2, DSC-RX100M3, + DSC-RX100M4, + DSC-RX100M5, A100, A200, A230, @@ -436,12 +500,23 @@ A700, A850, A900, + ILCA-77M2, + ILCA-99M2, ILCE-7, + ILCE-7M2, + ILCE-7M3, ILCE-7R, + ILCE-7RM2, + ILCE-7RM3, + ILCE-7S, + ILCE-7SM2, + ILCE-9, ILCE-3000, ILCE-5000, ILCE-5100, ILCE-6000, + ILCE-6300, + ILCE-6500, ILCE-QX1, NEX-3, NEX-3N, @@ -482,4 +557,4 @@ - \ No newline at end of file + diff -Nru lightzone-4.2.2/lightcrafts/ivy.xml lightzone-4.2.3/lightcrafts/ivy.xml --- lightzone-4.2.2/lightcrafts/ivy.xml 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/ivy.xml 2021-04-17 01:19:49.000000000 +0000 @@ -2,16 +2,19 @@ - - - - - - - - - - + + + + + + + + + + + + + diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/arrays/GNUmakefile lightzone-4.2.3/lightcrafts/jnisrc/arrays/GNUmakefile --- lightzone-4.2.2/lightcrafts/jnisrc/arrays/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/arrays/GNUmakefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -TARGET_BASE:= LCArrays - -# Uncomment to compile in debug mode. -#DEBUG:= true - -JNI_WINDOWS_LINK:= -lLCJNI -JNI_LINUX_LINK:= $(JNI_WINDOWS_LINK) -JNI_MACOSX_LINK:= ../jniutils/libLCJNI.a - -JAVAH_CLASSES:= com.lightcrafts.utils.LCArrays - -ROOT:= ../../.. -include ../jni.mk - -# vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/arrays/JNI_OnLoad.cpp lightzone-4.2.3/lightcrafts/jnisrc/arrays/JNI_OnLoad.cpp --- lightzone-4.2.2/lightcrafts/jnisrc/arrays/JNI_OnLoad.cpp 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/arrays/JNI_OnLoad.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -/* Copyright (C) 2005-2011 Fabio Riccardi */ - -// standard -#include - -// local -#include "LC_JNIUtils.h" - -/** - * This is called by the Java class-loader. - */ -JNIEXPORT jint JNICALL JNI_OnLoad( JavaVM *jvm, void* ) { - g_jvm = jvm; - return JNI_VERSION_1_4; -} - -/* vim:set et sw=4 ts=4: */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/arrays/LCArrays.cpp lightzone-4.2.3/lightcrafts/jnisrc/arrays/LCArrays.cpp --- lightzone-4.2.2/lightcrafts/jnisrc/arrays/LCArrays.cpp 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/arrays/LCArrays.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,73 +0,0 @@ -/* Copyright (C) 2005-2011 Fabio Riccardi */ - -/** - * LCArrays - * - * Paul J. Lucas [paul@lightcrafts.com] - */ - -// standard -#include /* for memcpy(3) */ - -// local -#include "LC_JNIUtils.h" -#ifndef AUTO_DEP -#include "javah/com_lightcrafts_utils_LCArrays.h" -#endif - -using namespace std; -using namespace LightCrafts; - -////////// JNI //////////////////////////////////////////////////////////////// - -#define LCArrays_METHOD(method) \ - name4(Java_,com_lightcrafts_utils_LCArrays,_,method) - -/** - * Copy the raw bytes from an int[] to a byte[]. - */ -JNIEXPORT void JNICALL LCArrays_METHOD(copy___3II_3BII) - ( JNIEnv *env, jclass, jintArray jSrc, jint srcPos, - jbyteArray jDest, jint destPos, jint length ) -{ - jarray_to_c const cSrc( env, jSrc ); - jarray_to_c cDest( env, jDest ); - ::memcpy( cDest + destPos, cSrc + srcPos, length ); -} - -/** - * Copy the raw bytes from a short[] to a byte[]. 
- */ -JNIEXPORT void JNICALL LCArrays_METHOD(copy___3SI_3BII) - ( JNIEnv *env, jclass, jshortArray jSrc, jint srcPos, - jbyteArray jDest, jint destPos, jint length ) -{ - jarray_to_c const cSrc( env, jSrc ); - jarray_to_c cDest( env, jDest ); - ::memcpy( cDest + destPos, cSrc + srcPos, length ); -} - -/** - * Copy the raw bytes from a byte[] to an int[]. - */ -JNIEXPORT void JNICALL LCArrays_METHOD(copy___3BI_3III) - ( JNIEnv *env, jclass, jbyteArray jSrc, jint srcPos, - jintArray jDest, jint destPos, jint length ) -{ - jarray_to_c const cSrc( env, jSrc ); - jarray_to_c cDest( env, jDest ); - ::memcpy( cDest + destPos, cSrc + srcPos, length ); -} - -/** - * Copy the raw bytes from a byte[] to a short[]. - */ -JNIEXPORT void JNICALL LCArrays_METHOD(copy___3BI_3SII) - ( JNIEnv *env, jclass, jbyteArray jSrc, jint srcPos, - jshortArray jDest, jint destPos, jint length ) -{ - jarray_to_c const cSrc( env, jSrc ); - jarray_to_c cDest( env, jDest ); - ::memcpy( cDest + destPos, cSrc + srcPos, length ); -} -/* vim:set et sw=4 ts=4: */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/EDISON/GNUmakefile lightzone-4.2.3/lightcrafts/jnisrc/EDISON/GNUmakefile --- lightzone-4.2.2/lightcrafts/jnisrc/EDISON/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/EDISON/GNUmakefile 2021-04-17 01:19:49.000000000 +0000 @@ -22,7 +22,7 @@ SEGM_LDFLAGS:= $(PLATFORM_LDFLAGS) ifeq ($(UNIVERSAL),1) -$(TARGET_PPC) $(TARGET_X86): $(SEGM_SOURCE) $(SEGM_INCLUDES) +$(TARGET_ARM) $(TARGET_X86): $(SEGM_SOURCE) $(SEGM_INCLUDES) -$(MKDIR) $(TARGET_DIR) $(CC_LINK) $(CFLAGS) $(INCLUDES) $(SEGM_LDFLAGS) -o $@ $(SEGM_SOURCE) ifeq ($(PLATFORM),MacOSX) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/GNUmakefile lightzone-4.2.3/lightcrafts/jnisrc/GNUmakefile --- lightzone-4.2.2/lightcrafts/jnisrc/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/GNUmakefile 2021-04-17 01:19:49.000000000 +0000 @@ -2,7 +2,6 @@ # We need to specify this manually so that jniutils is built first. 
## SUBDIRS:= jniutils \ - arrays \ cache \ dcraw \ EDISON \ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/dvec.h lightzone-4.2.3/lightcrafts/jnisrc/include/dvec.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/dvec.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/dvec.h 2021-04-17 01:19:49.000000000 +0000 @@ -115,13 +115,16 @@ I64vec2& operator -=(const I64vec2 &a) { return *this = (I64vec2) _mm_sub_epi64(vec,a); } I64vec2 operator<<(const I64vec2 &a) { return _mm_sll_epi64(vec,a); } - I64vec2 operator<<(int count) { return _mm_slli_epi64(vec,count); } I64vec2& operator<<=(const I64vec2 &a) { return *this = (I64vec2) _mm_sll_epi64(vec,a); } - I64vec2& operator<<=(int count) { return *this = (I64vec2) _mm_slli_epi64(vec,count); } I64vec2 operator>>(const I64vec2 &a) { return _mm_srl_epi64(vec,a); } - I64vec2 operator>>(int count) { return _mm_srli_epi64(vec,count); } I64vec2& operator>>=(const I64vec2 &a) { return *this = (I64vec2) _mm_srl_epi64(vec,a); } + + #if !defined __clang__ && !defined __aarch64__ + I64vec2 operator<<(int count) { return _mm_slli_epi64(vec,count); } + I64vec2& operator<<=(int count) { return *this = (I64vec2) _mm_slli_epi64(vec,count); } + I64vec2 operator>>(int count) { return _mm_srli_epi64(vec,count); } I64vec2& operator>>=(int count) { return *this = (I64vec2) _mm_srli_epi64(vec,count); } + #endif const int64_t& operator[](int i)const { @@ -156,10 +159,12 @@ I32vec4& operator -=(const I32vec4 &a) { return *this = (I32vec4)_mm_sub_epi32(vec,a); } I32vec4 operator<<(const I32vec4 &a) { return _mm_sll_epi32(vec,a); } - I32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); } I32vec4& operator<<=(const I32vec4 &a) { return *this = (I32vec4)_mm_sll_epi32(vec,a); } - I32vec4& operator<<=(int count) { return *this = (I32vec4)_mm_slli_epi32(vec,count); } + #if !defined __clang__ && !defined __aarch64__ + I32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); } + I32vec4& operator<<=(int count) { return *this = (I32vec4)_mm_slli_epi32(vec,count); } + #endif }; inline I32vec4 cmpeq(const I32vec4 &a,const I32vec4 &b) { return _mm_cmpeq_epi32(a,b); } @@ -191,14 +196,16 @@ Is32vec4& operator -=(const I32vec4 &a) { return *this = (Is32vec4)_mm_sub_epi32(vec,a); } Is32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); } - Is32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); } Is32vec4& operator<<=(const M128 &a) { return *this = (Is32vec4)_mm_sll_epi32(vec,a); } - Is32vec4& operator<<=(int count) { return *this = (Is32vec4)_mm_slli_epi32(vec,count); } - Is32vec4 operator>>(const M128 &a) { return _mm_sra_epi32(vec,a); } - Is32vec4 operator>>(int count) { return _mm_srai_epi32(vec,count); } Is32vec4& operator>>=(const M128 &a) { return *this = (Is32vec4) _mm_sra_epi32(vec,a); } + + #if !defined __clang__ && !defined __aarch64__ + Is32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); } + Is32vec4& operator<<=(int count) { return *this = (Is32vec4)_mm_slli_epi32(vec,count); } + Is32vec4 operator>>(int count) { return _mm_srai_epi32(vec,count); } Is32vec4& operator>>=(int count) { return *this = (Is32vec4) _mm_srai_epi32(vec,count); } + #endif #if defined(_ENABLE_VEC_DEBUG) @@ -256,13 +263,16 @@ Iu32vec4& operator -=(const I32vec4 &a) { return *this = (Iu32vec4)_mm_sub_epi32(vec,a); } Iu32vec4 operator<<(const M128 &a) { return _mm_sll_epi32(vec,a); } - Iu32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); } Iu32vec4& operator<<=(const M128 &a) { return *this = 
(Iu32vec4)_mm_sll_epi32(vec,a); } - Iu32vec4& operator<<=(int count) { return *this = (Iu32vec4)_mm_slli_epi32(vec,count); } Iu32vec4 operator>>(const M128 &a) { return _mm_srl_epi32(vec,a); } - Iu32vec4 operator>>(int count) { return _mm_srli_epi32(vec,count); } Iu32vec4& operator>>=(const M128 &a) { return *this = (Iu32vec4) _mm_srl_epi32(vec,a); } + + #if !defined __clang__ && !defined __aarch64__ Iu32vec4& operator>>=(int count) { return *this = (Iu32vec4) _mm_srli_epi32(vec,count); } + Iu32vec4 operator<<(int count) { return _mm_slli_epi32(vec,count); } + Iu32vec4& operator<<=(int count) { return *this = (Iu32vec4)_mm_slli_epi32(vec,count); } + Iu32vec4 operator>>(int count) { return _mm_srli_epi32(vec,count); } + #endif #if defined(_ENABLE_VEC_DEBUG) @@ -313,9 +323,12 @@ I16vec8& operator *=(const I16vec8 &a) { return *this = (I16vec8) _mm_mullo_epi16(vec,a); } I16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); } - I16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); } I16vec8& operator<<=(const M128 &a) { return *this = (I16vec8)_mm_sll_epi16(vec,a); } + + #if !defined __clang__ && !defined __aarch64__ I16vec8& operator<<=(int count) { return *this = (I16vec8)_mm_slli_epi16(vec,count); } + I16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); } + #endif }; @@ -355,14 +368,17 @@ Is16vec8& operator *=(const I16vec8 &a) { return *this = (Is16vec8) _mm_mullo_epi16(vec,a); } Is16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); } - Is16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); } Is16vec8& operator<<=(const M128 &a) { return *this = (Is16vec8)_mm_sll_epi16(vec,a); } - Is16vec8& operator<<=(int count) { return *this = (Is16vec8)_mm_slli_epi16(vec,count); } Is16vec8 operator>>(const M128 &a) { return _mm_sra_epi16(vec,a); } - Is16vec8 operator>>(int count) { return _mm_srai_epi16(vec,count); } Is16vec8& operator>>=(const M128 &a) { return *this = (Is16vec8)_mm_sra_epi16(vec,a); } + + #if !defined __clang__ && !defined __aarch64__ + Is16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); } + Is16vec8& operator<<=(int count) { return *this = (Is16vec8)_mm_slli_epi16(vec,count); } + Is16vec8 operator>>(int count) { return _mm_srai_epi16(vec,count); } Is16vec8& operator>>=(int count) { return *this = (Is16vec8)_mm_srai_epi16(vec,count); } + #endif #if defined(_ENABLE_VEC_DEBUG) @@ -440,13 +456,16 @@ Iu16vec8& operator *=(const I16vec8 &a) { return *this = (Iu16vec8) _mm_mullo_epi16(vec,a); } Iu16vec8 operator<<(const M128 &a) { return _mm_sll_epi16(vec,a); } - Iu16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); } Iu16vec8& operator<<=(const M128 &a) { return *this = (Iu16vec8)_mm_sll_epi16(vec,a); } - Iu16vec8& operator<<=(int count) { return *this = (Iu16vec8)_mm_slli_epi16(vec,count); } Iu16vec8 operator>>(const M128 &a) { return _mm_srl_epi16(vec,a); } - Iu16vec8 operator>>(int count) { return _mm_srli_epi16(vec,count); } Iu16vec8& operator>>=(const M128 &a) { return *this = (Iu16vec8) _mm_srl_epi16(vec,a); } + + #if !defined __clang__ && !defined __aarch64__ + Iu16vec8 operator<<(int count) { return _mm_slli_epi16(vec,count); } + Iu16vec8& operator<<=(int count) { return *this = (Iu16vec8)_mm_slli_epi16(vec,count); } + Iu16vec8 operator>>(int count) { return _mm_srli_epi16(vec,count); } Iu16vec8& operator>>=(int count) { return *this = (Iu16vec8) _mm_srli_epi16(vec,count); } + #endif #if defined(_ENABLE_VEC_DEBUG) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/addw_high.h 
lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/addw_high.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/addw_high.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/addw_high.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) +#define SIMDE_ARM_NEON_ADDW_HIGH_H + +#include "types.h" +#include "movl.h" +#include "add.h" +#include "get_high.h" +#include "get_low.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s16(a, simde_vmovl_s8(simde_vget_high_s8(b))); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_s8 + #define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s32(a, simde_vmovl_s16(simde_vget_high_s16(b))); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_s16 + #define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t 
b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s64(a, simde_vmovl_s32(simde_vget_high_s32(b))); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_s32 + #define vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u16(a, simde_vmovl_u8(simde_vget_high_u8(b))); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_u8 + #define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u32(a, simde_vmovl_u16(simde_vget_high_u16(b))); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x8_private b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_u16 + #define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u64(a, simde_vmovl_u32(simde_vget_high_u32(b))); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x4_private b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_u32 + #define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/bic.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/bic.h 
--- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/bic.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/bic.h 2021-04-17 01:19:49.000000000 +0000 @@ -243,7 +243,7 @@ return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_andc(a, b); #else simde_int8x16_private @@ -330,7 +330,7 @@ return _mm_andnot_si128(b, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_andnot(a, b); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_andc(a, b); #else simde_int64x2_private diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/bsl.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/bsl.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/bsl.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/bsl.h 2021-04-17 01:19:49.000000000 +0000 @@ -228,6 +228,8 @@ return vbslq_f32(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_castsi128_ps(_mm_ternarylogic_epi32(a, _mm_castps_si128(b), _mm_castps_si128(c), 0xca)); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u32(a), @@ -249,6 +251,8 @@ return vbslq_f64(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_castsi128_pd(_mm_ternarylogic_epi32(a, _mm_castpd_si128(b), _mm_castpd_si128(c), 0xca)); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u64(a), @@ -269,6 +273,8 @@ return vbslq_s8(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = (a), @@ -291,6 +297,8 @@ return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sel(c, b, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u16(a), @@ -311,6 +319,8 @@ return vbslq_s32(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u32(a), @@ -331,6 +341,8 @@ return vbslq_s64(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u64(a), @@ -353,6 +365,8 @@ return wasm_v128_bitselect(b, c, a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_sel(c, b, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else return simde_veorq_u8(c, simde_vandq_u8(simde_veorq_u8(c, b), a)); #endif @@ -369,6 +383,8 @@ return vbslq_u16(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u16(a), @@ -389,6 +405,8 @@ return vbslq_u32(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif 
defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u32(a), @@ -409,6 +427,8 @@ return vbslq_u64(a, b, c); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_bitselect(b, c, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xca); #else simde_uint8x16_t a_ = simde_vreinterpretq_u8_u64(a), diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/cls.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/cls.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/cls.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/cls.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_CLS_H) +#define SIMDE_ARM_NEON_CLS_H + +#include "types.h" +#include "bsl.h" +#include "clz.h" +#include "cltz.h" +#include "dup_n.h" +#include "mvn.h" +#include "sub.h" +#include "reinterpret.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vcls_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcls_s8(a); + #else + return simde_vsub_s8(simde_vclz_s8(simde_vbsl_s8(simde_vcltz_s8(a), simde_vmvn_s8(a), a)), simde_vdup_n_s8(INT8_C(1))); + #endif +} +#define simde_vcls_u8(a) simde_vcls_s8(simde_vreinterpret_s8_u8(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcls_s8 + #define vcls_s8(a) simde_vcls_s8(a) + #undef vcls_u8 + #define vcls_u8(a) simde_vcls_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcls_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcls_s16(a); + #else + return simde_vsub_s16(simde_vclz_s16(simde_vbsl_s16(simde_vcltz_s16(a), simde_vmvn_s16(a), a)), simde_vdup_n_s16(INT16_C(1))); + #endif +} +#define simde_vcls_u16(a) simde_vcls_s16(simde_vreinterpret_s16_u16(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcls_s16 + #define vcls_s16(a) simde_vcls_s16(a) + #undef vcls_u16 + #define vcls_u16(a) simde_vcls_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcls_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcls_s32(a); + #else + return simde_vsub_s32(simde_vclz_s32(simde_vbsl_s32(simde_vcltz_s32(a), simde_vmvn_s32(a), a)), simde_vdup_n_s32(INT32_C(1))); + #endif +} +#define simde_vcls_u32(a) simde_vcls_s32(simde_vreinterpret_s32_u32(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcls_s32 + #define vcls_s32(a) simde_vcls_s32(a) + #undef vcls_u32 + #define vcls_u32(a) simde_vcls_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vclsq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclsq_s8(a); + #else + return simde_vsubq_s8(simde_vclzq_s8(simde_vbslq_s8(simde_vcltzq_s8(a), simde_vmvnq_s8(a), a)), simde_vdupq_n_s8(INT8_C(1))); + #endif +} +#define simde_vclsq_u8(a) simde_vclsq_s8(simde_vreinterpretq_s8_u8(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclsq_s8 + #define vclsq_s8(a) simde_vclsq_s8(a) + #undef vclsq_u8 + #define vclsq_u8(a) simde_vclsq_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vclsq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclsq_s16(a); + #else + return simde_vsubq_s16(simde_vclzq_s16(simde_vbslq_s16(simde_vcltzq_s16(a), simde_vmvnq_s16(a), a)), simde_vdupq_n_s16(INT16_C(1))); + #endif +} +#define simde_vclsq_u16(a) simde_vclsq_s16(simde_vreinterpretq_s16_u16(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclsq_s16 + #define vclsq_s16(a) simde_vclsq_s16(a) + #undef vclsq_u16 + #define vclsq_u16(a) simde_vclsq_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vclsq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclsq_s32(a); + #else + return simde_vsubq_s32(simde_vclzq_s32(simde_vbslq_s32(simde_vcltzq_s32(a), simde_vmvnq_s32(a), a)), simde_vdupq_n_s32(INT32_C(1))); + #endif +} +#define simde_vclsq_u32(a) simde_vclsq_s32(simde_vreinterpretq_s32_u32(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef 
vclsq_s32 + #define vclsq_s32(a) simde_vclsq_s32(a) + #undef vclsq_u32 + #define vclsq_u32(a) simde_vclsq_u32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLS_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/cltz.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/cltz.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/cltz.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/cltz.h 2021-04-17 01:19:49.000000000 +0000 @@ -184,7 +184,7 @@ simde_float64x2_private a_ = simde_float64x2_to_private(a); simde_uint64x2_private r_; - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && 0 + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); #else SIMDE_VECTORIZE diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/clz.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/clz.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/clz.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/clz.h 2021-04-17 01:19:49.000000000 +0000 @@ -13,7 +13,7 @@ * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE CLZ + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN @@ -283,9 +283,9 @@ return vclzq_s8(a); #elif defined(SIMDE_X86_GFNI_NATIVE) /* https://gist.github.com/animetosho/6cb732ccb5ecd86675ca0a442b3c0622 */ - a = _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201), 0); - a = _mm_andnot_si128(_mm_add_epi8(a, _mm_set1_epi8(0xff)), a); - return _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(0xaaccf0ff, 0, 0xaaccf0ff, 0), 8); + a = _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0); + a = _mm_andnot_si128(_mm_add_epi8(a, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a); + return _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), @@ -353,9 +353,9 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vclzq_u8(a); #elif defined(SIMDE_X86_GFNI_NATIVE) - a = _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(0x80402010, 0x08040201, 0x80402010, 0x08040201), 0); - a = _mm_andnot_si128(_mm_add_epi8(a, _mm_set1_epi8(0xff)), a); - return _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(0xaaccf0ff, 0, 0xaaccf0ff, 0), 8); + a = _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0); + a = _mm_andnot_si128(_mm_add_epi8(a, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a); + return _mm_gf2p8affine_epi64_epi8(a, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a), diff -Nru 
lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/cnt.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/cnt.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/cnt.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/cnt.h 2021-04-17 01:19:49.000000000 +0000 @@ -94,6 +94,8 @@ simde_vcntq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcntq_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a))); #else simde_int8x16_private r_, @@ -117,6 +119,8 @@ simde_vcntq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vcntq_u8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_popcnt(a); #else simde_uint8x16_private r_, diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/ld1.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/ld1.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/ld1.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/ld1.h 2021-04-17 01:19:49.000000000 +0000 @@ -200,9 +200,6 @@ return vld1q_f32(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_ps(ptr); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, HEDLEY_REINTERPRET_CAST(const float*, ptr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else @@ -243,9 +240,6 @@ return vld1q_s8(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else @@ -266,9 +260,6 @@ return vld1q_s16(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else @@ -289,9 +280,6 @@ return vld1q_s32(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else @@ -332,9 +320,6 @@ return vld1q_u8(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else @@ -355,9 +340,6 @@ return vld1q_u16(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else @@ -378,9 +360,6 @@ return vld1q_u32(ptr); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(const __m128i*, ptr)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && 0 - (void) ptr; - return vec_ld(0, ptr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_load(ptr); #else diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/ld3.h 
lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/ld3.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/ld3.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/ld3.h 2021-04-17 01:19:49.000000000 +0000 @@ -29,9 +29,13 @@ #define SIMDE_ARM_NEON_LD3_H #include "types.h" +#include "ld1.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if defined(HEDLEY_GCC_VERSION) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) @@ -42,12 +46,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_f32(ptr); #else - simde_float32x2_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_float32x2_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_float32x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_float32x2x3_t s_ = { { simde_float32x2_from_private(a_[0]), simde_float32x2_from_private(a_[1]), simde_float32x2_from_private(a_[2]) } }; - return (s_); + + simde_float32x2x3_t r = { { + simde_float32x2_from_private(r_[0]), + simde_float32x2_from_private(r_[1]), + simde_float32x2_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -61,12 +74,21 @@ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3_f64(ptr); #else - simde_float64x1_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_float64x1_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_float64x1_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_float64x1x3_t s_ = { { simde_float64x1_from_private(a_[0]), simde_float64x1_from_private(a_[1]), simde_float64x1_from_private(a_[2]) } }; - return s_; + + simde_float64x1x3_t r = { { + simde_float64x1_from_private(r_[0]), + simde_float64x1_from_private(r_[1]), + simde_float64x1_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -80,12 +102,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s8(ptr); #else - simde_int8x8_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int8x8_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int8x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int8x8x3_t s_ = { { simde_int8x8_from_private(a_[0]), simde_int8x8_from_private(a_[1]), simde_int8x8_from_private(a_[2]) } }; - return s_; + + simde_int8x8x3_t r = { { + simde_int8x8_from_private(r_[0]), + simde_int8x8_from_private(r_[1]), + simde_int8x8_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -99,12 +130,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s16(ptr); #else - simde_int16x4_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int16x4_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int16x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < 
(sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int16x4x3_t s_ = { { simde_int16x4_from_private(a_[0]), simde_int16x4_from_private(a_[1]), simde_int16x4_from_private(a_[2]) } }; - return s_; + + simde_int16x4x3_t r = { { + simde_int16x4_from_private(r_[0]), + simde_int16x4_from_private(r_[1]), + simde_int16x4_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -118,12 +158,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s32(ptr); #else - simde_int32x2_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int32x2_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int32x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int32x2x3_t s_ = { { simde_int32x2_from_private(a_[0]), simde_int32x2_from_private(a_[1]), simde_int32x2_from_private(a_[2]) } }; - return s_; + + simde_int32x2x3_t r = { { + simde_int32x2_from_private(r_[0]), + simde_int32x2_from_private(r_[1]), + simde_int32x2_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -137,12 +186,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_s64(ptr); #else - simde_int64x1_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int64x1_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int64x1_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int64x1x3_t s_ = { { simde_int64x1_from_private(a_[0]), simde_int64x1_from_private(a_[1]), simde_int64x1_from_private(a_[2]) } }; - return s_; + + simde_int64x1x3_t r = { { + simde_int64x1_from_private(r_[0]), + simde_int64x1_from_private(r_[1]), + simde_int64x1_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -156,12 +214,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u8(ptr); #else - simde_uint8x8_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint8x8_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint8x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint8x8x3_t s_ = { { simde_uint8x8_from_private(a_[0]), simde_uint8x8_from_private(a_[1]), simde_uint8x8_from_private(a_[2]) } }; - return s_; + + simde_uint8x8x3_t r = { { + simde_uint8x8_from_private(r_[0]), + simde_uint8x8_from_private(r_[1]), + simde_uint8x8_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -175,12 +242,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u16(ptr); #else - simde_uint16x4_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint16x4_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint16x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * 
(sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint16x4x3_t s_ = { { simde_uint16x4_from_private(a_[0]), simde_uint16x4_from_private(a_[1]), simde_uint16x4_from_private(a_[2]) } }; - return s_; + + simde_uint16x4x3_t r = { { + simde_uint16x4_from_private(r_[0]), + simde_uint16x4_from_private(r_[1]), + simde_uint16x4_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -194,12 +270,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u32(ptr); #else - simde_uint32x2_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint32x2_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint32x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint32x2x3_t s_ = { { simde_uint32x2_from_private(a_[0]), simde_uint32x2_from_private(a_[1]), simde_uint32x2_from_private(a_[2]) } }; - return s_; + + simde_uint32x2x3_t r = { { + simde_uint32x2_from_private(r_[0]), + simde_uint32x2_from_private(r_[1]), + simde_uint32x2_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -213,12 +298,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3_u64(ptr); #else - simde_uint64x1_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint64x1_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint64x1_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint64x1x3_t s_ = { { simde_uint64x1_from_private(a_[0]), simde_uint64x1_from_private(a_[1]), simde_uint64x1_from_private(a_[2]) } }; - return s_; + + simde_uint64x1x3_t r = { { + simde_uint64x1_from_private(r_[0]), + simde_uint64x1_from_private(r_[1]), + simde_uint64x1_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -232,12 +326,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_f32(ptr); #else - simde_float32x4_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_float32x4_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_float32x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_float32x4x3_t s_ = { { simde_float32x4_from_private(a_[0]), simde_float32x4_from_private(a_[1]), simde_float32x4_from_private(a_[2]) } }; - return s_; + + simde_float32x4x3_t r = { { + simde_float32x4_from_private(r_[0]), + simde_float32x4_from_private(r_[1]), + simde_float32x4_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -251,12 +354,21 @@ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_f64(ptr); #else - simde_float64x2_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_float64x2_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_float64x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } 
- simde_float64x2x3_t s_ = { { simde_float64x2_from_private(a_[0]), simde_float64x2_from_private(a_[1]), simde_float64x2_from_private(a_[2]) } }; - return s_; + + simde_float64x2x3_t r = { { + simde_float64x2_from_private(r_[0]), + simde_float64x2_from_private(r_[1]), + simde_float64x2_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -270,12 +382,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s8(ptr); #else - simde_int8x16_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int8x16_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int8x16_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int8x16x3_t s_ = { { simde_int8x16_from_private(a_[0]), simde_int8x16_from_private(a_[1]), simde_int8x16_from_private(a_[2]) } }; - return s_; + + simde_int8x16x3_t r = { { + simde_int8x16_from_private(r_[0]), + simde_int8x16_from_private(r_[1]), + simde_int8x16_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -289,12 +410,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s16(ptr); #else - simde_int16x8_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int16x8_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int16x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int16x8x3_t s_ = { { simde_int16x8_from_private(a_[0]), simde_int16x8_from_private(a_[1]), simde_int16x8_from_private(a_[2]) } }; - return s_; + + simde_int16x8x3_t r = { { + simde_int16x8_from_private(r_[0]), + simde_int16x8_from_private(r_[1]), + simde_int16x8_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -308,12 +438,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_s32(ptr); #else - simde_int32x4_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int32x4_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int32x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int32x4x3_t s_ = { { simde_int32x4_from_private(a_[0]), simde_int32x4_from_private(a_[1]), simde_int32x4_from_private(a_[2]) } }; - return s_; + + simde_int32x4x3_t r = { { + simde_int32x4_from_private(r_[0]), + simde_int32x4_from_private(r_[1]), + simde_int32x4_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -327,12 +466,21 @@ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_s64(ptr); #else - simde_int64x2_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_int64x2_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_int64x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_int64x2x3_t s_ = { { simde_int64x2_from_private(a_[0]), 
simde_int64x2_from_private(a_[1]), simde_int64x2_from_private(a_[2]) } }; - return s_; + + simde_int64x2x3_t r = { { + simde_int64x2_from_private(r_[0]), + simde_int64x2_from_private(r_[1]), + simde_int64x2_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -347,12 +495,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u8(ptr); #else - simde_uint8x16_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint8x16_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint8x16_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint8x16x3_t s_ = { { simde_uint8x16_from_private(a_[0]), simde_uint8x16_from_private(a_[1]), simde_uint8x16_from_private(a_[2]) } }; - return s_; + + simde_uint8x16x3_t r = { { + simde_uint8x16_from_private(r_[0]), + simde_uint8x16_from_private(r_[1]), + simde_uint8x16_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -366,12 +523,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u16(ptr); #else - simde_uint16x8_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint16x8_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint16x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint16x8x3_t s_ = { { simde_uint16x8_from_private(a_[0]), simde_uint16x8_from_private(a_[1]), simde_uint16x8_from_private(a_[2]) } }; - return s_; + + simde_uint16x8x3_t r = { { + simde_uint16x8_from_private(r_[0]), + simde_uint16x8_from_private(r_[1]), + simde_uint16x8_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -385,12 +551,21 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vld3q_u32(ptr); #else - simde_uint32x4_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint32x4_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint32x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint32x4x3_t s_ = { { simde_uint32x4_from_private(a_[0]), simde_uint32x4_from_private(a_[1]), simde_uint32x4_from_private(a_[2]) } }; - return s_; + + simde_uint32x4x3_t r = { { + simde_uint32x4_from_private(r_[0]), + simde_uint32x4_from_private(r_[1]), + simde_uint32x4_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -404,12 +579,21 @@ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld3q_u64(ptr); #else - simde_uint64x2_private a_[3]; - for (size_t i = 0; i < (sizeof(simde_uint64x2_t) / sizeof(*ptr)) * 3 ; i++) { - a_[i % 3].values[i / 3] = ptr[i]; + simde_uint64x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } } - simde_uint64x2x3_t s_ = { { simde_uint64x2_from_private(a_[0]), simde_uint64x2_from_private(a_[1]), 
simde_uint64x2_from_private(a_[2]) } }; - return s_; + + simde_uint64x2x3_t r = { { + simde_uint64x2_from_private(r_[0]), + simde_uint64x2_from_private(r_[1]), + simde_uint64x2_from_private(r_[2]) + } }; + + return r; #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/ld4.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/ld4.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/ld4.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/ld4.h 2021-04-17 01:19:49.000000000 +0000 @@ -32,6 +32,9 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if defined(HEDLEY_GCC_VERSION) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/max.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/max.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/max.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/max.h 2021-04-17 01:19:49.000000000 +0000 @@ -343,7 +343,7 @@ res = _mm_or_pd(res, _mm_and_pd(_mm_set1_pd(SIMDE_MATH_NAN), nan_mask)); return res; #else - return _mm_max_ps(a, b); + return _mm_max_pd(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(SIMDE_FAST_NANS) return vec_max(a, b); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/maxnm.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/maxnm.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/maxnm.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/maxnm.h 2021-04-17 01:19:49.000000000 +0000 @@ -170,7 +170,7 @@ r = _mm_or_pd(r, _mm_and_pd(a, bnan)); return r; #else - return _mm_max_ps(a, b); + return _mm_max_pd(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_max(a, b); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/minnm.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/minnm.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/minnm.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/minnm.h 2021-04-17 01:19:49.000000000 +0000 @@ -170,7 +170,7 @@ r = _mm_or_pd(r, _mm_and_pd(a, bnan)); return r; #else - return _mm_min_ps(a, b); + return _mm_min_pd(a, b); #endif #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return vec_min(a, b); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mlal_n.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mlal_n.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mlal_n.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mlal_n.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLAL_N_H) +#define SIMDE_ARM_NEON_MLAL_N_H + +#include "movl.h" +#include "dup_n.h" +#include "mla.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_s16(a, b, c); + #else + return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vdupq_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_s16 + #define vmlal_n_s16(a, b, c) simde_vmlal_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_s32(a, b, c); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(simde_vmovl_s32(b)), + c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_s32 + #define vmlal_n_s32(a, b, c) simde_vmlal_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlal_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_u16(a, b, c); + #else + return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vdupq_n_u32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_u16 + #define vmlal_n_u16(a, b, c) simde_vmlal_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlal_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_u32(a, b, c); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)), + c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_u32 + #define vmlal_n_u32(a, b, c) simde_vmlal_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLAL_N_H) */ diff -Nru 
lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mlsl_n.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mlsl_n.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mlsl_n.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mlsl_n.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLSL_N_H) +#define SIMDE_ARM_NEON_MLSL_N_H + +#include "mull_n.h" +#include "sub.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsl_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_s16(a, b, c); + #else + return simde_vsubq_s32(a, simde_vmull_n_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_s16 + #define vmlsl_n_s16(a, b, c) simde_vmlsl_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlsl_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_s32(a, b, c); + #else + return simde_vsubq_s64(a, simde_vmull_n_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_s32 + #define vmlsl_n_s32(a, b, c) simde_vmlsl_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsl_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_u16(a, b, c); + #else + return simde_vsubq_u32(a, simde_vmull_n_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_u16 + #define vmlsl_n_u16(a, b, c) simde_vmlsl_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlsl_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_u32(a, b, c); + #else + return simde_vsubq_u64(a, simde_vmull_n_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_u32 + #define vmlsl_n_u32(a, b, c) simde_vmlsl_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLSL_N_H) */ diff -Nru 
lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/movn_high.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/movn_high.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/movn_high.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/movn_high.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) +#define SIMDE_ARM_NEON_MOVN_HIGH_H + +#include "types.h" +#include "movn.h" +#include "combine.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_s16(r, a); + #else + return simde_vcombine_s8(r, simde_vmovn_s16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_s16 + #define vmovn_high_s16(r, a) simde_vmovn_high_s16((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_s32(r, a); + #else + return simde_vcombine_s16(r, simde_vmovn_s32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_s32 + #define vmovn_high_s32(r, a) simde_vmovn_high_s32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_s64(r, a); + #else + return simde_vcombine_s32(r, simde_vmovn_s64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_s64 + #define vmovn_high_s64(r, a) simde_vmovn_high_s64((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_u16(r, a); + #else + return simde_vcombine_u8(r, simde_vmovn_u16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_u16 + #define vmovn_high_u16(r, a) simde_vmovn_high_u16((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_u32(r, a); + #else + return simde_vcombine_u16(r, simde_vmovn_u32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_u32 + #define vmovn_high_u32(r, a) simde_vmovn_high_u32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_u64(r, a); + #else + return simde_vcombine_u32(r, simde_vmovn_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_u64 + #define vmovn_high_u64(r, a) simde_vmovn_high_u64((r), (a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mul.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mul.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mul.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mul.h 2021-04-17 01:19:49.000000000 +0000 @@ -567,9 +567,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_x_vmulq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && 0 - return vmulq_u32(a, b); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i64x2_mul(a, b); #else simde_uint64x2_private @@ -589,10 +587,6 @@ return simde_uint64x2_from_private(r_); #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) - #undef vmulq_u32 - #define vmulq_u32(a, b) simde_vmulq_u32((a), (b)) -#endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mul_lane.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mul_lane.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mul_lane.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mul_lane.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,472 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MUL_LANE_H) +#define SIMDE_ARM_NEON_MUL_LANE_H + +#include "types.h" +#include "mul.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_f32(a, b, lane) vmul_lane_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_f32 + #define vmul_lane_f32(a, b, lane) simde_vmul_lane_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmul_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_lane_f64(a, b, lane) vmul_lane_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_f64 + #define vmul_lane_f64(a, b, lane) simde_vmul_lane_f64((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmul_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_s16(a, b, lane) vmul_lane_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_s16 + #define vmul_lane_s16(a, b, lane) simde_vmul_lane_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmul_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_s32(a, b, lane) vmul_lane_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_s32 + #define vmul_lane_s32(a, b, lane) simde_vmul_lane_s32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmul_lane_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + 
+ SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_u16(a, b, lane) vmul_lane_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_u16 + #define vmul_lane_u16(a, b, lane) simde_vmul_lane_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmul_lane_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_u32(a, b, lane) vmul_lane_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_u32 + #define vmul_lane_u32(a, b, lane) simde_vmul_lane_u32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmulq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + simde_float32x2_private b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_f32(a, b, lane) vmulq_lane_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_f32 + #define vmulq_lane_f32(a, b, lane) simde_vmulq_lane_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmulq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + simde_float64x1_private b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_lane_f64(a, b, lane) vmulq_lane_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_f64 + #define vmulq_lane_f64(a, b, lane) simde_vmulq_lane_f64((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmulq_lane_s16(simde_int16x8_t a, simde_int16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_s16(a, b, lane) vmulq_lane_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_s16 + #define vmulq_lane_s16(a, 
b, lane) simde_vmulq_lane_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmulq_lane_s32(simde_int32x4_t a, simde_int32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_s32(a, b, lane) vmulq_lane_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_s32 + #define vmulq_lane_s32(a, b, lane) simde_vmulq_lane_s32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmulq_lane_u16(simde_uint16x8_t a, simde_uint16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_u16(a, b, lane) vmulq_lane_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_u16 + #define vmulq_lane_u16(a, b, lane) simde_vmulq_lane_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_u32(a, b, lane) vmulq_lane_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_u32 + #define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_f32(a, b, lane) vmulq_laneq_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_f32 + #define vmulq_laneq_f32(a, b, lane) simde_vmulq_laneq_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmulq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_f64(a, b, lane) vmulq_laneq_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_f64 + #define vmulq_laneq_f64(a, b, lane) simde_vmulq_laneq_f64((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmulq_laneq_s16(simde_int16x8_t a, simde_int16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_s16(a, b, lane) vmulq_laneq_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_s16 + #define vmulq_laneq_s16(a, b, lane) simde_vmulq_laneq_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmulq_laneq_s32(simde_int32x4_t a, simde_int32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_s32(a, b, lane) vmulq_laneq_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_s32 + #define vmulq_laneq_s32(a, b, lane) simde_vmulq_laneq_s32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmulq_laneq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_u16(a, b, lane) vmulq_laneq_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_u16 + #define vmulq_laneq_u16(a, b, lane) simde_vmulq_laneq_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_u32(a, b, lane) vmulq_laneq_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_u32 + #define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* 
!defined(SIMDE_ARM_NEON_MUL_LANE_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mvn.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mvn.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/mvn.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/mvn.h 2021-04-17 01:19:49.000000000 +0000 @@ -41,6 +41,8 @@ simde_vmvnq_s8(simde_int8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_s8(a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi8(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) @@ -74,6 +76,8 @@ simde_vmvnq_s16(simde_int16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_s16(a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi16(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) @@ -107,6 +111,8 @@ simde_vmvnq_s32(simde_int32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_s32(a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi32(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) @@ -140,6 +146,8 @@ simde_vmvnq_u8(simde_uint8x16_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_u8(a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi8(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) @@ -173,6 +181,8 @@ simde_vmvnq_u16(simde_uint16x8_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_u16(a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi16(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) @@ -206,6 +216,8 @@ simde_vmvnq_u32(simde_uint32x4_t a) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vmvnq_u32(a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(a, _mm_cmpeq_epi32(a, a)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/orn.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/orn.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/orn.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/orn.h 2021-04-17 01:19:49.000000000 +0000 @@ -258,6 +258,8 @@ return vornq_s8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_int8x16_private a_ = simde_int8x16_to_private(a), @@ -287,6 +289,8 @@ return vornq_s16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_int16x8_private a_ = simde_int16x8_to_private(a), @@ -316,6 +320,8 @@ return vornq_s32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_int32x4_private a_ = 
simde_int32x4_to_private(a), @@ -345,6 +351,8 @@ return vornq_s64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi64(a, b, a, 0xf3); #else simde_int64x2_private a_ = simde_int64x2_to_private(a), @@ -374,6 +382,8 @@ return vornq_u8(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_uint8x16_private a_ = simde_uint8x16_to_private(a), @@ -403,6 +413,8 @@ return vornq_u16(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_uint16x8_private a_ = simde_uint16x8_to_private(a), @@ -432,6 +444,8 @@ return vornq_u32(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, a, 0xf3); #else simde_uint32x4_private a_ = simde_uint32x4_to_private(a), @@ -461,6 +475,8 @@ return vornq_u64(a, b); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) return vec_orc(a, b); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi64(a, b, a, 0xf3); #else simde_uint64x2_private a_ = simde_uint64x2_to_private(a), diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/qshl.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/qshl.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/qshl.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/qshl.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,732 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_QSHL_H) +#define SIMDE_ARM_NEON_QSHL_H + +#include "types.h" +#include "cls.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqshlb_s8(int8_t a, int8_t b) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshlb_s8(a, b); + #else + if (b < -7) + b = -7; + + if (b <= 0) { + r = a >> -b; + } else if (b < 7) { + r = HEDLEY_STATIC_CAST(int8_t, a << b); + if ((r >> b) != a) { + r = (a < 0) ? INT8_MIN : INT8_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? 
INT8_MIN : INT8_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlb_s8 + #define vqshlb_s8(a, b) simde_vqshlb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqshlh_s16(int16_t a, int16_t b) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshlh_s16(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 < -15) + b8 = -15; + + if (b8 <= 0) { + r = a >> -b8; + } else if (b8 < 15) { + r = HEDLEY_STATIC_CAST(int16_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT16_MIN : INT16_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT16_MIN : INT16_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlh_s16 + #define vqshlh_s16(a, b) simde_vqshlh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqshls_s32(int32_t a, int32_t b) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshls_s32(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 < -31) + b8 = -31; + + if (b8 <= 0) { + r = a >> -b8; + } else if (b8 < 31) { + r = HEDLEY_STATIC_CAST(int32_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT32_MIN : INT32_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT32_MIN : INT32_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshls_s32 + #define vqshls_s32(a, b) simde_vqshls_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqshld_s64(int64_t a, int64_t b) { + int64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshld_s64(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 < -63) + b8 = -63; + + if (b8 <= 0) { + r = a >> -b8; + } else if (b8 < 63) { + r = HEDLEY_STATIC_CAST(int64_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT64_MIN : INT64_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? 
INT64_MIN : INT64_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshld_s64 + #define vqshld_s64(a, b) simde_vqshld_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqshlb_u8(uint8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshlb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + /* https://github.com/llvm/llvm-project/commit/f0a78bdfdc6d56b25e0081884580b3960a3c2429 */ + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshlb_u8(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshlb_u8(a, b); + #endif + #else + uint8_t r; + + if (b < -7) + b = -7; + + if (b <= 0) { + r = a >> -b; + } else if (b < 7) { + r = HEDLEY_STATIC_CAST(uint8_t, a << b); + if ((r >> b) != a) { + r = UINT8_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT8_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlb_u8 + #define vqshlb_u8(a, b) simde_vqshlb_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqshlh_u16(uint16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshlh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshlh_u16(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshlh_u16(a, b); + #endif + #else + uint16_t r; + + if (b < -15) + b = -15; + + if (b <= 0) { + r = a >> -b; + } else if (b < 15) { + r = HEDLEY_STATIC_CAST(uint16_t, a << b); + if ((r >> b) != a) { + r = UINT16_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT16_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlh_u16 + #define vqshlh_u16(a, b) simde_vqshlh_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqshls_u32(uint32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshls_u32(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshls_u32(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshls_u32(a, b); + #endif + #else + uint32_t r; + + if (b < -31) + b = -31; + + if (b <= 0) { + r = HEDLEY_STATIC_CAST(uint32_t, a >> -b); + } else if (b < 31) { + r = a << b; + if ((r >> b) != a) { + r = UINT32_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT32_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshls_u32 + #define vqshls_u32(a, b) simde_vqshls_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqshld_u64(uint64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshld_u64(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshld_u64(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshld_u64(a, b); + #endif + #else + uint64_t r; + + if (b < -63) + b = -63; + + if (b <= 0) { + r = a >> -b; + } else if (b < 63) { + 
r = HEDLEY_STATIC_CAST(uint64_t, a << b); + if ((r >> b) != a) { + r = UINT64_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT64_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshldb_u64 + #define vqshld_u64(a, b) simde_vqshld_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s8 + #define vqshl_s8(a, b) simde_vqshl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s16 + #define vqshl_s16(a, b) simde_vqshl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s32 + #define vqshl_s32(a, b) simde_vqshl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s64 + #define vqshl_s64(a, b) simde_vqshl_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + simde_int8x8_private + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u8 + #define vqshl_u8(a, b) 
simde_vqshl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_int16x4_private + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u16 + #define vqshl_u16(a, b) simde_vqshl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_int32x2_private + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u32 + #define vqshl_u32(a, b) simde_vqshl_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + simde_int64x1_private + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u64 + #define vqshl_u64(a, b) simde_vqshl_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s8 + #define vqshlq_s8(a, b) simde_vqshlq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s16 + #define vqshlq_s16(a, b) simde_vqshlq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vqshlq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s32 + #define vqshlq_s32(a, b) simde_vqshlq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s64 + #define vqshlq_s64(a, b) simde_vqshlq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + simde_int8x16_private + b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u8 + #define vqshlq_u8(a, b) simde_vqshlq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_int16x8_private + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u16 + #define vqshlq_u16(a, b) simde_vqshlq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_int32x4_private + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u32 + #define vqshlq_u32(a, b) simde_vqshlq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vqshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + simde_int64x2_private + b_ = simde_int64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 
0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u64 + #define vqshlq_u64(a, b) simde_vqshlq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QSHL_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/rnd.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/rnd.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/rnd.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/rnd.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RND_H) +#define SIMDE_ARM_NEON_RND_H + +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrnd_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrnd_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_truncf(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrnd_f32 + #define vrnd_f32(a) simde_vrnd_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrnd_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrnd_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_trunc(a_.values[i]); + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrnd_f64 + #define vrnd_f64(a) simde_vrnd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrndq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndq_f32(a); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_round_ps(a, _MM_FROUND_TO_ZERO); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_trunc_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_trunc(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_truncf(a_.values[i]); + } + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndq_f32 + #define vrndq_f32(a) simde_vrndq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrndq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndq_f64(a); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_round_pd(a, _MM_FROUND_TO_ZERO); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_trunc_pd(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_trunc(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_trunc(a_.values[i]); + } + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndq_f64 + #define vrndq_f64(a) simde_vrndq_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RND_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/shl_n.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/shl_n.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/shl_n.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/shl_n.h 2021-04-17 01:19:49.000000000 +0000 @@ -272,7 +272,10 @@ simde_int8x16_t simde_vshlq_n_s8 (const simde_int8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 
0, 7) { - #if defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64(a, n)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_shl(a, n); @@ -412,7 +415,10 @@ simde_uint8x16_t simde_vshlq_n_u8 (const simde_uint8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { - #if defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64((a), (n))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_i8x16_shl((a), (n)); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/shr_n.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/shr_n.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/shr_n.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/shr_n.h 2021-04-17 01:19:49.000000000 +0000 @@ -291,7 +291,12 @@ simde_int8x16_t simde_vshrq_n_s8 (const simde_int8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + const int shift = (n <= 7) ? n : 7; + const uint64_t matrix = (UINT64_C(0x8182848890A0C000) << (shift * 8)) ^ UINT64_C(0x8080808080808080); + return _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, matrix)), 0); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_epi8(_mm_srai_epi16((a), (n)), _mm_srai_epi16(_mm_slli_epi16((a), 8), 8 + (n)), @@ -442,7 +447,10 @@ simde_uint8x16_t simde_vshrq_n_u8 (const simde_uint8x16_t a, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { - #if defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + return (n > 7) ? _mm_setzero_si128() : _mm_gf2p8affine_epi64_epi8(a, _mm_set1_epi64x(INT64_C(0x0102040810204080) << (n * 8)), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) return _mm_and_si128(_mm_srli_epi64((a), (n)), _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (8 - (n))) - 1))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return (((n) == 8) ? 
wasm_i8x16_splat(0) : wasm_u8x16_shr((a), (n))); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/subw.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/subw.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/subw.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/subw.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SUBW_H) +#define SIMDE_ARM_NEON_SUBW_H + +#include "types.h" +#include "sub.h" +#include "movl.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubw_s8(simde_int16x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s16(a, simde_vmovl_s8(b)); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values = a_.values - r_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_s8 + #define vsubw_s8(a, b) simde_vsubw_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubw_s16(simde_int32x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s32(a, simde_vmovl_s16(b)); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values = a_.values - r_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return 
simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_s16 + #define vsubw_s16(a, b) simde_vsubw_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubw_s32(simde_int64x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s64(a, simde_vmovl_s32(b)); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values = a_.values - r_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_s32 + #define vsubw_s32(a, b) simde_vsubw_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u16(a, simde_vmovl_u8(b)); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x8_private b_ = simde_uint8x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values = a_.values - r_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_u8 + #define vsubw_u8(a, b) simde_vsubw_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u32(a, simde_vmovl_u16(b)); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values = a_.values - r_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_u16 + #define vsubw_u16(a, b) simde_vsubw_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u64(a, simde_vmovl_u32(b)); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values = a_.values - r_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_u32 + #define vsubw_u32(a, b) simde_vsubw_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBW_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/subw_high.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/subw_high.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/subw_high.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/subw_high.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) +#define SIMDE_ARM_NEON_SUBW_HIGH_H + +#include "types.h" +#include "movl.h" +#include "sub.h" +#include "get_high.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s16(a, simde_vmovl_s8(simde_vget_high_s8(b))); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_s8 + #define vsubw_high_s8(a, b) simde_vsubw_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s32(a, simde_vmovl_s16(simde_vget_high_s16(b))); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_s16 + #define vsubw_high_s16(a, b) simde_vsubw_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s64(a, simde_vmovl_s32(simde_vget_high_s32(b))); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_s32 + #define vsubw_high_s32(a, b) simde_vsubw_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { + 
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u16(a, simde_vmovl_u8(simde_vget_high_u8(b))); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_u8 + #define vsubw_high_u8(a, b) simde_vsubw_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u32(a, simde_vmovl_u16(simde_vget_high_u16(b))); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x8_private b_ = simde_uint16x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_u16 + #define vsubw_high_u16(a, b) simde_vsubw_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u64(a, simde_vmovl_u32(simde_vget_high_u32(b))); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x4_private b_ = simde_uint32x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_u32 + #define vsubw_high_u32(a, b) simde_vsubw_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/uqadd.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/uqadd.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon/uqadd.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon/uqadd.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,309 @@ +/* 
SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_UQADD_H) +#define SIMDE_ARM_NEON_UQADD_H + +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vuqaddb_s8(int8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddb_s8(a, b); + #else + int16_t r_ = HEDLEY_STATIC_CAST(int16_t, a) + HEDLEY_STATIC_CAST(int16_t, b); + return (r_ < INT8_MIN) ? INT8_MIN : ((r_ > INT8_MAX) ? INT8_MAX : HEDLEY_STATIC_CAST(int8_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddb_s8 + #define vuqaddb_s8(a, b) simde_vuqaddb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vuqaddh_s16(int16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddh_s16(a, b); + #else + int32_t r_ = HEDLEY_STATIC_CAST(int32_t, a) + HEDLEY_STATIC_CAST(int32_t, b); + return (r_ < INT16_MIN) ? INT16_MIN : ((r_ > INT16_MAX) ? INT16_MAX : HEDLEY_STATIC_CAST(int16_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddh_s16 + #define vuqaddh_s16(a, b) simde_vuqaddh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vuqadds_s32(int32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadds_s32(a, b); + #else + int64_t r_ = HEDLEY_STATIC_CAST(int64_t, a) + HEDLEY_STATIC_CAST(int64_t, b); + return (r_ < INT32_MIN) ? INT32_MIN : ((r_ > INT32_MAX) ? INT32_MAX : HEDLEY_STATIC_CAST(int32_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadds_s32 + #define vuqadds_s32(a, b) simde_vuqadds_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vuqaddd_s64(int64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddd_s64(a, b); + #else + /* TODO: I suspect there is room for improvement here. This is + * just the first thing that worked, and I don't feel like messing + * with it now. */ + int64_t r; + + if (a < 0) { + uint64_t na = HEDLEY_STATIC_CAST(uint64_t, -a); + if (na > b) { + uint64_t t = na - b; + r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) + 1)) ? INT64_MIN : -HEDLEY_STATIC_CAST(int64_t, t); + } else { + uint64_t t = b - na; + r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) )) ? 
INT64_MAX : HEDLEY_STATIC_CAST(int64_t, t); + } + } else { + uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a); + r = ((INT64_MAX - ua) < b) ? INT64_MAX : HEDLEY_STATIC_CAST(int64_t, ua + b); + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddd_s64 + #define vuqaddd_s64(a, b) simde_vuqaddd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vuqadd_s8(simde_int8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + simde_uint8x8_private b_ = simde_uint8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s8 + #define vuqadd_s8(a, b) simde_vuqadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vuqadd_s16(simde_int16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s16 + #define vuqadd_s16(a, b) simde_vuqadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vuqadd_s32(simde_int32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s32 + #define vuqadd_s32(a, b) simde_vuqadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vuqadd_s64(simde_int64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + simde_uint64x1_private b_ = simde_uint64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s64 + #define vuqadd_s64(a, b) simde_vuqadd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vuqaddq_s8(simde_int8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x16_from_private(r_); + #endif +} 
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s8 + #define vuqaddq_s8(a, b) simde_vuqaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vuqaddq_s16(simde_int16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + simde_uint16x8_private b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s16 + #define vuqaddq_s16(a, b) simde_vuqaddq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vuqaddq_s32(simde_int32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + simde_uint32x4_private b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s32 + #define vuqaddq_s32(a, b) simde_vuqaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vuqaddq_s64(simde_int64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + simde_uint64x2_private b_ = simde_uint64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s64 + #define vuqaddq_s64(a, b) simde_vuqaddq_s64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_UQADD_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/arm/neon.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/arm/neon.h 2021-04-17 01:19:49.000000000 +0000 @@ -39,6 +39,7 @@ #include "neon/addl_high.h" #include "neon/addv.h" #include "neon/addw.h" +#include "neon/addw_high.h" #include "neon/and.h" #include "neon/bic.h" #include "neon/bsl.h" @@ -51,6 +52,7 @@ #include "neon/cgtz.h" #include "neon/cle.h" #include "neon/clez.h" +#include "neon/cls.h" #include "neon/clt.h" #include "neon/cltz.h" #include "neon/clz.h" @@ -82,13 +84,17 @@ #include "neon/mla_n.h" #include "neon/mlal.h" #include "neon/mlal_high.h" +#include "neon/mlal_n.h" #include "neon/mls.h" #include "neon/mlsl.h" #include "neon/mlsl_high.h" +#include "neon/mlsl_n.h" #include "neon/movl.h" #include "neon/movl_high.h" #include "neon/movn.h" +#include "neon/movn_high.h" #include "neon/mul.h" +#include "neon/mul_lane.h" #include "neon/mul_n.h" #include "neon/mull.h" #include "neon/mull_high.h" @@ -113,6 +119,7 @@ #include "neon/qmovn_high.h" #include "neon/qneg.h" #include "neon/qsub.h" +#include "neon/qshl.h" #include "neon/qtbl.h" #include 
"neon/qtbx.h" #include "neon/rbit.h" @@ -121,6 +128,7 @@ #include "neon/rev32.h" #include "neon/rev64.h" #include "neon/rhadd.h" +#include "neon/rnd.h" #include "neon/rshl.h" #include "neon/rshr_n.h" #include "neon/rsra_n.h" @@ -135,12 +143,15 @@ #include "neon/st4.h" #include "neon/sub.h" #include "neon/subl.h" +#include "neon/subw.h" +#include "neon/subw_high.h" #include "neon/tbl.h" #include "neon/tbx.h" #include "neon/trn.h" #include "neon/trn1.h" #include "neon/trn2.h" #include "neon/tst.h" +#include "neon/uqadd.h" #include "neon/uzp.h" #include "neon/uzp1.h" #include "neon/uzp2.h" diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/COPYING lightzone-4.2.3/lightcrafts/jnisrc/include/simde/COPYING --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/COPYING 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/COPYING 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,20 @@ +Copyright (c) 2017 Evan Nemerson + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/README.md lightzone-4.2.3/lightcrafts/jnisrc/include/simde/README.md --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/README.md 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/README.md 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,10 @@ +# SIMDe Without Test Cases + +This repository contains only the core of +[SIMDe](https://github.com/simd-everywhere/simde/simde). +It is generated automatically for every commit to master, and is +intended to be used as a submodule in projects which don't want to +include the (rather large) test cases. + +All development work happens in the main repository, please do not +file issues or create pull requests against this repository. diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-align.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-align.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-align.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-align.h 2021-04-17 01:19:49.000000000 +0000 @@ -121,6 +121,7 @@ HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ @@ -153,6 +154,10 @@ * of two. 
MSVC is the exception (of course), so we need to cap the * alignment requests at values that the implementation supports. * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ @@ -171,6 +176,8 @@ #elif defined(_M_ARM) || defined(_M_ARM64) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 #endif #endif @@ -436,7 +443,3 @@ #define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) #endif /* !defined(SIMDE_ALIGN_H) */ - -#if defined(SIMDE_TESTING) -SIMDE_ALIGN_ASSUME_LIKE(Foo, bar) -#endif diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-arch.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-arch.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-arch.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-arch.h 2021-04-17 01:19:49.000000000 +0000 @@ -278,6 +278,9 @@ # if defined(__AVX512VP2INTERSECT__) # define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 # endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif # if defined(__AVX512BW__) # define SIMDE_ARCH_X86_AVX512BW 1 # endif @@ -296,6 +299,12 @@ # if defined(__GFNI__) # define SIMDE_ARCH_X86_GFNI 1 # endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif #endif /* Itanium diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-common.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-common.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-common.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-common.h 2021-04-17 01:19:49.000000000 +0000 @@ -30,7 +30,7 @@ #include "hedley.h" #define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 5 +#define SIMDE_VERSION_MINOR 7 #define SIMDE_VERSION_MICRO 0 #define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) @@ -97,10 +97,17 @@ #endif /* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but on ARMv7 (for + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for * example) doesn't support it and it must be emulated (which is rather * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. */ + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode.
*/ #if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_ROUND_TIES #endif @@ -770,23 +777,14 @@ # define SIMDE_BUG_GCC_REV_247851 # endif # if !HEDLEY_GCC_VERSION_CHECK(10,0,0) -# define SIMDE_BUG_GCC_REV_274313 # define SIMDE_BUG_GCC_91341 # endif -# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) -# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -# endif # if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) # define SIMDE_BUG_GCC_94482 # endif # if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_SYSTEMZ) # define SIMDE_BUG_GCC_53784 # endif -# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ -# define SIMDE_BUG_GCC_95144 -# endif -# endif # if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_94488 # endif @@ -806,6 +804,7 @@ # if defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_CLANG_45541 # define SIMDE_BUG_CLANG_46844 +# define SIMDE_BUG_CLANG_48257 # if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_BAD_VI64_OPS # endif @@ -813,23 +812,17 @@ # if defined(SIMDE_ARCH_POWER) # define SIMDE_BUG_CLANG_46770 # endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif # if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) -# define SIMDE_BUG_CLANG_45931 +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 # endif # endif -# define SIMDE_BUG_CLANG_45959 -# elif defined(HEDLEY_MSVC_VERSION) -# if defined(SIMDE_ARCH_X86) -# define SIMDE_BUG_MSVC_ROUND_EXTRACT -# endif # elif defined(HEDLEY_INTEL_VERSION) # define SIMDE_BUG_INTEL_857088 # endif -# if defined(HEDLEY_EMSCRIPTEN_VERSION) -# define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */ -# define SIMDE_BUG_EMSCRIPTEN_5242 -# endif #endif /* GCC and Clang both have the same issue: diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-detect-clang.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-detect-clang.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-detect-clang.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-detect-clang.h 2021-04-17 01:19:49.000000000 +0000 @@ -57,7 +57,9 @@ * anything we can detect. */ #if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -# if __has_warning("-Wimplicit-const-int-float-conversion") +# if __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") # define SIMDE_DETECT_CLANG_VERSION 110000 # elif __has_warning("-Wmisleading-indentation") # define SIMDE_DETECT_CLANG_VERSION 100000 diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-diagnostic.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-diagnostic.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-diagnostic.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-diagnostic.h 2021-04-17 01:19:49.000000000 +0000 @@ -370,6 +370,13 @@ #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ #endif +/* This is a false positive from GCC in a few places. 
*/ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-features.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-features.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-features.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-features.h 2021-04-17 01:19:49.000000000 +0000 @@ -52,6 +52,15 @@ #define SIMDE_X86_AVX512F_NATIVE #endif +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512CD) #define SIMDE_X86_AVX512CD_NATIVE @@ -187,6 +196,18 @@ #endif #endif +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + #if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(__INTEL_COMPILER) #define SIMDE_X86_SVML_NATIVE @@ -199,9 +220,7 @@ #endif #if \ - defined(SIMDE_X86_AVX_NATIVE) || \ - defined(SIMDE_X86_GFNI_NATIVE) || \ - defined(SIMDE_X86_SVML_NATIVE) + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) #include #elif defined(SIMDE_X86_SSE4_2_NATIVE) #include @@ -233,7 +252,7 @@ #endif #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) - #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) && (__ARM_NEON_FP & 0x02) #define SIMDE_ARM_NEON_A32V8_NATIVE #endif #endif @@ -442,6 +461,12 @@ #if !defined(SIMDE_X86_GFNI_NATIVE) #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-math.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-math.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/simde-math.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/simde-math.h 2021-04-17 01:19:49.000000000 +0000 @@ -449,13 +449,13 @@ #endif #endif -#if !defined(simde_math_absf) - #if SIMDE_MATH_BUILTIN_LIBM(absf) - #define simde_math_absf(v) __builtin_absf(v) +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) 
- #define simde_math_absf(v) std::abs(v) + #define simde_math_fabsf(v) std::abs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) - #define simde_math_absf(v) absf(v) + #define simde_math_fabsf(v) fabsf(v) #endif #endif @@ -1017,6 +1017,26 @@ #endif #endif +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + #if !defined(simde_math_nearbyint) #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) #define simde_math_nearbyint(v) __builtin_nearbyint(v) @@ -1097,6 +1117,46 @@ #endif #endif +#if !defined(simde_math_roundeven) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_roundeven) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_roundevenf) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + #if !defined(simde_math_sin) #if SIMDE_MATH_BUILTIN_LIBM(sin) #define simde_math_sin(v) __builtin_sin(v) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx2.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx2.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx2.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx2.h 2021-04-17 01:19:49.000000000 +0000 @@ -22,7 +22,7 @@ * * Copyright: * 2018-2020 Evan Nemerson - * 2019 Michael R. Crusoe + * 2019-2020 Michael R. Crusoe * 2020 Himanshi Mathur * 2020 Hidayat Khan */ @@ -46,10 +46,15 @@ r_, a_ = simde__m256i_to_private(a); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? 
-a_.i8[i] : a_.i8[i]; + } + #endif return simde__m256i_from_private(r_); #endif @@ -69,10 +74,15 @@ r_, a_ = simde__m256i_to_private(a); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; + } + #endif return simde__m256i_from_private(r_); #endif @@ -92,10 +102,15 @@ r_, a_ = simde__m256i_to_private(a); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif return simde__m256i_from_private(r_); #endif @@ -238,9 +253,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_add_epi64(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) r_.i64 = a_.i64 + b_.i64; #else @@ -341,9 +355,9 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.m128i_private[0] = simde__m128i_to_private(simde_mm_andnot_si128(simde__m128i_from_private(a_.m128i_private[0]), simde__m128i_from_private(b_.m128i_private[0]))); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_andnot_si128(simde__m128i_from_private(a_.m128i_private[1]), simde__m128i_from_private(b_.m128i_private[1]))); + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { @@ -370,11 +384,9 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_adds_epi8(a_.m128i[i], b_.m128i[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { @@ -401,11 +413,9 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_adds_epi16(a_.m128i[i], b_.m128i[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for 
(size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { @@ -447,9 +457,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_adds_epu8(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { @@ -642,8 +651,8 @@ mask_ = simde__m256i_to_private(mask); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - r_.m128i_private[0] = simde__m128i_to_private(simde_mm_blendv_epi8(simde__m128i_from_private(a_.m128i_private[0]), simde__m128i_from_private(b_.m128i_private[0]), simde__m128i_from_private(mask_.m128i_private[0]))); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_blendv_epi8(simde__m128i_from_private(a_.m128i_private[1]), simde__m128i_from_private(b_.m128i_private[1]), simde__m128i_from_private(mask_.m128i_private[1]))); + r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); + r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { @@ -1065,10 +1074,15 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif return simde__m256i_from_private(r_); #endif @@ -1089,7 +1103,7 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); #else @@ -1118,7 +1132,7 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); #else @@ -2976,7 +2990,7 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); #else @@ -3005,7 +3019,7 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); #else @@ -3034,7 +3048,7 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); #else @@ -3372,9 +3386,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_mul_epi32(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { @@ -3403,9 +3416,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_mul_epu32(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { @@ -3635,9 +3647,9 @@ simde__m256i_to_private(b) }; - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.m128i_private[0] = simde__m128i_to_private(simde_mm_packs_epi32(simde__m128i_from_private(v_[0].m128i_private[0]), simde__m128i_from_private(v_[1].m128i_private[0]))); - r_.m128i_private[1] = simde__m128i_to_private(simde_mm_packs_epi32(simde__m128i_from_private(v_[0].m128i_private[1]), simde__m128i_from_private(v_[1].m128i_private[1]))); + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); + r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { @@ -4392,7 +4404,7 @@ b_ = simde__m256i_to_private(b), r_; - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
#if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) @@ -4452,7 +4464,7 @@ b_ = simde__m256i_to_private(b), r_; - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) @@ -5229,6 +5241,9 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 - b_.u32; + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { @@ -5250,11 +5265,9 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_subs_epi8(a_.m128i[i], b_.m128i[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { @@ -5281,11 +5294,9 @@ a_ = simde__m256i_to_private(a), b_ = simde__m256i_to_private(b); - #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_subs_epi16(a_.m128i[i], b_.m128i[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/kshift.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/kshift.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/kshift.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/kshift.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_KSHIFT_H) +#define SIMDE_X86_AVX512_KSHIFT_H + +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kshiftli_mask16 (simde__mmask16 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a << count) : 0); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftli_mask16(a, count) _kshiftli_mask16(a, count) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask16 + #define _kshiftli_mask16(a, count) simde_kshiftli_mask16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kshiftli_mask32 (simde__mmask32 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 31) ? (a << count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftli_mask32(a, count) _kshiftli_mask32(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask32 + #define _kshiftli_mask32(a, count) simde_kshiftli_mask32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kshiftli_mask64 (simde__mmask64 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 63) ? (a << count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftli_mask64(a, count) _kshiftli_mask64(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask64 + #define _kshiftli_mask64(a, count) simde_kshiftli_mask64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kshiftli_mask8 (simde__mmask8 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a << count) : 0); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftli_mask8(a, count) _kshiftli_mask8(a, count) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask8 + #define _kshiftli_mask8(a, count) simde_kshiftli_mask8(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kshiftri_mask16 (simde__mmask16 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a >> count) : 0); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftri_mask16(a, count) _kshiftri_mask16(a, count) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask16 + #define _kshiftri_mask16(a, count) simde_kshiftri_mask16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kshiftri_mask32 (simde__mmask32 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 31) ? 
(a >> count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftri_mask32(a, count) _kshiftri_mask32(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask32 + #define _kshiftri_mask32(a, count) simde_kshiftri_mask32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kshiftri_mask64 (simde__mmask64 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 63) ? (a >> count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftri_mask64(a, count) _kshiftri_mask64(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask64 + #define _kshiftri_mask64(a, count) simde_kshiftri_mask64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kshiftri_mask8 (simde__mmask8 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a >> count) : 0); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_kshiftri_mask8(a, count) _kshiftri_mask8(a, count) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask8 + #define _kshiftri_mask8(a, count) simde_kshiftri_mask8(a, count) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_KSHIFT_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/maddubs.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/maddubs.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/maddubs.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/maddubs.h 2021-04-17 01:19:49.000000000 +0000 @@ -108,9 +108,12 @@ r_.m256i[i] = simde_mm256_maddubs_epi16(a_.m256i[i], b_.m256i[i]); } #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_maddubs_epi16(a_.m256i[i], b_.m256i[i]); + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; } #endif diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/permutex2var.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/permutex2var.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/permutex2var.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/permutex2var.h 2021-04-17 01:19:49.000000000 +0000 @@ -29,17 +29,1105 @@ #define SIMDE_X86_AVX512_PERMUTEX2VAR_H #include "types.h" +#include "and.h" +#include "andnot.h" +#include "blend.h" #include "mov.h" +#include "or.h" +#include "set1.h" +#include "slli.h" +#include "srli.h" +#include "test.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +/* The following generic code avoids many, nearly identical, repetitions of fairly complex code. 
+ * If the compiler optimizes well, in particular extracting invariant code from loops + * and simplifying code involving constants passed as arguments, it should not be + * significantly slower than specific code. + * Note that when the original vector contains few elements, these implementations + * may not be faster than portable code. + */ +#if defined(SIMDE_X86_SSSE3_NATIVE) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_X_PERMUTEX2VAR_USE_GENERIC +#endif + +#if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_permutex2var128 (const simde__m128i *a, const simde__m128i idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { + const int idx_mask = (1 << (5 - log2_index_size + log2_data_length)) - 1; + + #if defined(SIMDE_X86_SSE3_NATIVE) + __m128i ra, rb, t, test, select, index; + const __m128i sixteen = _mm_set1_epi8(16); + + /* Avoid the mullo intrinsics which have high latency (and the 32-bit one requires SSE4.1) */ + switch (log2_index_size) { + default: /* Avoid uninitialized variable warning/error */ + case 0: + index = _mm_and_si128(idx, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index = _mm_and_si128(idx, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index = _mm_slli_epi32(index, 1); + t = _mm_slli_epi32(index, 8); + index = _mm_or_si128(index, t); + index = _mm_add_epi16(index, _mm_set1_epi16(0x0100)); + break; + case 2: + index = _mm_and_si128(idx, _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index = _mm_slli_epi32(index, 2); + t = _mm_slli_epi32(index, 8); + index = _mm_or_si128(index, t); + t = _mm_slli_epi32(index, 16); + index = _mm_or_si128(index, t); + index = _mm_add_epi32(index, _mm_set1_epi32(0x03020100)); + break; + } + + test = index; + index = _mm_and_si128(index, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (4 + log2_data_length)) - 1))); + test = _mm_cmpgt_epi8(test, index); + + ra = _mm_shuffle_epi8(a[0], index); + rb = _mm_shuffle_epi8(b[0], index); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + select = _mm_cmplt_epi8(index, sixteen); + index = _mm_sub_epi8(index, sixteen); + ra = _mm_blendv_epi8(_mm_shuffle_epi8(a[i], index), ra, select); + rb = _mm_blendv_epi8(_mm_shuffle_epi8(b[i], index), rb, select); + } + + return _mm_blendv_epi8(ra, rb, test); + #else + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + select = _mm_cmplt_epi8(index, sixteen); + index = _mm_sub_epi8(index, sixteen); + ra = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(a[i], index)), _mm_and_si128(select, ra)); + rb = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(b[i], index)), _mm_and_si128(select, rb)); + } + + return _mm_or_si128(_mm_andnot_si128(test, ra), _mm_and_si128(test, rb)); + #endif + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16_t index, r; + uint16x8_t index16; + uint32x4_t index32; + uint8x16x2_t table2_a, table2_b; + uint8x16x4_t table4_a, table4_b; + + switch (log2_index_size) { + case 0: + index = vandq_u8(simde__m128i_to_neon_u8(idx), vdupq_n_u8(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index16 = vandq_u16(simde__m128i_to_neon_u16(idx), vdupq_n_u16(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, vdupq_n_u16(0x0100)); + index = 
vreinterpretq_u8_u16(index16); + break; + case 2: + index32 = vandq_u32(simde__m128i_to_neon_u32(idx), vdupq_n_u32(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, vdupq_n_u32(0x03020100)); + index = vreinterpretq_u8_u32(index32); + break; + } + + uint8x16_t mask = vdupq_n_u8(HEDLEY_STATIC_CAST(int8_t, (1 << (4 + log2_data_length)) - 1)); + + switch (log2_data_length) { + case 0: + r = vqtbx1q_u8(vqtbl1q_u8(simde__m128i_to_neon_u8(b[0]), vandq_u8(index, mask)), simde__m128i_to_neon_u8(a[0]), index); + break; + case 1: + table2_a.val[0] = simde__m128i_to_neon_u8(a[0]); + table2_a.val[1] = simde__m128i_to_neon_u8(a[1]); + table2_b.val[0] = simde__m128i_to_neon_u8(b[0]); + table2_b.val[1] = simde__m128i_to_neon_u8(b[1]); + r = vqtbx2q_u8(vqtbl2q_u8(table2_b, vandq_u8(index, mask)), table2_a, index); + break; + case 2: + table4_a.val[0] = simde__m128i_to_neon_u8(a[0]); + table4_a.val[1] = simde__m128i_to_neon_u8(a[1]); + table4_a.val[2] = simde__m128i_to_neon_u8(a[2]); + table4_a.val[3] = simde__m128i_to_neon_u8(a[3]); + table4_b.val[0] = simde__m128i_to_neon_u8(b[0]); + table4_b.val[1] = simde__m128i_to_neon_u8(b[1]); + table4_b.val[2] = simde__m128i_to_neon_u8(b[2]); + table4_b.val[3] = simde__m128i_to_neon_u8(b[3]); + r = vqtbx4q_u8(vqtbl4q_u8(table4_b, vandq_u8(index, mask)), table4_a, index); + break; + } + + return simde__m128i_from_neon_u8(r); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r, ra, rb, t, index, s, thirty_two = vec_splats(HEDLEY_STATIC_CAST(uint8_t, 32)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) temp32, index32; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) select, test; + + switch (log2_index_size) { + default: /* Avoid uninitialized variable warning/error */ + case 0: + index = vec_and(simde__m128i_to_altivec_u8(idx), vec_splats(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); + break; + case 1: + index16 = simde__m128i_to_altivec_u16(idx); + index16 = vec_and(index16, vec_splats(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); + index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); + break; + case 2: + index32 = simde__m128i_to_altivec_u32(idx); + index32 = vec_and(index32, vec_splats(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); + + /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ + temp32 = vec_sl(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))); + index32 = vec_add(index32, temp32); + index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)), + vec_splat_u16(0))); + + index32 = vec_add(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100))); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); + break; + } + + if (log2_data_length == 0) { + r = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(b[0]), HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index)); + } + else { + s = index; + index = vec_and(index, vec_splats(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1))); + test = vec_cmpgt(s, index); + + ra = 
vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(a[1]), index); + rb = vec_perm(simde__m128i_to_altivec_u8(b[0]), simde__m128i_to_altivec_u8(b[1]), index); + + SIMDE_VECTORIZE + for (int i = 2 ; i < (1 << log2_data_length) ; i += 2) { + select = vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), index), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), thirty_two)); + index = vec_sub(index, thirty_two); + t = vec_perm(simde__m128i_to_altivec_u8(a[i]), simde__m128i_to_altivec_u8(a[i + 1]), index); + ra = vec_sel(t, ra, select); + t = vec_perm(simde__m128i_to_altivec_u8(b[i]), simde__m128i_to_altivec_u8(b[i + 1]), index); + rb = vec_sel(t, rb, select); + } + + r = vec_sel(ra, rb, test); + } + + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sixteen = wasm_i8x16_splat(16); + + v128_t index = simde__m128i_to_wasm_v128(idx); + + switch (log2_index_size) { + case 0: + index = wasm_v128_and(index, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index = wasm_v128_and(index, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index = wasm_i16x8_mul(index, wasm_i16x8_splat(0x0202)); + index = wasm_i16x8_add(index, wasm_i16x8_splat(0x0100)); + break; + case 2: + index = wasm_v128_and(index, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index = wasm_i32x4_mul(index, wasm_i32x4_splat(0x04040404)); + index = wasm_i32x4_add(index, wasm_i32x4_splat(0x03020100)); + break; + } + + v128_t r = wasm_v8x16_swizzle(simde__m128i_to_wasm_v128(a[0]), index); + + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + index = wasm_i8x16_sub(index, sixteen); + r = wasm_v128_or(r, wasm_v8x16_swizzle(simde__m128i_to_wasm_v128(a[i]), index)); + } + + SIMDE_VECTORIZE + for (int i = 0 ; i < (1 << log2_data_length) ; i++) { + index = wasm_i8x16_sub(index, sixteen); + r = wasm_v128_or(r, wasm_v8x16_swizzle(simde__m128i_to_wasm_v128(b[i]), index)); + } + + return simde__m128i_from_wasm_v128(r); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_permutex2var (simde__m128i *r, const simde__m128i *a, const simde__m128i *idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { + SIMDE_VECTORIZE + for (int i = 0 ; i < (1 << log2_data_length) ; i++) { + r[i] = simde_x_permutex2var128(a, idx[i], b, log2_index_size, log2_data_length); + } +} +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi16(a, idx, b); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 1, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 8) ? 
b_ : a_).i16[idx_.i16[i] & 7]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi16 + #define _mm_permutex2var_epi16(a, idx, b) simde_mm_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi16 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm_mask_mov_epi16(a, k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi16 +#define _mm_mask_permutex2var_epi16(a, k, idx, b) simde_mm_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm_mask_mov_epi16(idx, k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi16 +#define _mm_mask2_permutex2var_epi16(a, idx, k, b) simde_mm_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi16 +#define _mm_maskz_permutex2var_epi16(k, a, idx, b) simde_mm_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi32(a, idx, b); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) /* This may not be faster than the portable version */ + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 2, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 4) ? 
b_ : a_).i32[idx_.i32[i] & 3]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi32 + #define _mm_permutex2var_epi32(a, idx, b) simde_mm_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi32 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm_mask_mov_epi32(a, k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi32 +#define _mm_mask_permutex2var_epi32(a, k, idx, b) simde_mm_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm_mask_mov_epi32(idx, k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi32 +#define _mm_mask2_permutex2var_epi32(a, idx, k, b) simde_mm_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi32 +#define _mm_maskz_permutex2var_epi32(k, a, idx, b) simde_mm_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi64(a, idx, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 2) ? 
b_ : a_).i64[idx_.i64[i] & 1]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi64 + #define _mm_permutex2var_epi64(a, idx, b) simde_mm_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi64 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm_mask_mov_epi64(a, k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi64 +#define _mm_mask_permutex2var_epi64(a, k, idx, b) simde_mm_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm_mask_mov_epi64(idx, k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi64 +#define _mm_mask2_permutex2var_epi64(a, idx, k, b) simde_mm_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi64 +#define _mm_maskz_permutex2var_epi64(k, a, idx, b) simde_mm_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtepi32_epi8(_mm512_permutex2var_epi32(_mm512_cvtepu8_epi32(a), _mm512_cvtepu8_epi32(idx), _mm512_cvtepu8_epi32(b))); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 0, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x10) ? 
b_ : a_).i8[idx_.i8[i] & 0x0F]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi8 + #define _mm_permutex2var_epi8(a, idx, b) simde_mm_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi8 (simde__m128i a, simde__mmask16 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm_mask_mov_epi8(a, k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi8 +#define _mm_mask_permutex2var_epi8(a, k, idx, b) simde_mm_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__mmask16 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm_mask_mov_epi8(idx, k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi8 +#define _mm_mask2_permutex2var_epi8(a, idx, k, b) simde_mm_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi8 +#define _mm_maskz_permutex2var_epi8(k, a, idx, b) simde_mm_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_pd(a, idx, b); + #else + return simde_mm_castsi128_pd(simde_mm_permutex2var_epi64(simde_mm_castpd_si128(a), idx, simde_mm_castpd_si128(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_pd + #define _mm_permutex2var_pd(a, idx, b) simde_mm_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_permutex2var_pd (simde__m128d a, simde__mmask8 k, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm_mask_mov_pd(a, k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_pd +#define _mm_mask_permutex2var_pd(a, k, idx, b) simde_mm_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask2_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__mmask8 k, 
simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm_mask_mov_pd(simde_mm_castsi128_pd(idx), k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_pd +#define _mm_mask2_permutex2var_pd(a, idx, k, b) simde_mm_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_permutex2var_pd (simde__mmask8 k, simde__m128d a, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_pd +#define _mm_maskz_permutex2var_pd(k, a, idx, b) simde_mm_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_ps(a, idx, b); + #else + return simde_mm_castsi128_ps(simde_mm_permutex2var_epi32(simde_mm_castps_si128(a), idx, simde_mm_castps_si128(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_ps + #define _mm_permutex2var_ps(a, idx, b) simde_mm_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_permutex2var_ps (simde__m128 a, simde__mmask8 k, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm_mask_mov_ps(a, k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_ps +#define _mm_mask_permutex2var_ps(a, k, idx, b) simde_mm_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask2_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__mmask8 k, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm_mask_mov_ps(simde_mm_castsi128_ps(idx), k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_ps +#define _mm_mask2_permutex2var_ps(a, idx, k, b) simde_mm_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_permutex2var_ps (simde__mmask8 k, simde__m128 a, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_ps +#define _mm_maskz_permutex2var_ps(k, a, idx, b) simde_mm_maskz_permutex2var_ps(k, a, idx, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi16(a, idx, b); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i hilo, hilo2, hi, lo, idx2, ta, tb, select; + const __m256i ones = _mm256_set1_epi16(1); + + idx2 = _mm256_srli_epi32(idx, 1); + + ta = _mm256_permutevar8x32_epi32(a, idx2); + tb = _mm256_permutevar8x32_epi32(b, idx2); + select = _mm256_slli_epi32(idx2, 28); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + idx2 = _mm256_srli_epi32(idx2, 16); + + ta = _mm256_permutevar8x32_epi32(a, idx2); + tb = _mm256_permutevar8x32_epi32(b, idx2); + select = _mm256_slli_epi32(idx2, 28); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + + lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo, 0x55); + hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo, 16), 0x55); + + select = _mm256_cmpeq_epi16(_mm256_and_si256(idx, ones), ones); + return _mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 0x10) ? b_ : a_).i16[idx_.i16[i] & 0x0F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi16 + #define _mm256_permutex2var_epi16(a, idx, b) simde_mm256_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi16 (simde__m256i a, simde__mmask16 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi16(a, k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi16 +#define _mm256_mask_permutex2var_epi16(a, k, idx, b) simde_mm256_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__mmask16 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi16(idx, k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi16 +#define _mm256_mask2_permutex2var_epi16(a, idx, k, b) simde_mm256_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return 
_mm256_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi16 +#define _mm256_maskz_permutex2var_epi16(k, a, idx, b) simde_mm256_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi32(a, idx, b); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i ta, tb, select; + ta = _mm256_permutevar8x32_epi32(a, idx); + tb = _mm256_permutevar8x32_epi32(b, idx); + select = _mm256_slli_epi32(idx, 28); + return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 8) ? b_ : a_).i32[idx_.i32[i] & 7]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi32 + #define _mm256_permutex2var_epi32(a, idx, b) simde_mm256_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi32 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi32(a, k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi32 +#define _mm256_mask_permutex2var_epi32(a, k, idx, b) simde_mm256_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi32(idx, k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi32 +#define _mm256_mask2_permutex2var_epi32(a, idx, k, b) simde_mm256_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_maskz_permutex2var_epi32 +#define _mm256_maskz_permutex2var_epi32(k, a, idx, b) simde_mm256_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi64(a, idx, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 4) ? b_ : a_).i64[idx_.i64[i] & 3]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi64 + #define _mm256_permutex2var_epi64(a, idx, b) simde_mm256_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi64 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi64(a, k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi64 +#define _mm256_mask_permutex2var_epi64(a, k, idx, b) simde_mm256_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi64(idx, k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi64 +#define _mm256_mask2_permutex2var_epi64(a, idx, k, b) simde_mm256_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi64 +#define _mm256_maskz_permutex2var_epi64(k, a, idx, b) simde_mm256_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cvtepi16_epi8(_mm512_permutex2var_epi16(_mm512_cvtepu8_epi16(a), _mm512_cvtepu8_epi16(idx), _mm512_cvtepu8_epi16(b))); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t0, t1, index, select0x10, select0x20, a01, b01; + const __m256i mask = _mm256_set1_epi8(0x3F); + const __m256i a0 = 
_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a1 = _mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b0 = _mm256_permute4x64_epi64(b, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b1 = _mm256_permute4x64_epi64(b, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + + index = _mm256_and_si256(idx, mask); + t0 = _mm256_shuffle_epi8(a0, index); + t1 = _mm256_shuffle_epi8(a1, index); + select0x10 = _mm256_slli_epi64(index, 3); + a01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(b0, index); + t1 = _mm256_shuffle_epi8(b1, index); + b01 = _mm256_blendv_epi8(t0, t1, select0x10); + select0x20 = _mm256_slli_epi64(index, 2); + return _mm256_blendv_epi8(a01, b01, select0x20); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x20) ? b_ : a_).i8[idx_.i8[i] & 0x1F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi8 + #define _mm256_permutex2var_epi8(a, idx, b) simde_mm256_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi8 (simde__m256i a, simde__mmask32 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi8(a, k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi8 +#define _mm256_mask_permutex2var_epi8(a, k, idx, b) simde_mm256_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__mmask32 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi8(idx, k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi8 +#define _mm256_mask2_permutex2var_epi8(a, idx, k, b) simde_mm256_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi8 +#define _mm256_maskz_permutex2var_epi8(k, a, idx, b) simde_mm256_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutex2var_pd 
(simde__m256d a, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_pd(a, idx, b); + #else + return simde_mm256_castsi256_pd(simde_mm256_permutex2var_epi64(simde_mm256_castpd_si256(a), idx, simde_mm256_castpd_si256(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_pd + #define _mm256_permutex2var_pd(a, idx, b) simde_mm256_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_permutex2var_pd (simde__m256d a, simde__mmask8 k, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm256_mask_mov_pd(a, k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_pd +#define _mm256_mask_permutex2var_pd(a, k, idx, b) simde_mm256_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask2_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__mmask8 k, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm256_mask_mov_pd(simde_mm256_castsi256_pd(idx), k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_pd +#define _mm256_mask2_permutex2var_pd(a, idx, k, b) simde_mm256_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_permutex2var_pd (simde__mmask8 k, simde__m256d a, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_pd +#define _mm256_maskz_permutex2var_pd(k, a, idx, b) simde_mm256_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_ps(a, idx, b); + #else + return simde_mm256_castsi256_ps(simde_mm256_permutex2var_epi32(simde_mm256_castps_si256(a), idx, simde_mm256_castps_si256(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_ps + #define _mm256_permutex2var_ps(a, idx, b) simde_mm256_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_permutex2var_ps (simde__m256 a, simde__mmask8 k, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm256_mask_mov_ps(a, k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_ps +#define _mm256_mask_permutex2var_ps(a, k, idx, b) simde_mm256_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask2_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__mmask8 k, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm256_mask_mov_ps(simde_mm256_castsi256_ps(idx), k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_ps +#define _mm256_mask2_permutex2var_ps(a, idx, k, b) simde_mm256_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_permutex2var_ps (simde__mmask8 k, simde__m256 a, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_ps +#define _mm256_maskz_permutex2var_ps(k, a, idx, b) simde_mm256_maskz_permutex2var_ps(k, a, idx, b) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde__m512i -simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { +simde_mm512_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__m512i b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_epi32(a, idx, b); + return _mm512_permutex2var_epi16(a, idx, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), @@ -47,15 +1135,160 @@ b_ = simde__m512i_to_private(b), r_; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 0x10) ? 
b_ : a_).i32[idx_.i32[i] & 0x0F]; - } + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i hilo, hilo1, hilo2, hi, lo, idx1, idx2, ta, tb, select; + const __m256i ones = _mm256_set1_epi16(1); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + idx1 = idx_.m256i[i]; + idx2 = _mm256_srli_epi32(idx1, 1); + + select = _mm256_slli_epi32(idx2, 27); + ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); + hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + select = _mm256_add_epi32(select, select); + hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), + _mm256_castsi256_ps(hilo1), + _mm256_castsi256_ps(select))); + + idx2 = _mm256_srli_epi32(idx2, 16); + + select = _mm256_slli_epi32(idx2, 27); + ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + select = _mm256_add_epi32(select, select); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), + _mm256_castsi256_ps(hilo2), + _mm256_castsi256_ps(select))); + + lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo1, 0x55); + hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo1, 16), 0x55); + + select = _mm256_cmpeq_epi16(_mm256_and_si256(idx1, ones), ones); + r_.m256i[i] = _mm256_blendv_epi8(lo, hi, select); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 0x20) ? 
b_ : a_).i16[idx_.i16[i] & 0x1F]; + } + #endif return simde__m512i_from_private(r_); #endif } +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi16 + #define _mm512_permutex2var_epi16(a, idx, b) simde_mm512_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi16 (simde__m512i a, simde__mmask32 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi16(a, k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi16 +#define _mm512_mask_permutex2var_epi16(a, k, idx, b) simde_mm512_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__mmask32 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi16(idx, k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi16 +#define _mm512_mask2_permutex2var_epi16(a, idx, k, b) simde_mm512_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi16 +#define _mm512_maskz_permutex2var_epi16(k, a, idx, b) simde_mm512_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_epi32(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i index, t0, t1, a01, b01, select; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + t0 = _mm256_permutevar8x32_epi32(a_.m256i[0], index); + t1 = _mm256_permutevar8x32_epi32(a_.m256i[1], index); + select = _mm256_slli_epi32(index, 28); + a01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), + _mm256_castsi256_ps(t1), + _mm256_castsi256_ps(select))); + t0 = _mm256_permutevar8x32_epi32(b_.m256i[0], index); + t1 = _mm256_permutevar8x32_epi32(b_.m256i[1], index); + b01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), + _mm256_castsi256_ps(t1), + _mm256_castsi256_ps(select))); + select = _mm256_slli_epi32(index, 27); + r_.m256i[i] = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a01), + _mm256_castsi256_ps(b01), + _mm256_castsi256_ps(select))); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = 
((idx_.i32[i] & 0x10) ? b_ : a_).i32[idx_.i32[i] & 0x0F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutex2var_epi32 #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b) #endif @@ -63,13 +1296,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutex2var_epi32(a, k, idx, b); #else return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutex2var_epi32 #define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b) #endif @@ -77,13 +1310,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask2_permutex2var_epi32(a, idx, k, b); #else return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask2_permutex2var_epi32 #define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b) #endif @@ -91,13 +1324,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutex2var_epi32(k, a, idx, b); #else return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutex2var_epi32 #define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b) #endif @@ -105,7 +1338,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutex2var_epi64(a, idx, b); #else simde__m512i_private @@ -116,13 +1349,13 @@ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 0x08) ? b_ : a_).i64[idx_.i64[i] & 0x07]; + r_.i64[i] = ((idx_.i64[i] & 8) ? 
b_ : a_).i64[idx_.i64[i] & 7]; } return simde__m512i_from_private(r_); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutex2var_epi64 #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b) #endif @@ -130,13 +1363,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutex2var_epi64(a, k, idx, b); #else return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutex2var_epi64 #define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b) #endif @@ -144,13 +1377,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask2_permutex2var_epi64(a, idx, k, b); #else return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask2_permutex2var_epi64 #define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b) #endif @@ -158,35 +1391,149 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutex2var_epi64(k, a, idx, b); #else return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutex2var_epi64(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutex2var_epi64 #define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b) #endif SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m512i hilo, hi, lo, hi2, lo2, idx2; + const __m512i ones = _mm512_set1_epi8(1); + const __m512i low_bytes = _mm512_set1_epi16(0x00FF); + + idx2 = _mm512_srli_epi16(idx, 1); + hilo = _mm512_permutex2var_epi16(a, idx2, b); + __mmask64 mask = _mm512_test_epi8_mask(idx, ones); + lo = _mm512_and_si512(hilo, low_bytes); + hi = _mm512_srli_epi16(hilo, 8); + + idx2 = _mm512_srli_epi16(idx, 9); + hilo = _mm512_permutex2var_epi16(a, idx2, b); + lo2 = _mm512_slli_epi16(hilo, 8); + hi2 = _mm512_andnot_si512(low_bytes, hilo); + + lo = _mm512_or_si512(lo, lo2); + hi = _mm512_or_si512(hi, hi2); + + return _mm512_mask_blend_epi8(mask, lo, hi); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i t0, t1, index, select0x10, select0x20, select0x40, t01, t23, a0123, b0123; + const __m256i mask = _mm256_set1_epi8(0x7F); + 
const __m256i a0 = _mm256_permute4x64_epi64(a_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a1 = _mm256_permute4x64_epi64(a_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i a2 = _mm256_permute4x64_epi64(a_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a3 = _mm256_permute4x64_epi64(a_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b0 = _mm256_permute4x64_epi64(b_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b1 = _mm256_permute4x64_epi64(b_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b2 = _mm256_permute4x64_epi64(b_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b3 = _mm256_permute4x64_epi64(b_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = _mm256_and_si256(idx_.m256i[i], mask); + t0 = _mm256_shuffle_epi8(a0, index); + t1 = _mm256_shuffle_epi8(a1, index); + select0x10 = _mm256_slli_epi64(index, 3); + t01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(a2, index); + t1 = _mm256_shuffle_epi8(a3, index); + t23 = _mm256_blendv_epi8(t0, t1, select0x10); + select0x20 = _mm256_slli_epi64(index, 2); + a0123 = _mm256_blendv_epi8(t01, t23, select0x20); + t0 = _mm256_shuffle_epi8(b0, index); + t1 = _mm256_shuffle_epi8(b1, index); + t01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(b2, index); + t1 = _mm256_shuffle_epi8(b3, index); + t23 = _mm256_blendv_epi8(t0, t1, select0x10); + b0123 = _mm256_blendv_epi8(t01, t23, select0x20); + select0x40 = _mm256_slli_epi64(index, 1); + r_.m256i[i] = _mm256_blendv_epi8(a0123, b0123, select0x40); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x40) ? 
b_ : a_).i8[idx_.i8[i] & 0x3F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi8 + #define _mm512_permutex2var_epi8(a, idx, b) simde_mm512_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi8 (simde__m512i a, simde__mmask64 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi8(a, k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi8 +#define _mm512_mask_permutex2var_epi8(a, k, idx, b) simde_mm512_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__mmask64 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi8(idx, k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi8 +#define _mm512_mask2_permutex2var_epi8(a, idx, k, b) simde_mm512_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi8 +#define _mm512_maskz_permutex2var_epi8(k, a, idx, b) simde_mm512_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__m512d b) { #if defined(SIMDE_X86_AVX512BW_NATIVE) return _mm512_permutex2var_pd(a, idx, b); #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512d_private - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((idx_.i64[i] & 0x08) ? 
b_ : a_).f64[idx_.i64[i] & 0x07]; - } - - return simde__m512d_from_private(r_); + return simde_mm512_castsi512_pd(simde_mm512_permutex2var_epi64(simde_mm512_castpd_si512(a), idx, simde_mm512_castpd_si512(b))); #endif } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) @@ -197,13 +1544,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutex2var_pd(a, k, idx, b); #else return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutex2var_pd #define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) #endif @@ -211,13 +1558,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask2_permutex2var_pd(a, idx, k, b); #else return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask2_permutex2var_pd #define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) #endif @@ -225,13 +1572,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512d simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutex2var_pd(k, a, idx, b); #else return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutex2var_pd #define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) #endif @@ -239,24 +1586,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutex2var_ps(a, idx, b); #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512_private - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((idx_.i32[i] & 0x10) ? 
b_ : a_).f32[idx_.i32[i] & 0x0F]; - } - - return simde__m512_from_private(r_); + return simde_mm512_castsi512_ps(simde_mm512_permutex2var_epi32(simde_mm512_castps_si512(a), idx, simde_mm512_castps_si512(b))); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_permutex2var_ps #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) #endif @@ -264,13 +1600,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask_permutex2var_ps(a, k, idx, b); #else return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_permutex2var_ps #define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) #endif @@ -278,13 +1614,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_mask2_permutex2var_ps(a, idx, k, b); #else return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask2_permutex2var_ps #define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) #endif @@ -292,13 +1628,13 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m512 simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) + #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_maskz_permutex2var_ps(k, a, idx, b); #else return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); #endif } -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_permutex2var_ps #define _mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) #endif diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/permutexvar.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/permutexvar.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/permutexvar.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/permutexvar.h 2021-04-17 01:19:49.000000000 +0000 @@ -29,13 +29,730 @@ #define SIMDE_X86_AVX512_PERMUTEXVAR_H #include "types.h" +#include "and.h" +#include "andnot.h" +#include "blend.h" #include "mov.h" +#include "or.h" +#include "set1.h" +#include "slli.h" +#include "srli.h" +#include "test.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutexvar_epi16 (simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutexvar_epi16(idx, a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + simde__m128i mask16 = simde_mm_set1_epi16(0x0007); + simde__m128i shift16 = simde_mm_set1_epi16(0x0202); + simde__m128i byte_index16 = simde_mm_set1_epi16(0x0100); + simde__m128i index16 = 
simde_mm_and_si128(idx, mask16); + index16 = simde_mm_mullo_epi16(index16, shift16); + index16 = simde_mm_add_epi16(index16, byte_index16); + return simde_mm_shuffle_epi8(a, index16); + #else + simde__m128i_private + idx_ = simde__m128i_to_private(idx), + a_ = simde__m128i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint16x8_t mask16 = vdupq_n_u16(0x0007); + uint16x8_t byte_index16 = vdupq_n_u16(0x0100); + uint16x8_t index16 = vandq_u16(idx_.neon_u16, mask16); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, byte_index16); + r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, vreinterpretq_u8_u16(index16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; + index16 = vec_and(idx_.altivec_u16, vec_splat_u16(7)); + index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); + r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t mask16 = wasm_i16x8_splat(0x0007); + const v128_t shift16 = wasm_i16x8_splat(0x0202); + const v128_t byte_index16 = wasm_i16x8_splat(0x0100); + v128_t index16 = wasm_v128_and(idx_.wasm_v128, mask16); + index16 = wasm_i16x8_mul(index16, shift16); + index16 = wasm_i16x8_add(index16, byte_index16); + r_.wasm_v128 = wasm_v8x16_swizzle(a_.wasm_v128, index16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[idx_.i16[i] & 0x07]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutexvar_epi16 + #define _mm_permutexvar_epi16(idx, a) simde_mm_permutexvar_epi16(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutexvar_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutexvar_epi16(src, k, idx, a); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutexvar_epi16 + #define _mm_mask_permutexvar_epi16(src, k, idx, a) simde_mm_mask_permutexvar_epi16(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutexvar_epi16 (simde__mmask8 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutexvar_epi16(k, idx, a); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutexvar_epi16 + #define _mm_maskz_permutexvar_epi16(k, idx, a) simde_mm_maskz_permutexvar_epi16(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutexvar_epi8 (simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutexvar_epi8(idx, a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + simde__m128i mask = simde_mm_set1_epi8(0x0F); + simde__m128i index = simde_mm_and_si128(idx, mask); + return 
simde_mm_shuffle_epi8(a, index); + #else + simde__m128i_private + idx_ = simde__m128i_to_private(idx), + a_ = simde__m128i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16_t mask = vdupq_n_u8(0x0F); + uint8x16_t index = vandq_u8(idx_.neon_u8, mask); + r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, index); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, idx_.altivec_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t mask = wasm_i8x16_splat(0x0F); + v128_t index = wasm_v128_and(idx_.wasm_v128, mask); + r_.wasm_v128 = wasm_v8x16_swizzle(a_.wasm_v128, index); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[idx_.i8[i] & 0x0F]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutexvar_epi8 + #define _mm_permutexvar_epi8(idx, a) simde_mm_permutexvar_epi8(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutexvar_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutexvar_epi8(src, k, idx, a); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutexvar_epi8 + #define _mm_mask_permutexvar_epi8(src, k, idx, a) simde_mm_mask_permutexvar_epi8(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutexvar_epi8 (simde__mmask16 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutexvar_epi8(k, idx, a); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutexvar_epi8 + #define _mm_maskz_permutexvar_epi8(k, idx, a) simde_mm_maskz_permutexvar_epi8(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi16 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi16(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i mask16 = simde_mm256_set1_epi16(0x001F); + simde__m256i shift16 = simde_mm256_set1_epi16(0x0202); + simde__m256i byte_index16 = simde_mm256_set1_epi16(0x0100); + simde__m256i index16 = simde_mm256_and_si256(idx, mask16); + index16 = simde_mm256_mullo_epi16(index16, shift16); + simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + simde__m256i select = simde_mm256_slli_epi64(index16, 3); + index16 = simde_mm256_add_epi16(index16, byte_index16); + lo = simde_mm256_shuffle_epi8(lo, index16); + hi = simde_mm256_shuffle_epi8(hi, index16); + return simde_mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8 
} }; + uint16x8_t mask16 = vdupq_n_u16(0x000F); + uint16x8_t byte_index16 = vdupq_n_u16(0x0100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, byte_index16); + r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u16(index16)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; + mask16 = vec_splat_u16(0x000F); + shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); + byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); + index16 = vec_mladd(index16, shift16, byte_index16); + r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, + a_.m128i_private[1].altivec_u8, + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, index16, r, t; + const v128_t mask16 = wasm_i16x8_splat(0x000F); + const v128_t shift16 = wasm_i16x8_splat(0x0202); + const v128_t byte_index16 = wasm_i16x8_splat(0x0100); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index16 = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask16); + index16 = wasm_i16x8_mul(index16, shift16); + index = wasm_i16x8_add(index16, byte_index16); + r = wasm_v8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a1, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[idx_.i16[i] & 0x0F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi16 + #define _mm256_permutexvar_epi16(idx, a) simde_mm256_permutexvar_epi16(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi16(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi16 + #define _mm256_mask_permutexvar_epi16(src, k, idx, a) simde_mm256_mask_permutexvar_epi16(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi16 (simde__mmask16 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi16(k, idx, a); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi16 + #define _mm256_maskz_permutexvar_epi16(k, idx, a) simde_mm256_maskz_permutexvar_epi16(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi32 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi32(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8 } }; + uint32x4_t mask32 = vdupq_n_u32(0x00000007); + uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, byte_index32); + r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u32(index32)); + } + #else + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 0x07]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi32 + #define _mm256_permutexvar_epi32(idx, a) simde_mm256_permutexvar_epi32(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi32(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_permutexvar_epi32(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi32 + #define _mm256_mask_permutexvar_epi32(src, k, idx, a) simde_mm256_mask_permutexvar_epi32(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi32 (simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi32(k, idx, a); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutexvar_epi32(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi32 + #define _mm256_maskz_permutexvar_epi32(k, idx, a) simde_mm256_maskz_permutexvar_epi32(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi64 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi64(idx, a); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[idx_.i64[i] & 3]; + } + + return simde__m256i_from_private(r_); + #endif +} 
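/* Editorial sketch, not part of the upstream patch: the scalar fallback just
 * above fixes the semantics that every accelerated branch in this hunk
 * reproduces -- each destination lane copies the source lane selected by the
 * low bits of the matching index.  The helper name below is invented purely
 * for illustration of the 4x64-bit case. */
#include <stdint.h>
#include <stddef.h>

/* Plain-C reference: r[i] = a[idx[i] mod 4]. */
static void permutexvar_epi64_ref(int64_t r[4], const int64_t idx[4], const int64_t a[4]) {
  for (size_t i = 0; i < 4; i++) {
    r[i] = a[idx[i] & 3]; /* only the low two bits of each index are used */
  }
}
/* Example: idx = {3, 2, 1, 0} reverses the four 64-bit lanes of a. */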
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi64 + #define _mm256_permutexvar_epi64(idx, a) simde_mm256_permutexvar_epi64(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi64(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_permutexvar_epi64(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi64 + #define _mm256_mask_permutexvar_epi64(src, k, idx, a) simde_mm256_mask_permutexvar_epi64(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi64(k, idx, a); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutexvar_epi64(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi64 + #define _mm256_maskz_permutexvar_epi64(k, idx, a) simde_mm256_maskz_permutexvar_epi64(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi8 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi8(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i mask = simde_mm256_set1_epi8(0x0F); + simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + simde__m256i index = simde_mm256_and_si256(idx, mask); + simde__m256i select = simde_mm256_slli_epi64(idx, 3); + lo = simde_mm256_shuffle_epi8(lo, index); + hi = simde_mm256_shuffle_epi8(hi, index); + return simde_mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8 } }; + uint8x16_t mask = vdupq_n_u8(0x1F); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i8x16_splat(0x1F); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + r = wasm_v8x16_swizzle(a0, index); + index = 
wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a1, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[idx_.i8[i] & 0x1F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi8 + #define _mm256_permutexvar_epi8(idx, a) simde_mm256_permutexvar_epi8(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi8(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi8 + #define _mm256_mask_permutexvar_epi8(src, k, idx, a) simde_mm256_mask_permutexvar_epi8(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi8 (simde__mmask32 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi8(k, idx, a); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi8 + #define _mm256_maskz_permutexvar_epi8(k, idx, a) simde_mm256_maskz_permutexvar_epi8(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutexvar_pd (simde__m256i idx, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_pd(idx, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_permutexvar_epi64(idx, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_pd + #define _mm256_permutexvar_pd(idx, a) simde_mm256_permutexvar_pd(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_permutexvar_pd (simde__m256d src, simde__mmask8 k, simde__m256i idx, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_pd(src, k, idx, a); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_permutexvar_pd(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_pd + #define _mm256_mask_permutexvar_pd(src, k, idx, a) simde_mm256_mask_permutexvar_pd(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_permutexvar_pd (simde__mmask8 k, simde__m256i idx, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_pd(k, idx, a); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_permutexvar_pd(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_pd + #define 
_mm256_maskz_permutexvar_pd(k, idx, a) simde_mm256_maskz_permutexvar_pd(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutexvar_ps (simde__m256i idx, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_ps(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_permutevar8x32_ps(a, idx); + #else + return simde_mm256_castsi256_ps(simde_mm256_permutexvar_epi32(idx, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_ps + #define _mm256_permutexvar_ps(idx, a) simde_mm256_permutexvar_ps(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_permutexvar_ps (simde__m256 src, simde__mmask8 k, simde__m256i idx, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_ps(src, k, idx, a); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_permutexvar_ps(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_ps + #define _mm256_mask_permutexvar_ps(src, k, idx, a) simde_mm256_mask_permutexvar_ps(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_permutexvar_ps (simde__mmask8 k, simde__m256i idx, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_ps(k, idx, a); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_permutexvar_ps(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_ps + #define _mm256_maskz_permutexvar_ps(k, idx, a) simde_mm256_maskz_permutexvar_ps(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutexvar_epi16 (simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_permutexvar_epi16(idx, a); + #else + simde__m512i_private + idx_ = simde__m512i_to_private(idx), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i t0, t1, index, select, a01, a23; + simde__m256i mask = simde_mm256_set1_epi16(0x001F); + simde__m256i shift = simde_mm256_set1_epi16(0x0202); + simde__m256i byte_index = simde_mm256_set1_epi16(0x0100); + simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); + simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); + simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); + simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + index = simde_mm256_and_si256(index, mask); + index = simde_mm256_mullo_epi16(index, shift); + index = simde_mm256_add_epi16(index, byte_index); + t0 = simde_mm256_shuffle_epi8(a0, index); + t1 = simde_mm256_shuffle_epi8(a1, index); + select = simde_mm256_slli_epi64(index, 3); + a01 = simde_mm256_blendv_epi8(t0, t1, select); + t0 = simde_mm256_shuffle_epi8(a2, index); + t1 = simde_mm256_shuffle_epi8(a3, index); + a23 = simde_mm256_blendv_epi8(t0, t1, select); + select = simde_mm256_slli_epi64(index, 2); + r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + 
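/* Note on the AArch64 branch below: the 512-bit source is used as a 64-byte
 * lookup table for vqtbl4q_u8.  Each 16-bit index is masked to 0..31, then
 * idx * 0x0202 places 2*idx in both bytes of the halfword, and adding 0x0100
 * turns that into the byte pair (2*idx, 2*idx + 1), i.e. the two bytes of
 * the selected 16-bit element. */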
uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8, + a_.m128i_private[2].neon_u8, + a_.m128i_private[3].neon_u8 } }; + uint16x8_t mask16 = vdupq_n_u16(0x001F); + uint16x8_t byte_index16 = vdupq_n_u16(0x0100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, byte_index16); + r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u16(index16)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; + mask16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x001F)); + shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); + byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); + test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); + index16 = vec_mladd(index16, shift16, byte_index16); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); + r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); + r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); + r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i16x8_splat(0x001F); + const v128_t shift = wasm_i16x8_splat(0x0202); + const v128_t byte_index = wasm_i16x8_splat(0x0100); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + const v128_t a2 = a_.m128i_private[2].wasm_v128; + const v128_t a3 = a_.m128i_private[3].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + index = wasm_i16x8_mul(index, shift); + index = wasm_i16x8_add(index, byte_index); + r = wasm_v8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a1, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a2, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a3, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[idx_.i16[i] & 0x1F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_epi16 + #define _mm512_permutexvar_epi16(idx, a) simde_mm512_permutexvar_epi16(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutexvar_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_permutexvar_epi16(src, k, idx, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_permutexvar_epi16(idx, a)); + #endif +} +#if 
defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_epi16 + #define _mm512_mask_permutexvar_epi16(src, k, idx, a) simde_mm512_mask_permutexvar_epi16(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutexvar_epi16 (simde__mmask32 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_permutexvar_epi16(k, idx, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_epi16 + #define _mm512_maskz_permutexvar_epi16(k, idx, a) simde_mm512_maskz_permutexvar_epi16(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_permutexvar_epi32 (simde__m512i idx, simde__m512i a) { #if defined(SIMDE_X86_AVX512F_NATIVE) @@ -46,12 +763,100 @@ a_ = simde__m512i_to_private(a), r_; - #if !defined(__INTEL_COMPILER) + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i index, r0, r1, select; SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + r0 = simde_mm256_permutevar8x32_epi32(a_.m256i[0], index); + r1 = simde_mm256_permutevar8x32_epi32(a_.m256i[1], index); + select = simde_mm256_slli_epi32(index, 28); + r_.m256i[i] = simde_mm256_castps_si256(simde_mm256_blendv_ps(simde_mm256_castsi256_ps(r0), + simde_mm256_castsi256_ps(r1), + simde_mm256_castsi256_ps(select))); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8, + a_.m128i_private[2].neon_u8, + a_.m128i_private[3].neon_u8 } }; + uint32x4_t mask32 = vdupq_n_u32(0x0000000F); + uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, byte_index32); + r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u32(index32)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) index32, mask32, byte_index32, temp32, sixteen; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) zero, shift; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; + mask32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x0000000F)); + byte_index32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100)); + zero = vec_splat_u16(0); + shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)); + sixteen = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)); + test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index32 = vec_and(idx_.m128i_private[i].altivec_u32, mask32); + + /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ + temp32 = vec_sl(index32, sixteen); + index32 = vec_add(index32, temp32); + index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), + shift, + zero)); + + index32 = vec_add(index32, byte_index32); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); + r01 = vec_perm(a_.m128i_private[0].altivec_u8, 
a_.m128i_private[1].altivec_u8, index); + r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); + r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i32x4_splat(0x0000000F); + const v128_t shift = wasm_i32x4_splat(0x04040404); + const v128_t byte_index = wasm_i32x4_splat(0x03020100); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + const v128_t a2 = a_.m128i_private[2].wasm_v128; + const v128_t a3 = a_.m128i_private[3].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + index = wasm_i32x4_mul(index, shift); + index = wasm_i32x4_add(index, byte_index); + r = wasm_v8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a1, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a2, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a3, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 0x0F]; + } #endif - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 0x0F]; - } return simde__m512i_from_private(r_); #endif @@ -144,24 +949,154 @@ #endif SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_permutexvar_pd (simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_pd(idx, a); +simde__m512i +simde_mm512_permutexvar_epi8 (simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_permutexvar_epi8(idx, a); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + simde__m512i hilo, hi, lo, hi2, lo2, idx2; + simde__m512i ones = simde_mm512_set1_epi8(1); + simde__m512i low_bytes = simde_mm512_set1_epi16(0x00FF); + + idx2 = simde_mm512_srli_epi16(idx, 1); + hilo = simde_mm512_permutexvar_epi16(idx2, a); + simde__mmask64 mask = simde_mm512_test_epi8_mask(idx, ones); + lo = simde_mm512_and_si512(hilo, low_bytes); + hi = simde_mm512_srli_epi16(hilo, 8); + + idx2 = simde_mm512_srli_epi16(idx, 9); + hilo = simde_mm512_permutexvar_epi16(idx2, a); + lo2 = simde_mm512_slli_epi16(hilo, 8); + hi2 = simde_mm512_andnot_si512(low_bytes, hilo); + + lo = simde_mm512_or_si512(lo, lo2); + hi = simde_mm512_or_si512(hi, hi2); + + return simde_mm512_mask_blend_epi8(mask, lo, hi); #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512d_private - a_ = simde__m512d_to_private(a), + simde__m512i_private + idx_ = simde__m512i_to_private(idx), + a_ = simde__m512i_to_private(a), r_; - #if !defined(__INTEL_COMPILER) + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i t0, t1, index, select, a01, a23; + simde__m256i mask = simde_mm256_set1_epi8(0x3F); + simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); + simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); + simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); + simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); + + 
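/* Note on the AVX2 loop below: each 128-bit quarter of the source has been
 * broadcast to both lanes of a 256-bit register so _mm256_shuffle_epi8 can
 * pick bytes using the low four index bits.  Index bit 4, moved into each
 * byte's sign bit by the << 3, lets blendv choose between quarters 0/1 and
 * 2/3; index bit 5, moved by the << 2, then chooses between those two
 * partial results. */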
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + index = simde_mm256_and_si256(index, mask); + select = simde_mm256_slli_epi64(index, 3); + t0 = simde_mm256_shuffle_epi8(a0, index); + t1 = simde_mm256_shuffle_epi8(a1, index); + a01 = simde_mm256_blendv_epi8(t0, t1, select); + t0 = simde_mm256_shuffle_epi8(a2, index); + t1 = simde_mm256_shuffle_epi8(a3, index); + a23 = simde_mm256_blendv_epi8(t0, t1, select); + select = simde_mm256_slli_epi64(index, 2); + r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8, + a_.m128i_private[2].neon_u8, + a_.m128i_private[3].neon_u8 } }; + uint8x16_t mask = vdupq_n_u8(0x3F); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) test, r01, r23; + test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8); + r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, idx_.m128i_private[i].altivec_u8); + r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(idx_.m128i_private[i].altivec_u8, test), test)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i8x16_splat(0x3F); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + const v128_t a2 = a_.m128i_private[2].wasm_v128; + const v128_t a3 = a_.m128i_private[3].wasm_v128; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + r = wasm_v8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a1, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a2, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_v8x16_swizzle(a3, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[idx_.i8[i] & 0x3F]; + } #endif - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[idx_.i64[i] & 7]; - } - return simde__m512d_from_private(r_); + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_epi8 + #define _mm512_permutexvar_epi8(idx, a) simde_mm512_permutexvar_epi8(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutexvar_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask_permutexvar_epi8(src, k, idx, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) 
+ #undef _mm512_mask_permutexvar_epi8 + #define _mm512_mask_permutexvar_epi8(src, k, idx, a) simde_mm512_mask_permutexvar_epi8(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutexvar_epi8 (simde__mmask64 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_maskz_permutexvar_epi8(k, idx, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_epi8 + #define _mm512_maskz_permutexvar_epi8(k, idx, a) simde_mm512_maskz_permutexvar_epi8(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_permutexvar_pd (simde__m512i idx, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutexvar_pd(idx, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_permutexvar_epi64(idx, simde_mm512_castpd_si512(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) @@ -203,19 +1138,7 @@ #if defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_permutexvar_ps(idx, a); #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512_private - a_ = simde__m512_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[idx_.i32[i] & 0x0F]; - } - - return simde__m512_from_private(r_); + return simde_mm512_castsi512_ps(simde_mm512_permutexvar_epi32(idx, simde_mm512_castps_si512(a))); #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/shuffle.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/shuffle.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/shuffle.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/shuffle.h 2021-04-17 01:19:49.000000000 +0000 @@ -94,6 +94,82 @@ #define _mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i[0] = a_.m128i[ imm8 & 1]; + r_.m128i[1] = b_.m128i[(imm8 >> 1) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_shuffle_i32x4(a, b, imm8) _mm256_shuffle_i32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_i32x4 + #define _mm256_shuffle_i32x4(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) +#endif + +#define simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_shuffle_i32x4(a, b, imm8)) +#define simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_shuffle_i32x4(a, b, imm8)) + +#define simde_mm256_shuffle_f32x4(a, b, imm8) simde_mm256_castsi256_ps(simde_mm256_shuffle_i32x4(simde_mm256_castps_si256(a), simde_mm256_castps_si256(b), imm8)) +#define simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_shuffle_f32x4(a, b, imm8)) +#define simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, 
simde_mm256_shuffle_f32x4(a, b, imm8)) + +#define simde_mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) +#define simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_shuffle_i64x2(a, b, imm8)) +#define simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_shuffle_i64x2(a, b, imm8)) + +#define simde_mm256_shuffle_f64x2(a, b, imm8) simde_mm256_castsi256_pd(simde_mm256_shuffle_i64x2(simde_mm256_castpd_si256(a), simde_mm256_castpd_si256(b), imm8)) +#define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) +#define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + r_.m128i[0] = a_.m128i[ imm8 & 3]; + r_.m128i[1] = a_.m128i[(imm8 >> 2) & 3]; + r_.m128i[2] = b_.m128i[(imm8 >> 4) & 3]; + r_.m128i[3] = b_.m128i[(imm8 >> 6) & 3]; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_i32x4(a, b, imm8) _mm512_shuffle_i32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_i32x4 + #define _mm512_shuffle_i32x4(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) +#endif + +#define simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_shuffle_i32x4(a, b, imm8)) +#define simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_shuffle_i32x4(a, b, imm8)) + +#define simde_mm512_shuffle_f32x4(a, b, imm8) simde_mm512_castsi512_ps(simde_mm512_shuffle_i32x4(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b), imm8)) +#define simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_shuffle_f32x4(a, b, imm8)) +#define simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_shuffle_f32x4(a, b, imm8)) + +#define simde_mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) +#define simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_shuffle_i64x2(a, b, imm8)) +#define simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_shuffle_i64x2(a, b, imm8)) + +#define simde_mm512_shuffle_f64x2(a, b, imm8) simde_mm512_castsi512_pd(simde_mm512_shuffle_i64x2(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b), imm8)) +#define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) +#define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) + SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/test.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/test.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512/test.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512/test.h 2021-04-17 01:19:49.000000000 +0000 @@ -23,23 +23,47 @@ * Copyright: * 2020 Evan Nemerson * 2020 Hidayat Khan + * 2020 Christopher Moore */ #if 
!defined(SIMDE_X86_AVX512_TEST_H) #define SIMDE_X86_AVX512_TEST_H #include "types.h" -#include "mov.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_test_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_test_epi16_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask32 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, !!(a_.i16[i] & b_.i16[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi16_mask + #define _mm512_test_epi16_mask(a, b) simde_mm512_test_epi16_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES simde__mmask16 -simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { +simde_mm512_test_epi32_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_test_epi32_mask(k1, a, b); + return _mm512_test_epi32_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), @@ -51,19 +75,19 @@ r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); } - return r & k1; + return r; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi32_mask - #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) + #undef _mm512_test_epi32_mask +#define _mm512_test_epi32_mask(a, b) simde_mm512_test_epi32_mask(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__mmask8 -simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { +simde_mm512_test_epi64_mask (simde__m512i a, simde__m512i b) { #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_test_epi64_mask(k1, a, b); + return _mm512_test_epi64_mask(a, b); #else simde__m512i_private a_ = simde__m512i_to_private(a), @@ -72,10 +96,76 @@ SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= !!(a_.i64[i] & b_.i64[i]) << i; + r |= HEDLEY_STATIC_CAST(simde__mmask8, !!(a_.i64[i] & b_.i64[i]) << i); } - return r & k1; + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi64_mask + #define _mm512_test_epi64_mask(a, b) simde_mm512_test_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_test_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_test_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(uint64_t, !!(a_.i8[i] & b_.i8[i])) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi8_mask + #define _mm512_test_epi8_mask(a, b) simde_mm512_test_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_test_epi16_mask (simde__mmask32 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_test_epi16_mask(k1, a, b); + #else + return simde_mm512_test_epi16_mask(a, b) & k1; + #endif +} +#if 
defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi16_mask + #define _mm512_mask_test_epi16_mask(k1, a, b) simde_mm512_mask_test_epi16_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_test_epi32_mask(k1, a, b); + #else + return simde_mm512_test_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi32_mask + #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_test_epi64_mask(k1, a, b); + #else + return simde_mm512_test_epi64_mask(a, b) & k1; #endif } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) @@ -83,6 +173,20 @@ #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_test_epi8_mask (simde__mmask64 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_test_epi8_mask(k1, a, b); + #else + return simde_mm512_test_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi8_mask + #define _mm512_mask_test_epi8_mask(k1, a, b) simde_mm512_mask_test_epi8_mask(k1, a, b) +#endif + SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512bw.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512bw.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512bw.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512bw.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,2484 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Christopher Moore - * 2020 Hidayat Khan - */ - -#if !defined(SIMDE_X86_AVX512BW_H) -#define SIMDE_X86_AVX512BW_H - -#include "avx512vl.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_set1_epi8(src, k, a); -#else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_set1_epi8(k, a); -#else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_set1_epi8(k, a) simde_mm512_maskz_set1_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_set1_epi16(src, k, a); -#else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_set1_epi16(k, a); -#else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi8 (simde__m512i a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_abs_epi8(a); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? 
-a_.i8[i] : a_.i8[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi8 - #define _mm512_abs_epi8(a) simde_mm512_abs_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_abs_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_abs_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi8 - #define _mm512_mask_abs_epi8(src, k, a) simde_mm512_mask_abs_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_abs_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_abs_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi8 - #define _mm512_maskz_abs_epi8(k, a) simde_mm512_maskz_abs_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi16 (simde__m512i a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_abs_epi16(a); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi16 - #define _mm512_abs_epi16(a) simde_mm512_abs_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_abs_epi16(src, k, a); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_abs_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_abs_epi16 - #define _mm512_mask_abs_epi16(src, k, a) simde_mm512_mask_abs_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_abs_epi16(k, a); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_abs_epi16(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_abs_epi16 - #define _mm512_maskz_abs_epi16(k, a) simde_mm512_maskz_abs_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_add_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi8 - #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return 
_mm512_mask_add_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi8 - #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_add_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi8 - #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_add_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi16 - #define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_add_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi16 - #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_add_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi16 - #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_adds_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_adds_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_adds_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int16_t tmp = - HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) + - HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? 
tmp : INT8_MIN) : INT8_MAX)); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_adds_epi8 - #define _mm512_adds_epi8(a, b) simde_mm512_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_adds_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_adds_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_adds_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_adds_epi8 - #define _mm512_mask_adds_epi8(src, k, a, b) simde_mm512_mask_adds_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_adds_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_adds_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_adds_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_adds_epi8 - #define _mm512_maskz_adds_epi8(k, a, b) simde_mm512_maskz_adds_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_adds_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_adds_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_adds_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int32_t tmp = - HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) + - HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? 
tmp : INT16_MIN) : INT16_MAX)); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_adds_epi16 - #define _mm512_adds_epi16(a, b) simde_mm512_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_adds_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_adds_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_adds_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_adds_epi16 - #define _mm512_mask_adds_epi16(src, k, a, b) simde_mm512_mask_adds_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_adds_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_adds_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_adds_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_adds_epi16 - #define _mm512_maskz_adds_epi16(k, a, b) simde_mm512_maskz_adds_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_adds_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_adds_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_adds_epu8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) ? 
(a_.u8[i] + b_.u8[i]) : UINT8_MAX; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_adds_epu8 - #define _mm512_adds_epu8(a, b) simde_mm512_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_adds_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_adds_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_adds_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_adds_epu8 - #define _mm512_mask_adds_epu8(src, k, a, b) simde_mm512_mask_adds_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_adds_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_adds_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_adds_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_adds_epu8 - #define _mm512_maskz_adds_epu8(k, a, b) simde_mm512_maskz_adds_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_adds_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_adds_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) ? 
(a_.u16[i] + b_.u16[i]) : UINT16_MAX; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_adds_epu16 - #define _mm512_adds_epu16(a, b) simde_mm512_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_adds_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_adds_epu16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_adds_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_adds_epu16 - #define _mm512_mask_adds_epu16(src, k, a, b) simde_mm512_mask_adds_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_adds_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_adds_epu16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_adds_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_adds_epu16 - #define _mm512_maskz_adds_epu16(k, a, b) simde_mm512_maskz_adds_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_avg_epu8 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_avg_epu8(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_avg_epu8 - #define _mm512_avg_epu8(a, b) simde_mm512_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_avg_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_avg_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_avg_epu8 - #define _mm512_mask_avg_epu8(src, k, a, b) simde_mm512_mask_avg_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_avg_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_avg_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_avg_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_avg_epu8 - #define _mm512_maskz_avg_epu8(k, a, b) simde_mm512_maskz_avg_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_avg_epu16 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_avg_epu16(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_avg_epu16 - #define _mm512_avg_epu16(a, b) simde_mm512_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastb_epi8 (simde__m128i a) { -#if 
defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_broadcastb_epi8(a); -#else - simde__m128i_private a_= simde__m128i_to_private(a); - return simde_mm512_set1_epi8(a_.i8[0]); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastb_epi8 - #define _mm512_broadcastb_epi8(a) simde_mm512_broadcastb_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcastb_epi8 (simde__m512i src, simde__mmask64 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_broadcastb_epi8(src, k, a); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_broadcastb_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_broadcastb_epi8 - #define _mm512_mask_broadcastb_epi8(src, k, a) simde_mm512_mask_broadcastb_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcastb_epi8 (simde__mmask64 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_broadcastb_epi8(k, a); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_broadcastb_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_broadcastb_epi8 - #define _mm512_maskz_broadcastb_epi8(k, a) simde_mm512_maskz_broadcastb_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastw_epi16 (simde__m128i a) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_broadcastw_epi16(a); -#else - simde__m128i_private a_= simde__m128i_to_private(a); - return simde_mm512_set1_epi16(a_.i16[0]); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastw_epi16 - #define _mm512_broadcastw_epi16(a) simde_mm512_broadcastw_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_shuffle_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_shuffle_epi8(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_shuffle_epi8(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); - r_.m128i[2] = simde_mm_shuffle_epi8(a_.m128i[2], b_.m128i[2]); - r_.m128i[3] = simde_mm_shuffle_epi8(a_.m128i[3], b_.m128i[3]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (b_.u8[i] & 0x80) ? 
0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_shuffle_epi8 - #define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_shuffle_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_shuffle_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_shuffle_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_shuffle_epi8 - #define _mm512_mask_shuffle_epi8(src, k, a, b) simde_mm512_mask_shuffle_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_shuffle_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_shuffle_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_shuffle_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_shuffle_epi8 - #define _mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpeq_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r; - - #if defined(SIMDE_X86_AVX2_NATIVE) && 0 - /* The second cast is absolutely necessary otherwise if the sign bit is set it will be sign extended to 64 bits */ - r = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[1], b_.m256i[1]))); - r = (r << 32) | HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[0], b_.m256i[0]))); - #elif defined(SIMDE_X86_SSE2_NATIVE) && 0 - r = HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[3], b_.m128i[3]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[2], b_.m128i[2]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 == b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] == b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_epi8_mask - #define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpge_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpge_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 >= b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] >= b_.i8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi8_mask - #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpge_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpge_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 >= b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] >= b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu8_mask - #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpgt_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpgt_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r; - - #if defined(SIMDE_X86_AVX2_NATIVE) - /* The second cast is absolutely necessary otherwise if the sign bit is set it will be sign extended to 64 bits */ - r = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpgt_epi8(a_.m256i[1], b_.m256i[1]))); - r = (r << 32) | HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpgt_epi8(a_.m256i[0], b_.m256i[0]))); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r = HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpgt_epi8(a_.m128i[3], b_.m128i[3]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpgt_epi8(a_.m128i[2], b_.m128i[2]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_movemask_epi8(simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]))); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 > b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] > b_.i8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi8_mask - #define _mm512_cmpgt_epi8_mask(a, b) simde_mm512_cmpgt_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmpgt_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmpgt_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 > b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] > b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epu8_mask - #define _mm512_cmpgt_epu8_mask(a, b) simde_mm512_cmpgt_epu8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmple_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmple_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 <= b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] <= b_.i8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi8_mask - #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmple_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmple_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 <= b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] <= b_.u8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu8_mask - #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmplt_epi8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmplt_epi8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.i8 < b_.i8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_epi8_mask - #define _mm512_cmplt_epi8_mask(a, b) simde_mm512_cmplt_epi8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_cmplt_epu8_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cmplt_epu8_mask(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask64 r = 0; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m512i_private tmp; - - tmp.i8 = HEDLEY_STATIC_CAST(__typeof__(tmp.i8), a_.u8 < b_.u8); - r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); - #else - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { - r |= (a_.u8[i] < b_.u8[i]) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_epu8_mask - #define _mm512_cmplt_epu8_mask(a, b) simde_mm512_cmplt_epu8_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtepi16_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cvtepi16_epi8(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i8, a_.i16); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi16_epi8 - #define _mm512_cvtepi16_epi8(a) simde_mm512_cvtepi16_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtepi16_epi8 (simde__m256i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cvtepi16_epi8(src, k, a); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm512_cvtepi16_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtepi16_epi8 - #define _mm512_mask_cvtepi16_epi8(src, k, a) simde_mm512_mask_cvtepi16_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtepi16_epi8 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_cvtepi16_epi8(k, a); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm512_cvtepi16_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtepi16_epi8 - #define _mm512_maskz_cvtepi16_epi8(k, a) simde_mm512_maskz_cvtepi16_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_cvtepi8_epi16 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cvtepi8_epi16(a); - #else - simde__m512i_private r_; - simde__m256i_private a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi8_epi16 - #define _mm512_cvtepi8_epi16(a) simde_mm512_cvtepi8_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtsepi16_epi8 
(simde__m512i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_cvtsepi16_epi8(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = - (a_.i16[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i16[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi16_epi8 - #define _mm512_cvtsepi16_epi8(a) simde_mm512_cvtsepi16_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtsepi16_epi8 (simde__m256i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_cvtsepi16_epi8(src, k, a); - #else - return simde_mm256_mask_mov_epi8(src, k, simde_mm512_cvtsepi16_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi16_epi8 - #define _mm512_mask_cvtsepi16_epi8(src, k, a) simde_mm512_mask_cvtsepi16_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtsepi16_epi8 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_cvtsepi16_epi8(k, a); - #else - return simde_mm256_maskz_mov_epi8(k, simde_mm512_cvtsepi16_epi8(a)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtsepi16_epi8 - #define _mm512_maskz_cvtsepi16_epi8(k, a) simde_mm512_maskz_cvtsepi16_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi8 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epi8(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
a_.i8[i] : b_.i8[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_min_epi8(a, b) simde_mm512_min_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi8 - #define _mm512_mask_min_epi8(src, k, a, b) simde_mm512_mask_min_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi8 - #define _mm512_maskz_min_epi8(k, a, b) simde_mm512_maskz_min_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu8 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epu8(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_min_epu8(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_min_epu8(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu8 - #define _mm512_min_epu8(a, b) simde_mm512_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu8 - #define _mm512_mask_min_epu8(src, k, a, b) simde_mm512_mask_min_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu8 - #define _mm512_maskz_min_epu8(k, a, b) simde_mm512_maskz_min_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi8 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epi8(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
a_.i8[i] : b_.i8[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_max_epi8(a, b) simde_mm512_max_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi8 - #define _mm512_mask_max_epi8(src, k, a, b) simde_mm512_mask_max_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi8 - #define _mm512_maskz_max_epi8(k, a, b) simde_mm512_maskz_max_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu8 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epu8(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_max_epu8(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_max_epu8(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu8 - #define _mm512_max_epu8(a, b) simde_mm512_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu8 - #define _mm512_mask_max_epu8(src, k, a, b) simde_mm512_mask_max_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu8 - #define _mm512_maskz_max_epu8(k, a, b) simde_mm512_maskz_max_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi16 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epi16(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_min_epi16(a, b) simde_mm512_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi16 - #define _mm512_mask_min_epi16(src, k, a, b) simde_mm512_mask_min_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi16 - #define _mm512_maskz_min_epi16(k, a, b) simde_mm512_maskz_min_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu16 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_min_epu16(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_min_epu16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_min_epu16(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu16 - #define _mm512_min_epu16(a, b) simde_mm512_min_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_min_epu16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu16 - #define _mm512_mask_min_epu16(src, k, a, b) simde_mm512_mask_min_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_min_epu16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu16 - #define _mm512_maskz_min_epu16(k, a, b) simde_mm512_maskz_min_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi16 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epi16(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) -# define _mm512_max_epi16(a, b) simde_mm512_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epi16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi16 - #define _mm512_mask_max_epi16(src, k, a, b) simde_mm512_mask_max_epi16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epi16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epi16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi16 - #define _mm512_maskz_max_epi16(k, a, b) simde_mm512_maskz_max_epi16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu16 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_max_epu16(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_max_epu16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_max_epu16(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu16 - #define _mm512_max_epu16(a, b) simde_mm512_max_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_max_epu16(src, k, a, b); - #else - return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu16 - #define _mm512_mask_max_epu16(src, k, a, b) simde_mm512_mask_max_epu16(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_max_epu16(k, a, b); - #else - return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epu16(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu16 - #define _mm512_maskz_max_epu16(k, a, b) simde_mm512_maskz_max_epu16(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sub_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_sub_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sub_epi8(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - 
#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sub_epi8 - #define _mm512_sub_epi8(a, b) simde_mm512_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_sub_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_sub_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_sub_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sub_epi8 - #define _mm512_mask_sub_epi8(src, k, a, b) simde_mm512_mask_sub_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_sub_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_sub_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_sub_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sub_epi8 - #define _mm512_maskz_sub_epi8(k, a, b) simde_mm512_maskz_sub_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mulhi_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mulhi_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mulhi_epi16 - #define _mm512_mulhi_epi16(a, b) simde_mm512_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mulhrs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mulhrs_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mulhrs_epi16 - #define _mm512_mulhrs_epi16(a, b) simde_mm512_mulhrs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mullo_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mullo_epi16(a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mullo_epi16 - #define _mm512_mullo_epi16(a, b) simde_mm512_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packs_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = 
simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_packs_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packs_epi16(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - r_.i8[i + octet_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); - r_.i8[quarter_point + i] = (a_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[octet_point + i])); - r_.i8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[octet_point + i])); - r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); - r_.i8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); - r_.i8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + octet_point + i])); - r_.i8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packs_epi16 - #define _mm512_packs_epi16(a, b) simde_mm512_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packs_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packs_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_packs_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packs_epi32(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.i16[i] = (a_.i32[i] > INT16_MAX) ? INT16_MAX : ((a_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); - r_.i16[i + octet_point] = (b_.i32[i] > INT16_MAX) ? INT16_MAX : ((b_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[i])); - r_.i16[quarter_point + i] = (a_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[octet_point + i] < INT16_MIN) ? 
INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[octet_point + i])); - r_.i16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[octet_point + i])); - r_.i16[halfway_point + i] = (a_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point +i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + i])); - r_.i16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point +i])); - r_.i16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point + octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + octet_point + i])); - r_.i16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packs_epi32 - #define _mm512_packs_epi32(a, b) simde_mm512_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packus_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packus_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256i[0] = simde_mm256_packus_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packus_epi16(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; - const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; - const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); - r_.u8[i + octet_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); - r_.u8[quarter_point + i] = (a_.i16[octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[octet_point + i])); - r_.u8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[octet_point + i])); - r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); - r_.u8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); - r_.u8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + octet_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + octet_point + i])); - r_.u8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packus_epi16 - #define _mm512_packus_epi16(a, b) simde_mm512_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_packus_epi32 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_packus_epi32(a, b); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_packus_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_packus_epi32(a_.m256i[1], b_.m256i[1]); - #else - const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; - const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; - const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; - SIMDE_VECTORIZE - for (size_t i = 0 ; i < octet_point ; i++) { - r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); - r_.u16[i + octet_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); - r_.u16[quarter_point + i] = (a_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[octet_point + i])); - r_.u16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[octet_point + i])); - r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point +i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); - r_.u16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point +i])); - r_.u16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + octet_point + i])); - r_.u16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + octet_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + octet_point + i])); - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_packus_epi32 - #define _mm512_packus_epi32(a, b) simde_mm512_packus_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sad_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_sad_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sad_epu8(a_.m256i[i], b_.m256i[i]); - } - #else - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+:tmp) - for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 8) ; j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sad_epu8 - #define _mm512_sad_epu8(a, b) simde_mm512_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_slli_epi16 (simde__m512i a, const int imm8) - SIMDE_REQUIRE_RANGE(imm8, 0, 255) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - if(imm8 < 16) - r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8))); - else - return simde_mm512_setzero_si512(); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = (imm8 < 16) ? 
HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)) : 0; - } - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_slli_epi16 - #define _mm512_slli_epi16(a, imm8) simde_mm512_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sra_epi16 (simde__m512i a, simde__m128i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_sra_epi16(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sra_epi16(a_.m256i[i], count); - } - #else - simde__m128i_private - count_ = simde__m128i_to_private(count); - uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sra_epi16 - #define _mm512_sra_epi16(a, count) simde_mm512_sra_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srai_epi16 (simde__m512i a, const int imm8) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); - - if (shift > 15) shift = 15; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] >> shift; - } - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) -# define simde_mm512_srai_epi16(a, imm8) _mm512_srai_epi16(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srai_epi16 - #define _mm512_srai_epi16(a, imm8) simde_mm512_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sllv_epi16 (simde__m512i a, simde__m512i b) { - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = HEDLEY_STATIC_CAST(__typeof__(r_.u16), (b_.u16 < 16) & (a_.u16 << b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (b_.u16[i] < 16) ? 
HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << b_.u16[i])) : 0; - } - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_sllv_epi16(a, b) _mm512_sllv_epi16(a, b) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sllv_epi16 - #define _mm512_sllv_epi16(a, b) simde_mm512_sllv_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srav_epi16 (simde__m512i a, simde__m512i count) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_srav_epi16(a, count); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - count_ = simde__m512i_to_private(count); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i16[i]); - if (shift > 16) shift = 15; - r_.i16[i] = a_.i16[i] >> shift; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srav_epi16 - #define _mm512_srav_epi16(a, count) simde_mm512_srav_epi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srli_epi16 (simde__m512i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - if (imm8 > 15) - return simde_mm512_setzero_si512(); - - if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(int16_t, imm8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] >> imm8; - } - #endif - } - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_srli_epi16(a, imm8) _mm512_srli_epi16(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srli_epi16 - #define _mm512_srli_epi16(a, imm8) simde_mm512_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srlv_epi16 (simde__m512i a, simde__m512i b) { - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = HEDLEY_STATIC_CAST(__typeof__(r_.u16), (b_.u16 < 16) & (a_.u16 >> b_.u16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (b_.u16[i] < 16) ? 
(a_.u16[i] >> b_.u16[i]) : 0; - } - #endif - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512BW_NATIVE) - #define simde_mm512_srlv_epi16(a, b) _mm512_srlv_epi16(a, b) -#endif -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_srlv_epi16 - #define _mm512_srlv_epi16(a, b) simde_mm512_srlv_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sub_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_sub_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sub_epi16(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_sub_epi16 - #define _mm512_sub_epi16(a, b) simde_mm512_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_subs_epi8(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - const int16_t tmp = - HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) - - HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? 
tmp : INT8_MIN) : INT8_MAX)); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epi8 - #define _mm512_subs_epi8(a, b) simde_mm512_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_subs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_subs_epi8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_subs_epi8 - #define _mm512_mask_subs_epi8(src, k, a, b) simde_mm512_mask_subs_epi8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_subs_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_subs_epi8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epi8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_subs_epi8 - #define _mm512_maskz_subs_epi8(k, a, b) simde_mm512_maskz_subs_epi8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_subs_epi16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - const int32_t tmp = - HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) - - HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((tmp < INT16_MAX) ? ((tmp > INT16_MIN) ? tmp : INT16_MIN) : INT16_MAX)); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epi16 - #define _mm512_subs_epi16(a, b) simde_mm512_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epu8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epu8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_mm_subs_epu8(a_.m128i[i], b_.m128i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
(a_.u8[i] - b_.u8[i]) : UINT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epu8 - #define _mm512_subs_epu8(a, b) simde_mm512_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_subs_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_subs_epu8(src, k, a, b); - #else - return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_subs_epu8 - #define _mm512_mask_subs_epu8(src, k, a, b) simde_mm512_mask_subs_epu8(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_subs_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_subs_epu8(k, a, b); - #else - return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epu8(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_subs_epu8 - #define _mm512_maskz_subs_epu8(k, a, b) simde_mm512_maskz_subs_epu8(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_subs_epu16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_subs_epu16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if !defined(HEDLEY_INTEL_VERSION) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_subs_epu16(a_.m256i[i], b_.m256i[i]); - } - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
(a_.u16[i] - b_.u16[i]) : UINT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_subs_epu16 - #define _mm512_subs_epu16(a, b) simde_mm512_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpacklo_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_unpacklo_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, - 0, 64, 1, 65, 2, 66, 3, 67, - 4, 68, 5, 69, 6, 70, 7, 71, - 16, 80, 17, 81, 18, 82, 19, 83, - 20, 84, 21, 85, 22, 86, 23, 87, - 32, 96, 33, 97, 34, 98, 35, 99, - 36, 100, 37, 101, 38, 102, 39, 103, - 48, 112, 49, 113, 50, 114, 51, 115, - 52, 116, 53, 117, 54, 118, 55, 119); - #else - r_.m256i[0] = simde_mm256_unpacklo_epi8(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpacklo_epi8(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_epi8 - #define _mm512_unpacklo_epi8(a, b) simde_mm512_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpacklo_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_unpacklo_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, - 0, 32, 1, 33, 2, 34, 3, 35, 8, 40, 9, 41, 10, 42, 11, 43, - 16, 48, 17, 49, 18, 50, 19, 51, 24, 56, 25, 57, 26, 58, 27, 59); - #else - r_.m256i[0] = simde_mm256_unpacklo_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpacklo_epi16(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_epi16 - #define _mm512_unpacklo_epi16(a, b) simde_mm512_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi8 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_unpackhi_epi8(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, - 8, 72, 9, 73, 10, 74, 11, 75, - 12, 76, 13, 77, 14, 78, 15, 79, - 24, 88, 25, 89, 26, 90, 27, 91, - 28, 92, 29, 93, 30, 94, 31, 95, - 40, 104, 41, 105, 42, 106, 43, 107, - 44, 108, 45, 109, 46, 110, 47, 111, - 56, 120, 57, 121, 58, 122, 59, 123, - 60, 124, 61, 125, 62, 126, 63, 127); - #else - r_.m256i[0] = simde_mm256_unpackhi_epi8(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi8(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi8 - #define _mm512_unpackhi_epi8(a, b) simde_mm512_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi16 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_unpackhi_epi16(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - 
r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, - 4, 36, 5, 37, 6, 38, 7, 39, 12, 44, 13, 45, 14, 46, 15, 47, - 20, 52, 21, 53, 22, 54, 23, 55, 28, 60, 29, 61, 30, 62, 31, 63); - #else - r_.m256i[0] = simde_mm256_unpackhi_epi16(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi16(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi16 - #define _mm512_unpackhi_epi16(a, b) simde_mm512_unpackhi_epi16(a, b) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512BW_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512cd.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512cd.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512cd.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512cd.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,208 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
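The _mm512_unpacklo_epi16 / _mm512_unpackhi_epi16 fallbacks removed just above express a per-128-bit-lane interleave through SIMDE_SHUFFLE_VECTOR_ index lists (0, 32, 1, 33, ... for the low halves; 4, 36, 5, 37, ... for the high halves). A minimal scalar sketch of the unpacklo semantics, assuming plain int16_t arrays of 32 elements per 512-bit vector; unpacklo_epi16_ref is an illustrative name, not part of SIMDe:

    #include <stddef.h>
    #include <stdint.h>

    /* Interleave the low four 16-bit elements of a and b inside each of the
     * four 128-bit lanes, mirroring the 0, 32, 1, 33, ... index pattern. */
    static void unpacklo_epi16_ref(const int16_t a[32], const int16_t b[32],
                                   int16_t r[32]) {
      for (size_t lane = 0; lane < 4; lane++) {
        const size_t base = lane * 8;          /* 8 x int16_t per 128-bit lane */
        for (size_t j = 0; j < 4; j++) {
          r[base + 2 * j]     = a[base + j];   /* even result slots come from a */
          r[base + 2 * j + 1] = b[base + j];   /* odd result slots come from b  */
        }
      }
    }

The unpackhi variant reads elements 4..7 of each lane instead of 0..3.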
- * - * Copyright: - * 2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_AVX512CD_H) -#define SIMDE_X86_AVX512CD_H - -#include "avx512vl.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -#if \ - ( HEDLEY_HAS_BUILTIN(__builtin_clz) || \ - HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - HEDLEY_ARM_VERSION_CHECK(4,1,0) ) && \ - defined(__INT_MAX__) && defined(__LONG_MAX__) && defined(__LONG_LONG_MAX__) && \ - defined(__INT32_MAX__) && defined(__INT64_MAX__) - #if __INT_MAX__ == __INT32_MAX__ - #define simde_x_clz32(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) - #elif __LONG_MAX__ == __INT32_MAX__ - #define simde_x_clz32(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) - #elif __LONG_LONG_MAX__ == __INT32_MAX__ - #define simde_x_clz32(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) - #endif - - #if __INT_MAX__ == __INT64_MAX__ - #define simde_x_clz64(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) - #elif __LONG_MAX__ == __INT64_MAX__ - #define simde_x_clz64(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) - #elif __LONG_LONG_MAX__ == __INT64_MAX__ - #define simde_x_clz64(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) - #endif -#elif HEDLEY_MSVC_VERSION_CHECK(14,0,0) - static int simde_x_clz32(uint32_t x) { - unsigned long r; - _BitScanReverse(&r, x); - return 31 - HEDLEY_STATIC_CAST(int, r); - } - #define simde_x_clz32 simde_x_clz32 - - static int simde_x_clz64(uint64_t x) { - unsigned long r; - - #if defined(_M_AMD64) || defined(_M_ARM64) - _BitScanReverse64(&r, x); - return 63 - HEDLEY_STATIC_CAST(int, r); - #else - uint32_t high = HEDLEY_STATIC_CAST(uint32_t, x >> 32); - if (high != 0) - return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, high)); - else - return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, x & ~UINT32_C(0))) + 32; - #endif - } - #define simde_x_clz64 simde_x_clz64 -#endif - -#if !defined(simde_x_clz32) || !defined(simde_x_clz64) - static uint8_t simde_x_avx512cd_lz_lookup(const uint8_t value) { - static const uint8_t lut[256] = { - 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - return lut[value]; - }; - - #if !defined(simde_x_clz32) - static int simde_x_clz32(uint32_t x) { - size_t s = sizeof(x) * 8; - uint32_t r; - - while ((s -= 8) != 0) { - r = x >> s; - if (r != 0) - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + - (((sizeof(x) - 1) * 8) - s); - } - - if (x == 0) - return (int) ((sizeof(x) * 8) - 1); - else - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + - ((sizeof(x) - 1) * 8); - } - #endif - - #if !defined(simde_x_clz64) - static int simde_x_clz64(uint64_t x) { - size_t s = sizeof(x) * 8; - uint64_t r; - - while ((s -= 8) != 0) { - r = x >> s; - if (r != 0) - return 
simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + - (((sizeof(x) - 1) * 8) - s); - } - - if (x == 0) - return (int) ((sizeof(x) * 8) - 1); - else - return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + - ((sizeof(x) - 1) * 8); - } - #endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_lzcnt_epi32(simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_lzcnt_epi32(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - /* https://stackoverflow.com/a/58827596/501126 */ - a = _mm_andnot_si128(_mm_srli_epi32(a, 8), a); - a = _mm_castps_si128(_mm_cvtepi32_ps(a)); - a = _mm_srli_epi32(a, 23); - a = _mm_subs_epu16(_mm_set1_epi32(158), a); - a = _mm_min_epi16(a, _mm_set1_epi32(32)); - return a; - #else - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (HEDLEY_UNLIKELY(a_.i32[i] == 0) ? HEDLEY_STATIC_CAST(int32_t, sizeof(int32_t) * CHAR_BIT) : HEDLEY_STATIC_CAST(int32_t, simde_x_clz32(HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])))); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) - #undef _mm_lzcnt_epi32 - #define _mm_lzcnt_epi32(a) simde_mm_lzcnt_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_lzcnt_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_mask_lzcnt_epi32(src, k, a); - #else - return simde_mm_mask_mov_epi32(src, k, simde_mm_lzcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_lzcnt_epi32 - #define _mm_mask_lzcnt_epi32(src, k, a) simde_mm_mask_lzcnt_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_lzcnt_epi32(simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) - return _mm_maskz_lzcnt_epi32(k, a); - #else - return simde_mm_maskz_mov_epi32(k, simde_mm_lzcnt_epi32(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_lzcnt_epi32 - #define _mm_maskz_lzcnt_epi32(k, a) simde_mm_maskz_lzcnt_epi32(k, a) -#endif - -SIMDE_END_DECLS_ -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512CD_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512dq.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512dq.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512dq.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512dq.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,435 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
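The simde_x_clz32 / simde_x_clz64 fallbacks removed just above count leading zero bits one byte at a time via the simde_x_avx512cd_lz_lookup table; simde_mm_lzcnt_epi32 handles a zero element itself (returning 32) before calling them. A minimal bit-by-bit scalar reference with the same intent, useful when checking their results; clz32_ref is an illustrative name, not part of SIMDe:

    #include <stdint.h>

    /* Count leading zero bits of a 32-bit value, one bit at a time. */
    static int clz32_ref(uint32_t x) {
      int n = 0;
      if (x == 0)
        return 32;                             /* every bit is zero */
      while ((x & UINT32_C(0x80000000)) == 0) {
        x <<= 1;
        n++;
      }
      return n;
    }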
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - */ - -#if !defined(SIMDE_X86_AVX512DQ_H) -#define SIMDE_X86_AVX512DQ_H - -#include "avx512bw.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_and_pd (simde__m512d a, simde__m512d b) { -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_pd(a, b); -#else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - -#if defined(SIMDE_X86_AVX_NATIVE) - r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } -#endif - - return simde__m512d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_pd - #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_and_ps (simde__m512 a, simde__m512 b) { -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_and_ps(a, b); -#else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - -#if defined(SIMDE_X86_AVX_NATIVE) - r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } -#endif - - return simde__m512_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_ps - #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_ps(src, k, a, b); -#else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_ps(k, a, b); -#else - return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_mask_and_pd(src, k, a, b); -#else - return 
simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_maskz_and_pd(k, a, b); -#else - return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) -#else - #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_ps - #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) -#else - #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_pd - #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) -#else - #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) -#endif - -#if defined(SIMDE_X86_AVX512DQ_NATIVE) - #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) -#else - #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) -#endif -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_or_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_or_ps(a, b); - #else - simde__m512_private - r_, 
- a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256[0] = simde_mm256_or_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_or_ps(a_.m256[1], b_.m256[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_ps - #define _mm512_or_ps(a, b) simde_mm512_or_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_or_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_or_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_or_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_or_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_pd - #define _mm512_or_pd(a, b) simde_mm512_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_xor_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256[0] = simde_mm256_xor_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_xor_ps(a_.m256[1], b_.m256[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_ps - #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_xor_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_pd - #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_copysign_ps(simde__m512 dest, simde__m512 src) { - simde__m512_private - r_, - dest_ = simde__m512_to_private(dest), - src_ = simde__m512_to_private(src); - - #if defined(simde_math_copysignf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; 
i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } - #else - simde__m512 sgnbit = simde_mm512_xor_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm512_set1_ps(-SIMDE_FLOAT32_C(0.0))); - return simde_mm512_xor_ps(simde_mm512_and_ps(sgnbit, src), simde_mm512_andnot_ps(sgnbit, dest)); - #endif - - return simde__m512_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_copysign_pd(simde__m512d dest, simde__m512d src) { - simde__m512d_private - r_, - dest_ = simde__m512d_to_private(dest), - src_ = simde__m512d_to_private(src); - - #if defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } - #else - simde__m512d sgnbit = simde_mm512_xor_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm512_set1_pd(-SIMDE_FLOAT64_C(0.0))); - return simde_mm512_xor_pd(simde_mm512_and_pd(sgnbit, src), simde_mm512_andnot_pd(sgnbit, dest)); - #endif - - return simde__m512d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_negate_ps(simde__m512 a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return simde_mm512_xor_ps(a,_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0))); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = -a_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_negate_pd(simde__m512d a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return simde_mm512_xor_pd(a, _mm512_set1_pd(SIMDE_FLOAT64_C(-0.0))); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = -a_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_xorsign_ps(simde__m512 dest, simde__m512 src) { - return simde_mm512_xor_ps(simde_mm512_and_ps(simde_mm512_set1_ps(-0.0f), src), dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_xorsign_pd(simde__m512d dest, simde__m512d src) { - return simde_mm512_xor_pd(simde_mm512_and_pd(simde_mm512_set1_pd(-0.0), src), dest); -} - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512DQ_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512f.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512f.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512f.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512f.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,9163 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial 
portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Hidayat Khan - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512F_H) -#define SIMDE_X86_AVX512F_H - -#include "avx2.h" -#include "fma.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for - __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte - aligned even if we reduce the alignment requirements of other members. - - Even if we're on x86 and use the native AVX-512 types for arguments/return values, the - to/from private functions will break, and I'm not willing to change their APIs to use - pointers (which would also require more verbose code on the caller side) just to make - MSVC happy. - - If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, - or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to - fix this without requiring API changes (except transparently through macros), patches - are welcome. */ - -# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) -# if defined(SIMDE_X86_AVX512F_NATIVE) -# undef SIMDE_X86_AVX512F_NATIVE -# pragma message("Native AVX-512 support requires MSVC 2017 or later. 
See comment above (in code) for details.") -# endif -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN(32) -# else -# define SIMDE_AVX512_ALIGN SIMDE_ALIGN(64) -# endif - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; -#else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; -#endif - - SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; - SIMDE_AVX512_ALIGN simde__m128 m128[4]; - SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; - SIMDE_AVX512_ALIGN simde__m256 m256[2]; - -#if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512 n; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif -#endif -} simde__m512_private; - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 
SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; -#else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; -#endif - - SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; - SIMDE_AVX512_ALIGN simde__m128d m128d[4]; - SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; - SIMDE_AVX512_ALIGN simde__m256d m256d[2]; - -#if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512d n; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif -#endif -} simde__m512d_private; - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; -#else - SIMDE_AVX512_ALIGN int8_t i8[64]; - SIMDE_AVX512_ALIGN 
int16_t i16[32]; - SIMDE_AVX512_ALIGN int32_t i32[16]; - SIMDE_AVX512_ALIGN int64_t i64[8]; - SIMDE_AVX512_ALIGN uint8_t u8[64]; - SIMDE_AVX512_ALIGN uint16_t u16[32]; - SIMDE_AVX512_ALIGN uint32_t u32[16]; - SIMDE_AVX512_ALIGN uint64_t u64[8]; - SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; - SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; - #if defined(SIMDE_HAVE_INT128_) - SIMDE_AVX512_ALIGN simde_int128 i128[4]; - SIMDE_AVX512_ALIGN simde_uint128 u128[4]; - #endif - SIMDE_AVX512_ALIGN simde_float32 f32[16]; - SIMDE_AVX512_ALIGN simde_float64 f64[8]; -#endif - - SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; - SIMDE_AVX512_ALIGN simde__m128i m128i[4]; - SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; - SIMDE_AVX512_ALIGN simde__m256i m256i[2]; - -#if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_AVX512_ALIGN __m512i n; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; - #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; - SIMDE_ALIGN(16) SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; - #endif -#endif -} simde__m512i_private; - -/* Intel uses the same header (immintrin.h) for everything AVX and - * later. If native aliases are enabled, and the machine has native - * support for AVX imintrin.h will already have been included, which - * means simde__m512* will already have been defined. So, even - * if the machine doesn't support AVX512F we need to use the native - * type; it has already been defined. - * - * However, we also can't just assume that including immintrin.h does - * actually define these. It could be a compiler which supports AVX - * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we - * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, - * so we assume that if it's present AVX-512F has already been - * declared. - * - * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC - * uses the preprocessor to define all the _MM_CMPINT_* members, - * in most compilers they are simply normal enum members. However, - * all compilers I've looked at use an object-like macro for - * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT - * is included in case a compiler does the reverse, though I haven't - * run into one which does. - * - * As for the ICC check, unlike other compilers, merely using the - * AVX-512 types causes ICC to generate AVX-512 instructions. 
*/ -#if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && (defined(SIMDE_X86_AVX512F_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) - typedef __m512 simde__m512; - typedef __m512i simde__m512i; - typedef __m512d simde__m512d; - - typedef __mmask8 simde__mmask8; - typedef __mmask16 simde__mmask16; -#else - #if defined(SIMDE_VECTOR_SUBSCRIPT) - typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; - #else - typedef simde__m512_private simde__m512; - typedef simde__m512i_private simde__m512i; - typedef simde__m512d_private simde__m512d; - #endif - - typedef uint8_t simde__mmask8; - typedef uint16_t simde__mmask16; -#endif - -/* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is - * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang - * both are in avx512bwintrin.h), not AVX-512F. However, we don't have - * a good (not-compiler-specific) way to detect if these headers have - * been included. In compilers which support AVX-512F but not - * AVX-512BW/VL (e.g., GCC 4.9) we these typedefs since __mmask{32,64) - * won't exist. - * - * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t - * in all compilers, so it's safe to use these instead of typedefs to - * __mmask{16,32}. If you run into a problem with this please file an - * issue and we'll try to figure out a work-around. */ -typedef uint32_t simde__mmask32; -typedef uint64_t simde__mmask64; - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) - #if !defined(HEDLEY_INTEL_VERSION) - typedef simde__m512 __m512; - typedef simde__m512i __m512i; - typedef simde__m512d __m512d; - #else - #define __m512 simde__m512 - #define __m512i simde__m512i - #define __m512d simde__m512d - #endif -#endif - -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); -HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde__m512_from_private(simde__m512_private v) { - simde__m512 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512_private -simde__m512_to_private(simde__m512 v) { - simde__m512_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m512i -simde__m512i_from_private(simde__m512i_private v) { - simde__m512i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i_private -simde__m512i_to_private(simde__m512i v) { - simde__m512i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde__m512d_from_private(simde__m512d_private v) { - simde__m512d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d_private -simde__m512d_to_private(simde__m512d v) { - simde__m512d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castpd_ps (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd_ps(a); - #else - simde__m512 r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd_ps - #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castpd_si512 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd_si512 - #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castps_pd (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps_pd(a); - #else - simde__m512d r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps_pd - #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castps_si512 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps_si512(a); - #else - simde__m512i r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps_si512 - #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castsi512_ps (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_ps(a); - #else - simde__m512 r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_ps - #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castsi512_pd (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_pd(a); - #else - simde__m512d r; - simde_memcpy(&r, &a, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_pd - #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castpd128_pd512 (simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd128_pd512(a); - #else - simde__m512d_private r_; - r_.m128d[0] = a; - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd128_pd512 - #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_castpd256_pd512 (simde__m256d a) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd256_pd512(a); - #else - simde__m512d_private r_; - r_.m256d[0] = a; - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd256_pd512 - #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm512_castpd512_pd128 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd512_pd128(a); - #else - simde__m512d_private a_ = simde__m512d_to_private(a); - return a_.m128d[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd512_pd128 - #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm512_castpd512_pd256 (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castpd512_pd256(a); - #else - simde__m512d_private a_ = simde__m512d_to_private(a); - return a_.m256d[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castpd512_pd256 - #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castps128_ps512 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps128_ps512(a); - #else - simde__m512_private r_; - r_.m128[0] = a; - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps128_ps512 - #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_castps256_ps512 (simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps256_ps512(a); - #else - simde__m512_private r_; - r_.m256[0] = a; - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps256_ps512 - #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm512_castps512_ps128 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps512_ps128(a); - #else - simde__m512_private a_ = simde__m512_to_private(a); - return a_.m128[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps512_ps128 - #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm512_castps512_ps256 (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castps512_ps256(a); - #else - simde__m512_private a_ = simde__m512_to_private(a); - return a_.m256[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castps512_ps256 - #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castsi128_si512 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi128_si512(a); - #else - simde__m512i_private r_; - r_.m128i[0] = a; - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi128_si512 - #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_castsi256_si512 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi256_si512(a); - #else - simde__m512i_private r_; - r_.m256i[0] = a; - return 
simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi256_si512 - #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_castsi512_si128 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_si128(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - return a_.m128i[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_si128 - #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_castsi512_si256 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_castsi512_si256(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - return a_.m256i[0]; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_castsi512_si256 - #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movm_epi8 (simde__mmask16 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_movm_epi8(k); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - const simde__m128i zero = simde_mm_setzero_si128(); - const simde__m128i bits = simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); - const simde__m128i shuffle = simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); - simde__m128i r; - - r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); - r = simde_mm_mullo_epi16(r, bits); - r = simde_mm_shuffle_epi8(r, shuffle); - r = simde_mm_cmpgt_epi8(zero, r); - - return r; - #else - simde__m128i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_movm_epi8 - #define _mm_movm_epi8(k) simde_mm_movm_epi8(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_movm_epi8 (simde__mmask32 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_movm_epi8(k); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const simde__m256i zero = simde_mm256_setzero_si256(); - const simde__m256i bits = simde_mm256_broadcastsi128_si256(simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80)); - const simde__m256i shuffle = simde_mm256_broadcastsi128_si256(simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); - simde__m256i r; - - r = simde_mm256_set_m128i(simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k >> 16)), simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k))); - r = simde_mm256_mullo_epi16(r, bits); - r = simde_mm256_shuffle_epi8(r, shuffle); - r = simde_mm256_cmpgt_epi8(zero, r); - - return r; - #else - simde__m256i_private r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k)); - r_.m128i[1] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? 
~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_movm_epi8 - #define _mm256_movm_epi8(k) simde_mm256_movm_epi8(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_movm_epi8 (simde__mmask64 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movm_epi8(k); - #else - simde__m512i_private r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k)); - r_.m256i[1] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movm_epi8 - #define _mm512_movm_epi8(k) simde_mm512_movm_epi8(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movm_epi16 (simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_movm_epi16(k); - #elif defined(SIMDE_X86_SSE2_NATIVE) - const simde__m128i bits = simde_mm_set_epi16(0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); - simde__m128i r; - - r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); - r = simde_mm_mullo_epi16(r, bits); - r = simde_mm_srai_epi16(r, 15); - - return r; - #else - simde__m128i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_movm_epi16 - #define _mm_movm_epi16(k) simde_mm_movm_epi16(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_movm_epi16 (simde__mmask16 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_movm_epi16(k); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const simde__m256i bits = simde_mm256_set_epi16(0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); - simde__m256i r; - - r = simde_mm256_set1_epi16(HEDLEY_STATIC_CAST(short, k)); - r = simde_mm256_mullo_epi16(r, bits); - r = simde_mm256_srai_epi16(r, 15); - - return r; - #else - simde__m256i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k)); - r_.m128i[1] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_movm_epi16 - #define _mm256_movm_epi16(k) simde_mm256_movm_epi16(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_movm_epi16 (simde__mmask32 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movm_epi16(k); - #else - simde__m512i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k)); - r_.m256i[1] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movm_epi16 - #define _mm512_movm_epi16(k) simde_mm512_movm_epi16(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movm_epi32 (simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_movm_epi32(k); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const simde__m128i shifts = simde_mm_set_epi32(28, 29, 30, 31); - simde__m128i r; - - r = simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); - r = simde_mm_sllv_epi32(r, shifts); - r = simde_mm_srai_epi32(r, 31); - - return r; - #elif defined(SIMDE_X86_SSE2_NATIVE) - const simde__m128i bits = simde_mm_set_epi32(0x10000000, 0x20000000, 0x40000000, INT32_MIN /* 0x80000000 */); - simde__m128i r; - - r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); - r = simde_mm_mullo_epi16(r, bits); - r = simde_mm_srai_epi32(r, 31); - - return r; - #else - simde__m128i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_movm_epi32 - #define _mm_movm_epi32(k) simde_mm_movm_epi32(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_movm_epi32 (simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_movm_epi32(k); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const simde__m256i shifts = simde_mm256_set_epi32(24, 25, 26, 27, 28, 29, 30, 31); - simde__m256i r; - - r = simde_mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); - r = simde_mm256_sllv_epi32(r, shifts); - r = simde_mm256_srai_epi32(r, 31); - - return r; - #else - simde__m256i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_movm_epi32(k ); - r_.m128i[1] = simde_mm_movm_epi32(k >> 4); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? 
~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_movm_epi32 - #define _mm256_movm_epi32(k) simde_mm256_movm_epi32(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_movm_epi32 (simde__mmask16 k) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movm_epi32(k); - #else - simde__m512i_private r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k )); - r_.m256i[1] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movm_epi32 - #define _mm512_movm_epi32(k) simde_mm512_movm_epi32(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_movm_epi64 (simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_movm_epi64(k); - /* N.B. CM: These fallbacks may not be faster as there are only two elements */ - #elif defined(SIMDE_X86_AVX2_NATIVE) - const simde__m128i shifts = simde_mm_set_epi32(30, 30, 31, 31); - simde__m128i r; - - r = simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); - r = simde_mm_sllv_epi32(r, shifts); - r = simde_mm_srai_epi32(r, 31); - - return r; - #elif defined(SIMDE_X86_SSE2_NATIVE) - const simde__m128i bits = simde_mm_set_epi32(0x40000000, 0x40000000, INT32_MIN /* 0x80000000 */, INT32_MIN /* 0x80000000 */); - simde__m128i r; - - r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); - r = simde_mm_mullo_epi16(r, bits); - r = simde_mm_srai_epi32(r, 31); - - return r; - #else - simde__m128i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_movm_epi64 - #define _mm_movm_epi64(k) simde_mm_movm_epi64(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_movm_epi64 (simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_movm_epi64(k); - #elif defined(SIMDE_X86_AVX2_NATIVE) - const simde__m256i shifts = simde_mm256_set_epi32(28, 28, 29, 29, 30, 30, 31, 31); - simde__m256i r; - - r = simde_mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); - r = simde_mm256_sllv_epi32(r, shifts); - r = simde_mm256_srai_epi32(r, 31); - - return r; - #else - simde__m256i_private r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_movm_epi64(k ); - r_.m128i[1] = simde_mm_movm_epi64(k >> 2); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_movm_epi64 - #define _mm256_movm_epi64(k) simde_mm256_movm_epi64(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_movm_epi64 (simde__mmask8 k) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movm_epi64(k); - #else - simde__m512i_private r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_movm_epi64(k ); - r_.m256i[1] = simde_mm256_movm_epi64(k >> 4); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movm_epi64 - #define _mm512_movm_epi64(k) simde_mm512_movm_epi64(k) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi8(src, k, a); - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - return simde_mm_blendv_epi8(src, a, simde_mm_movm_epi8(k)); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - simde__m128i mask = simde_mm_movm_epi8(k); - return simde_mm_or_si128(simde_mm_and_si128(mask, a), simde_mm_andnot_si128(mask, src)); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi8 - #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi16(src, k, a); - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - return simde_mm_blendv_epi8(src, a, simde_mm_movm_epi16(k)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - simde__m128i mask = simde_mm_movm_epi16(k); - return simde_mm_or_si128(simde_mm_and_si128(mask, a), simde_mm_andnot_si128(mask, src)); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi16 - #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi32(src, k, a); - #elif defined(SIMDE_X86_SSE4_1_NATIVE) - return simde_mm_blendv_epi8(src, a, simde_mm_movm_epi32(k)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - simde__m128i mask = simde_mm_movm_epi32(k); - return simde_mm_or_si128(simde_mm_and_si128(mask, a), simde_mm_andnot_si128(mask, src)); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi32 - #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_epi64(src, k, a); - #else - simde__m128i_private - src_ = simde__m128i_to_private(src), - a_ = simde__m128i_to_private(a), - r_; - - /* N.B. CM: No fallbacks as there are only two elements */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_epi64 - #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_mask_mov_pd(simde__m128d src, simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_pd(src, k, a); - #else - return simde_mm_castsi128_pd(simde_mm_mask_mov_epi64(simde_mm_castpd_si128(src), k, simde_mm_castpd_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_pd - #define _mm_mask_mov_pd(src, k, a) simde_mm_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_mask_mov_ps (simde__m128 src, simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_mask_mov_ps(src, k, a); - #else - return simde_mm_castsi128_ps(simde_mm_mask_mov_epi32(simde_mm_castps_si128(src), k, simde_mm_castps_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_mask_mov_ps - #define _mm_mask_mov_ps(src, k, a) simde_mm_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi8(src, k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_blendv_epi8(src, a, simde_mm256_movm_epi8(k)); - #else - simde__m256i_private - r_, - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a); - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi8(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi8 - #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi16(src, k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_blendv_epi8(src, a, simde_mm256_movm_epi16(k)); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi16 - #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi32(src, k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_blendv_epi8(src, a, simde_mm256_movm_epi32(k)); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi32 - #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_epi64(src, k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_blendv_epi8(src, a, simde_mm256_movm_epi64(k)); - #else - simde__m256i_private - src_ = simde__m256i_to_private(src), - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]); - r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_epi64 - #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_pd(src, k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_pd - #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_mov_ps(src, k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_mov_ps - #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_mov_epi8(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi8 - #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_mov_epi16(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi16 - #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi32(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi32 - #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_epi64(src, k, a); - #else - simde__m512i_private - src_ = simde__m512i_to_private(src), - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_epi64 - #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_pd(src, k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_pd - #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mov_ps(src, k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mov_ps - #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi8(k, a); - #elif defined(SIMDE_X86_SSSE3_NATIVE) - return simde_mm_and_si128(a, simde_mm_movm_epi8(k)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi8 - #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi16(k, a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_and_si128(a, simde_mm_movm_epi16(k)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi16 - #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi32(k, a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - return simde_mm_and_si128(a, simde_mm_movm_epi32(k)); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi32 - #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_epi64(k, a); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - r_; - - /* N.B. CM: No fallbacks as there are only two elements */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_epi64 - #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_pd(k, a); - #else - return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_pd - #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_maskz_mov_ps(k, a); - #else - return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_maskz_mov_ps - #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi8(k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_and_si256(a, simde_mm256_movm_epi8(k)); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi8 - #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi16(k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_and_si256(a, simde_mm256_movm_epi16(k)); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi16 - #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi32(k, a); - #elif defined(SIMDE_X86_AVX2_NATIVE) - return simde_mm256_and_si256(a, simde_mm256_movm_epi32(k)); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi32 - #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_epi64(k, a); - #else - simde__m256i_private - a_ = simde__m256i_to_private(a), - r_; - - /* N.B. CM: This fallback may not be faster as there are only four elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); - r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_epi64 - #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_pd(k, a); - #else - return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_pd - #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256 -simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_mov_ps(k, a); - #else - return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_maskz_mov_ps - #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi8(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSSE3_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); - #else - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi8 - #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_mov_epi16(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi16 - #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi32(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi32 - #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_epi64(k, a); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - r_; - - /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ - #if defined(SIMDE_X86_SSE2_NATIVE) - r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); - r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_epi64 - #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_pd(k, a); - #else - return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_pd - #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mov_ps(k, a); - #else - return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mov_ps - #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask64 -simde_mm512_movepi8_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movepi8_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask64 r; - - #if defined(SIMDE_X86_AVX2_NATIVE) - r = HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(unsigned int, simde_mm256_movemask_epi8(a_.m256i[1]))); - r = (r << 32) | HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(unsigned int, simde_mm256_movemask_epi8(a_.m256i[0]))); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r = HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(unsigned int, simde_mm_movemask_epi8(a_.m128i[3]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(unsigned int, simde_mm_movemask_epi8(a_.m128i[2]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(unsigned int, simde_mm_movemask_epi8(a_.m128i[1]))); - r = (r << 16) | HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(unsigned int, simde_mm_movemask_epi8(a_.m128i[0]))); - #else - r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { - r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; - } - #endif - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi8_mask - #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask32 -simde_mm512_movepi16_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_movepi16_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask32 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r |= (a_.i16[i] < 0) ? 
(UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi16_mask - #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_movepi32_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movepi32_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask16 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi32_mask - #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_movepi64_mask (simde__m512i a) { - #if defined(SIMDE_X86_AVX512DQ_NATIVE) - return _mm512_movepi64_mask(a); - #else - simde__m512i_private a_ = simde__m512i_to_private(a); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; - } - - return r; - #endif -} -#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) - #undef _mm512_movepi64_mask - #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_loadu_ps (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_loadu_ps(mem_addr); - #else - simde__m512 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_loadu_ps - #define _mm512_loadu_ps(a) simde_mm512_loadu_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_loadu_pd (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_loadu_pd(mem_addr); - #else - simde__m512d r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_loadu_pd - #define _mm512_loadu_pd(a) simde_mm512_loadu_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_load_si512 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_load_si512(HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); - #else - simde__m512i r; - simde_memcpy(&r, SIMDE_ASSUME_ALIGNED_AS(simde__m512i, mem_addr), sizeof(r)); - return r; - #endif -} -#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) -#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_load_epi8 - #undef _mm512_load_epi16 - #undef _mm512_load_epi32 - #undef _mm512_load_epi64 - #undef _mm512_load_si512 - #define _mm512_load_si512(a) simde_mm512_load_si512(a) - #define _mm512_load_epi8(a) simde_mm512_load_si512(a) - #define _mm512_load_epi16(a) simde_mm512_load_si512(a) - #define _mm512_load_epi32(a) simde_mm512_load_si512(a) - #define _mm512_load_epi64(a) simde_mm512_load_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_loadu_si512 (void const * mem_addr) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_loadu_si512(HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); - #else - simde__m512i r; - - 
#if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm512_loadu_si512_s { - __typeof__(r) v; - } __attribute__((__packed__, __may_alias__)); - r = HEDLEY_REINTERPRET_CAST(const struct simde_mm512_loadu_si512_s *, mem_addr)->v; - HEDLEY_DIAGNOSTIC_POP - #else - simde_memcpy(&r, mem_addr, sizeof(r)); - #endif - - return r; - #endif -} -#define simde_mm512_loadu_epi8(mem_addr) simde_mm512_loadu_si512(mem_addr) -#define simde_mm512_loadu_epi16(mem_addr) simde_mm512_loadu_si512(mem_addr) -#define simde_mm512_loadu_epi32(mem_addr) simde_mm512_loadu_si512(mem_addr) -#define simde_mm512_loadu_epi64(mem_addr) simde_mm512_loadu_si512(mem_addr) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_loadu_epi8 - #undef _mm512_loadu_epi16 - #undef _mm512_loadu_epi32 - #undef _mm512_loadu_epi64 - #undef _mm512_loadu_si512 - #define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a) - #define _mm512_loadu_epi8(a) simde_mm512_loadu_si512(a) - #define _mm512_loadu_epi16(a) simde_mm512_loadu_si512(a) - #define _mm512_loadu_epi32(a) simde_mm512_loadu_si512(a) - #define _mm512_loadu_epi64(a) simde_mm512_loadu_si512(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_store_ps (void * mem_addr, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_store_ps(mem_addr, a); - #else - simde_memcpy(SIMDE_ASSUME_ALIGNED_AS(simde__m512, mem_addr), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_store_ps - #define _mm512_store_ps(mem_addr, a) simde_mm512_store_ps(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_storeu_ps (void * mem_addr, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_storeu_ps(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_storeu_ps - #define _mm512_storeu_ps(mem_addr, a) simde_mm512_storeu_ps(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_store_pd (void * mem_addr, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_store_pd(mem_addr, a); - #else - simde_memcpy(SIMDE_ASSUME_ALIGNED_AS(simde__m512d, mem_addr), &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_store_pd - #define _mm512_store_pd(mem_addr, a) simde_mm512_store_pd(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_storeu_pd (void * mem_addr, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_storeu_pd(mem_addr, a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_storeu_pd - #define _mm512_storeu_pd(mem_addr, a) simde_mm512_storeu_pd(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_store_si512 (void * mem_addr, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_store_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); - #else - simde_memcpy(SIMDE_ASSUME_ALIGNED_AS(simde__m512i, mem_addr), &a, sizeof(a)); - #endif -} -#define simde_mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#define simde_mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#define simde_mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#define simde_mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_store_epi8 - 
#undef _mm512_store_epi16 - #undef _mm512_store_epi32 - #undef _mm512_store_epi64 - #undef _mm512_store_si512 - #define _mm512_store_si512(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) - #define _mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void -simde_mm512_storeu_si512 (void * mem_addr, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - _mm512_storeu_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); - #else - simde_memcpy(mem_addr, &a, sizeof(a)); - #endif -} -#define simde_mm512_storeu_epi8(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) -#define simde_mm512_storeu_epi16(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) -#define simde_mm512_storeu_epi32(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) -#define simde_mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_storeu_epi8 - #undef _mm512_storeu_epi16 - #undef _mm512_storeu_epi32 - #undef _mm512_storeu_epi64 - #undef _mm512_storeu_si512 - #define _mm512_storeu_si512(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) - #define _mm512_storeu_epi8(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) - #define _mm512_storeu_epi16(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) - #define _mm512_storeu_epi32(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) - #define _mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, - int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, - int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, - int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, - int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, - int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, - int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { - simde__m512i_private r_; - - r_.i8[ 0] = e0; - r_.i8[ 1] = e1; - r_.i8[ 2] = e2; - r_.i8[ 3] = e3; - r_.i8[ 4] = e4; - r_.i8[ 5] = e5; - r_.i8[ 6] = e6; - r_.i8[ 7] = e7; - r_.i8[ 8] = e8; - r_.i8[ 9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; - r_.i8[16] = e16; - r_.i8[17] = e17; - r_.i8[18] = e18; - r_.i8[19] = e19; - r_.i8[20] = e20; - r_.i8[21] = e21; - r_.i8[22] = e22; - r_.i8[23] = e23; - r_.i8[24] = e24; - r_.i8[25] = e25; - r_.i8[26] = e26; - r_.i8[27] = e27; - r_.i8[28] = e28; - r_.i8[29] = e29; - r_.i8[30] = e30; - r_.i8[31] = e31; - r_.i8[32] = e32; - r_.i8[33] = e33; - r_.i8[34] = e34; - r_.i8[35] = e35; - r_.i8[36] = e36; - r_.i8[37] = e37; - r_.i8[38] = e38; - r_.i8[39] = e39; - r_.i8[40] = e40; - r_.i8[41] = e41; - r_.i8[42] = e42; - r_.i8[43] = e43; - r_.i8[44] = e44; - r_.i8[45] = e45; - r_.i8[46] = e46; - r_.i8[47] = e47; - r_.i8[48] = e48; - r_.i8[49] = e49; - r_.i8[50] = e50; 
- r_.i8[51] = e51; - r_.i8[52] = e52; - r_.i8[53] = e53; - r_.i8[54] = e54; - r_.i8[55] = e55; - r_.i8[56] = e56; - r_.i8[57] = e57; - r_.i8[58] = e58; - r_.i8[59] = e59; - r_.i8[60] = e60; - r_.i8[61] = e61; - r_.i8[62] = e62; - r_.i8[63] = e63; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi8 - #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, - int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, - int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, - int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { - simde__m512i_private r_; - - r_.i16[ 0] = e0; - r_.i16[ 1] = e1; - r_.i16[ 2] = e2; - r_.i16[ 3] = e3; - r_.i16[ 4] = e4; - r_.i16[ 5] = e5; - r_.i16[ 6] = e6; - r_.i16[ 7] = e7; - r_.i16[ 8] = e8; - r_.i16[ 9] = e9; - r_.i16[10] = e10; - r_.i16[11] = e11; - r_.i16[12] = e12; - r_.i16[13] = e13; - r_.i16[14] = e14; - r_.i16[15] = e15; - r_.i16[16] = e16; - r_.i16[17] = e17; - r_.i16[18] = e18; - r_.i16[19] = e19; - r_.i16[20] = e20; - r_.i16[21] = e21; - r_.i16[22] = e22; - r_.i16[23] = e23; - r_.i16[24] = e24; - r_.i16[25] = e25; - r_.i16[26] = e26; - r_.i16[27] = e27; - r_.i16[28] = e28; - r_.i16[29] = e29; - r_.i16[30] = e30; - r_.i16[31] = e31; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi16 - #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - simde__m512i_private r_; - - r_.i32[ 0] = e0; - r_.i32[ 1] = e1; - r_.i32[ 2] = e2; - r_.i32[ 3] = e3; - r_.i32[ 4] = e4; - r_.i32[ 5] = e5; - r_.i32[ 6] = e6; - r_.i32[ 7] = e7; - r_.i32[ 8] = e8; - r_.i32[ 9] = e9; - r_.i32[10] = e10; - r_.i32[11] = e11; - r_.i32[12] = e12; - r_.i32[13] = e13; - r_.i32[14] = e14; - r_.i32[15] = e15; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi32 - #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, 
e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - simde__m512i_private r_; - - r_.i64[0] = e0; - r_.i64[1] = e1; - r_.i64[2] = e2; - r_.i64[3] = e3; - r_.i64[4] = e4; - r_.i64[5] = e5; - r_.i64[6] = e6; - r_.i64[7] = e7; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_epi64 - #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, - uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, - uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, - uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, - uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, - uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, - uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, - uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { - simde__m512i_private r_; - - r_.u8[ 0] = e0; - r_.u8[ 1] = e1; - r_.u8[ 2] = e2; - r_.u8[ 3] = e3; - r_.u8[ 4] = e4; - r_.u8[ 5] = e5; - r_.u8[ 6] = e6; - r_.u8[ 7] = e7; - r_.u8[ 8] = e8; - r_.u8[ 9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; - r_.u8[16] = e16; - r_.u8[17] = e17; - r_.u8[18] = e18; - r_.u8[19] = e19; - r_.u8[20] = e20; - r_.u8[21] = e21; - r_.u8[22] = e22; - r_.u8[23] = e23; - r_.u8[24] = e24; - r_.u8[25] = e25; - r_.u8[26] = e26; - r_.u8[27] = e27; - r_.u8[28] = e28; - r_.u8[29] = e29; - r_.u8[30] = e30; - r_.u8[31] = e31; - r_.u8[32] = e32; - r_.u8[33] = e33; - r_.u8[34] = e34; - r_.u8[35] = e35; - r_.u8[36] = e36; - r_.u8[37] = e37; - r_.u8[38] = e38; - r_.u8[39] = e39; - r_.u8[40] = e40; - r_.u8[41] = e41; - r_.u8[42] = e42; - r_.u8[43] = e43; - r_.u8[44] = e44; - r_.u8[45] = e45; - r_.u8[46] = e46; - r_.u8[47] = e47; - r_.u8[48] = e48; - r_.u8[49] = e49; - r_.u8[50] = e50; - r_.u8[51] = e51; - r_.u8[52] = e52; - r_.u8[53] = e53; - r_.u8[54] = e54; - r_.u8[55] = e55; - r_.u8[56] = e56; - r_.u8[57] = e57; - r_.u8[58] = e58; - r_.u8[59] = e59; - r_.u8[60] = e60; - r_.u8[61] = e61; - r_.u8[62] = e62; - r_.u8[63] = e63; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, - uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, - uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, - uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { - simde__m512i_private r_; - - r_.u16[ 0] = e0; - r_.u16[ 1] = e1; - r_.u16[ 2] = e2; - r_.u16[ 3] = e3; - r_.u16[ 4] = e4; - r_.u16[ 5] = e5; - r_.u16[ 6] = e6; - r_.u16[ 7] = e7; - r_.u16[ 8] = e8; - r_.u16[ 9] = e9; - 
r_.u16[10] = e10; - r_.u16[11] = e11; - r_.u16[12] = e12; - r_.u16[13] = e13; - r_.u16[14] = e14; - r_.u16[15] = e15; - r_.u16[16] = e16; - r_.u16[17] = e17; - r_.u16[18] = e18; - r_.u16[19] = e19; - r_.u16[20] = e20; - r_.u16[21] = e21; - r_.u16[22] = e22; - r_.u16[23] = e23; - r_.u16[24] = e24; - r_.u16[25] = e25; - r_.u16[26] = e26; - r_.u16[27] = e27; - r_.u16[28] = e28; - r_.u16[29] = e29; - r_.u16[30] = e30; - r_.u16[31] = e31; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, - uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { - simde__m512i_private r_; - - r_.u32[ 0] = e0; - r_.u32[ 1] = e1; - r_.u32[ 2] = e2; - r_.u32[ 3] = e3; - r_.u32[ 4] = e4; - r_.u32[ 5] = e5; - r_.u32[ 6] = e6; - r_.u32[ 7] = e7; - r_.u32[ 8] = e8; - r_.u32[ 9] = e9; - r_.u32[10] = e10; - r_.u32[11] = e11; - r_.u32[12] = e12; - r_.u32[13] = e13; - r_.u32[14] = e14; - r_.u32[15] = e15; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { - simde__m512i_private r_; - - r_.u64[ 0] = e0; - r_.u64[ 1] = e1; - r_.u64[ 2] = e2; - r_.u64[ 3] = e3; - r_.u64[ 4] = e4; - r_.u64[ 5] = e5; - r_.u64[ 6] = e6; - r_.u64[ 7] = e7; - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN(64) simde__m128i v[] = { d, c, b, a }; - return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512i_private r_; - - r_.m128i[0] = d; - r_.m128i[1] = c; - r_.m128i[2] = b; - r_.m128i[3] = a; - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - SIMDE_ALIGN(64) simde__m256i v[] = { b, a }; - return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); - #else - simde__m512i_private r_; - - r_.m256i[0] = b; - r_.m256i[1] = a; - - return simde__m512i_from_private(r_); - #endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, - simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - simde__m512_private r_; - - r_.f32[ 0] = e0; - r_.f32[ 1] = e1; - r_.f32[ 2] = e2; - r_.f32[ 3] = e3; - r_.f32[ 4] = e4; - r_.f32[ 5] = e5; - r_.f32[ 6] = e6; - r_.f32[ 7] = e7; - r_.f32[ 8] = e8; - r_.f32[ 9] = e9; - r_.f32[10] = e10; - r_.f32[11] = e11; - r_.f32[12] = e12; - r_.f32[13] = e13; - r_.f32[14] = e14; - r_.f32[15] = e15; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_ps - #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set_pd (simde_float64 
e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - simde__m512d_private r_; - - r_.f64[0] = e0; - r_.f64[1] = e1; - r_.f64[2] = e2; - r_.f64[3] = e3; - r_.f64[4] = e4; - r_.f64[5] = e5; - r_.f64[6] = e6; - r_.f64[7] = e7; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set_pd - #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi8 (int8_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi8(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi8 - #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi16 (int16_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi16(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi16 - #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi32 (int32_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi32(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi32 - #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_set1_epi32(src, k, a); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_set1_epi32(k, a); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set1_epi64 (int64_t a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_epi64(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_epi64 - #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return 
_mm512_mask_set1_epi64(src, k, a); -#else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_set1_epi64(k, a); -#else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu8 (uint8_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu16 (uint16_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu32 (uint32_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_set1_epu64 (uint64_t a) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a; - } - - return simde__m512i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set1_ps (simde_float32 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_ps(a); - #else - simde__m512_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_ps - #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set1_pd (simde_float64 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_set1_pd(a); - #else - simde__m512d_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set1_pd - #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { - simde__m512i_private r_; - - r_.i32[ 0] = a; - r_.i32[ 1] = b; - r_.i32[ 2] = c; - r_.i32[ 3] = d; - r_.i32[ 4] = a; - r_.i32[ 5] = b; - r_.i32[ 6] = c; - r_.i32[ 7] = d; - r_.i32[ 8] = a; - r_.i32[ 9] = b; - r_.i32[10] = c; - r_.i32[11] = d; - r_.i32[12] = a; - r_.i32[13] = b; - r_.i32[14] = c; - r_.i32[15] = d; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_epi32 - #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { - simde__m512i_private r_; - - r_.i64[0] = a; - r_.i64[1] = b; - r_.i64[2] = c; - 
r_.i64[3] = d; - r_.i64[4] = a; - r_.i64[5] = b; - r_.i64[6] = c; - r_.i64[7] = d; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_epi64 - #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { - simde__m512_private r_; - - r_.f32[ 0] = a; - r_.f32[ 1] = b; - r_.f32[ 2] = c; - r_.f32[ 3] = d; - r_.f32[ 4] = a; - r_.f32[ 5] = b; - r_.f32[ 6] = c; - r_.f32[ 7] = d; - r_.f32[ 8] = a; - r_.f32[ 9] = b; - r_.f32[10] = c; - r_.f32[11] = d; - r_.f32[12] = a; - r_.f32[13] = b; - r_.f32[14] = c; - r_.f32[15] = d; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_ps - #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { - simde__m512d_private r_; - - r_.f64[0] = a; - r_.f64[1] = b; - r_.f64[2] = c; - r_.f64[3] = d; - r_.f64[4] = a; - r_.f64[5] = b; - r_.f64[6] = c; - r_.f64[7] = d; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_set4_pd - #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, - int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { - simde__m512i_private r_; - - r_.i32[ 0] = e15; - r_.i32[ 1] = e14; - r_.i32[ 2] = e13; - r_.i32[ 3] = e12; - r_.i32[ 4] = e11; - r_.i32[ 5] = e10; - r_.i32[ 6] = e9; - r_.i32[ 7] = e8; - r_.i32[ 8] = e7; - r_.i32[ 9] = e6; - r_.i32[10] = e5; - r_.i32[11] = e4; - r_.i32[12] = e3; - r_.i32[13] = e2; - r_.i32[14] = e1; - r_.i32[15] = e0; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_epi32 - #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { - simde__m512i_private r_; - - r_.i64[0] = e7; - r_.i64[1] = e6; - r_.i64[2] = e5; - r_.i64[3] = e4; - r_.i64[4] = e3; - r_.i64[5] = e2; - r_.i64[6] = e1; - r_.i64[7] = e0; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_epi64 - #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, - simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, - simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, - simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { - simde__m512_private r_; - - r_.f32[ 0] = e15; - r_.f32[ 1] = e14; - r_.f32[ 2] = e13; - r_.f32[ 3] = e12; - r_.f32[ 4] = e11; - r_.f32[ 5] = e10; - r_.f32[ 6] = e9; - r_.f32[ 7] = e8; - r_.f32[ 8] = e7; - r_.f32[ 9] = e6; - r_.f32[10] = e5; 
- r_.f32[11] = e4; - r_.f32[12] = e3; - r_.f32[13] = e2; - r_.f32[14] = e1; - r_.f32[15] = e0; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_ps - #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { - simde__m512d_private r_; - - r_.f64[0] = e7; - r_.f64[1] = e6; - r_.f64[2] = e5; - r_.f64[3] = e4; - r_.f64[4] = e3; - r_.f64[5] = e2; - r_.f64[6] = e1; - r_.f64[7] = e0; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr_pd - #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { - simde__m512i_private r_; - - r_.i32[ 0] = d; - r_.i32[ 1] = c; - r_.i32[ 2] = b; - r_.i32[ 3] = a; - r_.i32[ 4] = d; - r_.i32[ 5] = c; - r_.i32[ 6] = b; - r_.i32[ 7] = a; - r_.i32[ 8] = d; - r_.i32[ 9] = c; - r_.i32[10] = b; - r_.i32[11] = a; - r_.i32[12] = d; - r_.i32[13] = c; - r_.i32[14] = b; - r_.i32[15] = a; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_epi32 - #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { - simde__m512i_private r_; - - r_.i64[0] = d; - r_.i64[1] = c; - r_.i64[2] = b; - r_.i64[3] = a; - r_.i64[4] = d; - r_.i64[5] = c; - r_.i64[6] = b; - r_.i64[7] = a; - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_epi64 - #define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { - simde__m512_private r_; - - r_.f32[ 0] = d; - r_.f32[ 1] = c; - r_.f32[ 2] = b; - r_.f32[ 3] = a; - r_.f32[ 4] = d; - r_.f32[ 5] = c; - r_.f32[ 6] = b; - r_.f32[ 7] = a; - r_.f32[ 8] = d; - r_.f32[ 9] = c; - r_.f32[10] = b; - r_.f32[11] = a; - r_.f32[12] = d; - r_.f32[13] = c; - r_.f32[14] = b; - r_.f32[15] = a; - - return simde__m512_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_ps - #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { - simde__m512d_private r_; - - r_.f64[0] = d; - r_.f64[1] = c; - r_.f64[2] = b; - r_.f64[3] = a; - r_.f64[4] = d; - r_.f64[5] = c; - r_.f64[6] = b; - r_.f64[7] = a; - - return simde__m512d_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setr4_pd - #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_setzero_si512(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_si512(); - #else - simde__m512i r; - simde_memset(&r, 0, sizeof(r)); - return r; - #endif -} -#define 
simde_mm512_setzero_epi32() simde_mm512_setzero_si512() -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_si512 - #define _mm512_setzero_si512() simde_mm512_setzero_si512() - #undef _mm512_setzero_epi32 - #define _mm512_setzero_epi32() simde_mm512_setzero_si512() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_x_mm512_setone_si512(void) { - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); - } - - return simde__m512i_from_private(r_); -} -#define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_setzero_ps(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_ps(); - #else - return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_si512 - #define _mm512_setzero_si512() simde_mm512_setzero_si512() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_x_mm512_setone_ps(void) { - return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_setzero_pd(void) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_setzero_pd(); - #else - return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_setzero_si512 - #define _mm512_setzero_si512() simde_mm512_setzero_si512() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_x_mm512_setone_pd(void) { - return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi32(simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_abs_epi32(a); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi32 - #define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_abs_epi32(src, k, a); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_abs_epi32(k, a); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_abs_epi64(simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_abs_epi64(a); -#else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_epi64 - #define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_abs_epi64(src, k, a); -#else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_abs_epi64(k, a); -#else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_abs_ps(simde__m512 v2) { -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_abs_ps(v2); -#else - simde__m512_private - r_, - v2_ = simde__m512_to_private(v2); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (v2_.f32[i] < INT64_C(0)) ? -v2_.f32[i] : v2_.f32[i]; - } - - return simde__m512_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_ps - #define _mm512_abs_ps(v2) simde_mm512_abs_ps(v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_abs_ps(simde__m512 src, simde__mmask16 k, simde__m512 v2) { -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_mask_abs_ps(src, k, v2); -#else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_abs_ps(v2)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_abs_ps(src, k, v2) simde_mm512_mask_abs_ps(src, k, v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_abs_pd(simde__m512d v2) { -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_abs_pd(v2); -#else - simde__m512d_private - r_, - v2_ = simde__m512d_to_private(v2); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (v2_.f64[i] < INT64_C(0)) ? 
-v2_.f64[i] : v2_.f64[i]; - } - - return simde__m512d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_abs_pd - #define _mm512_abs_pd(v2) simde_mm512_abs_pd(v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_abs_pd(simde__m512d src, simde__mmask8 k, simde__m512d v2) { -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - return _mm512_mask_abs_pd(src, k, v2); -#else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_abs_pd(v2)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_abs_pd(src, k, v2) simde_mm512_mask_abs_pd(src, k, v2) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi32 - #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi32 - #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi32 - #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_epi64 - #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) -#endif - 
-SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_epi64 - #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_epi64 - #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_add_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_ps - #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_ps - #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_ps - #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_add_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_add_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_add_pd - #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_add_pd(src, k, a, b); - #else - return 
simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_add_pd - #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_add_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_add_pd - #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi32 - #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi32 - #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi32 - #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] & b_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_epi64 - #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_and_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_and_epi64 - #define _mm512_mask_and_epi64(src, k, a, b) 
simde_mm512_mask_and_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_and_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_and_epi64 - #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_and_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_and_si512 - #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_andnot_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) -#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_andnot_si512 - #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b) - #undef _mm512_andnot_epi32 - #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) - #undef _mm512_andnot_epi64 - #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_andnot_epi32(src, k, a, b); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_andnot_epi32(k, a, b); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i 
-simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_andnot_epi64(src, k, a, b); -#else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_andnot_epi64(k, a, b); -#else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_blend_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_blend_epi32(k, a, b); -#else - return simde_mm512_mask_mov_epi32(a, k, b); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_blend_epi32(k, a, b) simde_mm512_mask_blend_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_blend_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_blend_epi64(k, a, b); -#else - return simde_mm512_mask_mov_epi64(a, k, b); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_blend_epi64(k, a, b) simde_mm512_mask_blend_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_blend_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_blend_ps(k, a, b); -#else - return simde_mm512_mask_mov_ps(a, k, b); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_blend_ps(k, a, b) simde_mm512_mask_blend_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_blend_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_blend_pd(k, a, b); -#else - return simde_mm512_mask_mov_pd(a, k, b); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_blend_pd(k, a, b) simde_mm512_mask_blend_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_broadcast_f32x4 (simde__m128 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_f32x4(a); - #else - simde__m512_private r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256[1] = r_.m256[0] = simde_mm256_castsi256_ps(simde_mm256_broadcastsi128_si256(simde_mm_castps_si128(a))); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = a; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f32x4 - #define _mm512_broadcast_f32x4(a) simde_mm512_broadcast_f32x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_broadcast_f32x4(simde__m512 src, simde__mmask16 k, simde__m128 a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_f32x4(src, k, a); -#else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x4(a)); -#endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcast_f32x4(src, k, a) simde_mm512_mask_broadcast_f32x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_broadcast_f32x4(simde__mmask16 k, simde__m128 a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_f32x4(k, a); -#else - return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x4(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcast_f32x4(k, a) simde_mm512_maskz_broadcast_f32x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_broadcast_f64x4 (simde__m256d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_f64x4(a); - #else - simde__m512d_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = a; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_f64x4 - #define _mm512_broadcast_f64x4(a) simde_mm512_broadcast_f64x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_broadcast_f64x4(simde__m512d src, simde__mmask8 k, simde__m256d a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_f64x4(src, k, a); -#else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x4(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcast_f64x4(src, k, a) simde_mm512_mask_broadcast_f64x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_broadcast_f64x4(simde__mmask8 k, simde__m256d a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_f64x4(k, a); -#else - return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x4(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcast_f64x4(k, a) simde_mm512_maskz_broadcast_f64x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcast_i32x4 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_i32x4(a); - #else - simde__m512i_private r_; - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = a; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_i32x4 - #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcast_i32x4(simde__m512i src, simde__mmask16 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_i32x4(src, k, a); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcast_i32x4(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcast_i32x4(src, k, a) simde_mm512_mask_broadcast_i32x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcast_i32x4(simde__mmask16 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_i32x4(k, a); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcast_i32x4(a)); -#endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcast_i32x4(k, a) simde_mm512_maskz_broadcast_i32x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcast_i64x4 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcast_i64x4(a); - #else - simde__m512i_private r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = a; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcast_i64x4 - #define _mm512_broadcast_i64x4(a) simde_mm512_broadcast_i64x4(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcast_i64x4(simde__m512i src, simde__mmask8 k, simde__m256i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcast_i64x4(src, k, a); -#else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcast_i64x4(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcast_i64x4(src, k, a) simde_mm512_mask_broadcast_i64x4(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcast_i64x4(simde__mmask8 k, simde__m256i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcast_i64x4(k, a); -#else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcast_i64x4(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcast_i64x4(k, a) simde_mm512_maskz_broadcast_i64x4(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastd_epi32 (simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastd_epi32(a); -#else - simde__m512i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[0]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastd_epi32 - #define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcastd_epi32(simde__m512i src, simde__mmask16 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastd_epi32(src, k, a); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcastd_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcastd_epi32(src, k, a) simde_mm512_mask_broadcastd_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcastd_epi32(simde__mmask16 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastd_epi32(k, a); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcastd_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcastd_epi32(k, a) simde_mm512_maskz_broadcastd_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_broadcastq_epi64 (simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastq_epi64(a); -#else - simde__m512i_private r_; - simde__m128i_private a_= simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[0]; - } - - return simde__m512i_from_private(r_); -#endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastq_epi64 - #define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_broadcastq_epi64(simde__m512i src, simde__mmask8 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastq_epi64(src, k, a); -#else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcastq_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcastq_epi64(src, k, a) simde_mm512_mask_broadcastq_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_broadcastq_epi64(simde__mmask8 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastq_epi64(k, a); -#else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcastq_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcastq_epi64(k, a) simde_mm512_maskz_broadcastq_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_broadcastss_ps (simde__m128 a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastss_ps(a); -#else - simde__m512_private r_; - simde__m128_private a_= simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[0]; - } - - return simde__m512_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastss_ps - #define _mm512_broadcastss_ps(a) simde_mm512_broadcastss_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_broadcastss_ps(simde__m512 src, simde__mmask16 k, simde__m128 a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastss_ps(src, k, a); -#else - simde__m512_private - src_ = simde__m512_to_private(src), - r_; - simde__m128_private - a_ = simde__m128_to_private(a); - - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((k >> i) & 1) ? a_.f32[0] : src_.f32[i]; - } - - return simde__m512_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcastss_ps(src, k, a) simde_mm512_mask_broadcastss_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_broadcastss_ps(simde__mmask16 k, simde__m128 a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastss_ps(k, a); -#else - simde__m512_private - r_; - simde__m128_private - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((k >> i) & 1) ? 
a_.f32[0] : INT32_C(0); - } - - return simde__m512_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcastss_ps(k, a) simde_mm512_maskz_broadcastss_ps(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_broadcastsd_pd (simde__m128d a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_broadcastsd_pd(a); -#else - simde__m512d_private r_; - simde__m128d_private a_= simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[0]; - } - - return simde__m512d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_broadcastsd_pd - #define _mm512_broadcastsd_pd(a) simde_mm512_broadcastsd_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_broadcastsd_pd(simde__m512d src, simde__mmask8 k, simde__m128d a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_broadcastsd_pd(src, k, a); -#else - simde__m512d_private - src_ = simde__m512d_to_private(src), - r_; - simde__m128d_private - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((k >> i) & 1) ? a_.f64[0] : src_.f64[i]; - } - - return simde__m512d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_broadcastsd_pd(src, k, a) simde_mm512_mask_broadcastsd_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_broadcastsd_pd(simde__mmask8 k, simde__m128d a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_broadcastsd_pd(k, a); -#else - simde__m512d_private - r_; - simde__m128d_private - a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((k >> i) & 1) ? 
a_.f64[0] : INT64_C(0); - } - - return simde__m512d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_broadcastsd_pd(k, a) simde_mm512_maskz_broadcastsd_pd(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpeq_epi32_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_epi32_mask - #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpeq_epi32_mask(k1, a, b); - #else - return simde_mm512_cmpeq_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpeq_epi32_mask - #define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpeq_epi64_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_epi64_mask - #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpeq_epi64_mask(k1, a, b); - #else - return simde_mm512_cmpeq_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpeq_epi64_mask - #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpgt_epi32_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi32_mask - #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpgt_epi32_mask(k1, a, b); - #else - return 
simde_mm512_cmpgt_epi32_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpgt_epi32_mask - #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cmpgt_epi64_mask(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]); - } - - return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpgt_epi64_mask - #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cmpgt_epi64_mask(k1, a, b); - #else - return simde_mm512_cmpgt_epi64_mask(a, b) & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cmpgt_epi64_mask - #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) - SIMDE_REQUIRE_CONSTANT(imm8) - HEDLEY_REQUIRE_MSG(((imm8 >= 0) && (imm8 <= 31)), "imm8 must be one of the SIMDE_CMP_* macros (values: [0, 31])") { - #if defined(SIMDE_X86_AVX512F_NATIVE) - simde__mmask16 r; - SIMDE_CONSTIFY_32_(_mm512_cmp_ps_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b); - return r; - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - break; - case SIMDE_CMP_LT_OS: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - break; - case SIMDE_CMP_LE_OS: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - break; - case SIMDE_CMP_UNORD_Q: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_UQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - break; - case SIMDE_CMP_NLT_US: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - break; - case SIMDE_CMP_NLE_US: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - break; - case SIMDE_CMP_ORD_Q: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? 
~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_UQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - break; - case SIMDE_CMP_NGE_US: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - break; - case SIMDE_CMP_NGT_US: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - break; - case SIMDE_CMP_FALSE_OQ: - r_ = simde__m512_to_private(simde_mm512_setzero_ps()); - break; - case SIMDE_CMP_NEQ_OQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - break; - case SIMDE_CMP_GE_OS: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - break; - case SIMDE_CMP_GT_OS: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - break; - case SIMDE_CMP_TRUE_UQ: - r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); - break; - case SIMDE_CMP_EQ_OS: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - break; - case SIMDE_CMP_LT_OQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - break; - case SIMDE_CMP_LE_OQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - break; - case SIMDE_CMP_UNORD_S: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_US: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - break; - case SIMDE_CMP_NLT_UQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - break; - case SIMDE_CMP_NLE_UQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - break; - case SIMDE_CMP_ORD_S: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_US: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); - break; - case SIMDE_CMP_NGE_UQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); - break; - case SIMDE_CMP_NGT_UQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); - break; - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512_to_private(simde_mm512_setzero_ps()); - break; - case SIMDE_CMP_NEQ_OS: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); - break; - case SIMDE_CMP_GE_OQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); - break; - case SIMDE_CMP_GT_OQ: - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); - break; - case SIMDE_CMP_TRUE_US: - r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_LT_OS: - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_LE_OS: - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_UNORD_Q: - #if defined(simde_math_isnanf) - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_UQ: - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NLT_US: - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NLE_US: - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_ORD_Q: - #if defined(simde_math_isnanf) - r_.u32[i] = (!simde_math_isnanf(a_.f32[i]) && !simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_UQ: - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NGE_US: - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NGT_US: - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_FALSE_OQ: - r_.u32[i] = UINT32_C(0); - break; - case SIMDE_CMP_NEQ_OQ: - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_GE_OS: - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_GT_OS: - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_TRUE_UQ: - r_.u32[i] = ~UINT32_C(0); - break; - case SIMDE_CMP_EQ_OS: - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_LT_OQ: - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_LE_OQ: - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_UNORD_S: - #if defined(simde_math_isnanf) - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_US: - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NLT_UQ: - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NLE_UQ: - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_ORD_S: - #if defined(simde_math_isnanf) - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_US: - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NGE_UQ: - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_NGT_UQ: - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_FALSE_OS: - r_.u32[i] = UINT32_C(0); - break; - case SIMDE_CMP_NEQ_OS: - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_GE_OQ: - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_GT_OQ: - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); - break; - case SIMDE_CMP_TRUE_US: - r_.u32[i] = ~UINT32_C(0); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - } - #endif - - return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_ps_mask - #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8) - SIMDE_REQUIRE_CONSTANT(imm8) - HEDLEY_REQUIRE_MSG(((imm8 >= 0) && (imm8 <= 31)), "imm8 must be one of the SIMDE_CMP_* macros (values: [0, 31])") { - #if defined(SIMDE_X86_AVX512F_NATIVE) - simde__mmask8 r; - SIMDE_CONSTIFY_32_(_mm512_cmp_pd_mask, r, (HEDLEY_UNREACHABLE(), 0), imm8, a, b); - return r; - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - break; - case SIMDE_CMP_LT_OS: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - break; - case SIMDE_CMP_LE_OS: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - break; - case SIMDE_CMP_UNORD_Q: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_UQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - break; - case SIMDE_CMP_NLT_US: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - break; - case SIMDE_CMP_NLE_US: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - break; - case SIMDE_CMP_ORD_Q: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (!simde_math_isnanf(a_.f64[i]) && !simde_math_isnanf(b_.f64[i])) ? 
~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_UQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - break; - case SIMDE_CMP_NGE_US: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - break; - case SIMDE_CMP_NGT_US: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - break; - case SIMDE_CMP_FALSE_OQ: - r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); - break; - case SIMDE_CMP_NEQ_OQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - break; - case SIMDE_CMP_GE_OS: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - break; - case SIMDE_CMP_GT_OS: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - break; - case SIMDE_CMP_TRUE_UQ: - r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); - break; - case SIMDE_CMP_EQ_OS: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - break; - case SIMDE_CMP_LT_OQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - break; - case SIMDE_CMP_LE_OQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - break; - case SIMDE_CMP_UNORD_S: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_US: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - break; - case SIMDE_CMP_NLT_UQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - break; - case SIMDE_CMP_NLE_UQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - break; - case SIMDE_CMP_ORD_S: - #if defined(simde_math_isnanf) - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? UINT64_C(0) : ~UINT64_C(0); - } - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_US: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); - break; - case SIMDE_CMP_NGE_UQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); - break; - case SIMDE_CMP_NGT_UQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); - break; - case SIMDE_CMP_FALSE_OS: - r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); - break; - case SIMDE_CMP_NEQ_OS: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); - break; - case SIMDE_CMP_GE_OQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); - break; - case SIMDE_CMP_GT_OQ: - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); - break; - case SIMDE_CMP_TRUE_US: - r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - #else /* defined(SIMDE_VECTOR_SUBSCRIPT_OPS) */ - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - switch (imm8) { - case SIMDE_CMP_EQ_OQ: - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_LT_OS: - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_LE_OS: - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_UNORD_Q: - #if defined(simde_math_isnanf) - r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_UQ: - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NLT_US: - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NLE_US: - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_ORD_Q: - #if defined(simde_math_isnanf) - r_.u64[i] = (!simde_math_isnanf(a_.f64[i]) && !simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_UQ: - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NGE_US: - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NGT_US: - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_FALSE_OQ: - r_.u64[i] = UINT64_C(0); - break; - case SIMDE_CMP_NEQ_OQ: - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_GE_OS: - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_GT_OS: - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_TRUE_UQ: - r_.u64[i] = ~UINT64_C(0); - break; - case SIMDE_CMP_EQ_OS: - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_LT_OQ: - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_LE_OQ: - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_UNORD_S: - #if defined(simde_math_isnanf) - r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_NEQ_US: - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NLT_UQ: - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NLE_UQ: - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_ORD_S: - #if defined(simde_math_isnanf) - r_.u64[i] = (simde_math_isnanf(a_.f64[i]) || simde_math_isnanf(b_.f64[i])) ? UINT64_C(0) : ~UINT64_C(0); - #else - HEDLEY_UNREACHABLE(); - #endif - break; - case SIMDE_CMP_EQ_US: - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NGE_UQ: - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_NGT_UQ: - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_FALSE_OS: - r_.u64[i] = UINT64_C(0); - break; - case SIMDE_CMP_NEQ_OS: - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_GE_OQ: - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_GT_OQ: - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); - break; - case SIMDE_CMP_TRUE_US: - r_.u64[i] = ~UINT64_C(0); - break; - default: - HEDLEY_UNREACHABLE(); - break; - } - } - #endif - - return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmp_pd_mask - #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) { - return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_ps_mask - #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_cmpeq_ps_mask (simde__m512 a, simde__m512 b) { - return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_ps_mask - #define _mm512_cmpeq_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmplt_pd_mask (simde__m512d a, simde__m512d b) { - return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmplt_pd_mask - #define _mm512_cmplt_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_cmpeq_pd_mask (simde__m512d a, simde__m512d b) { - return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ); -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpeq_pd_mask - #define _mm512_cmpeq_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_cvtepi8_epi32 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi8_epi32(a); - #else - simde__m512i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi8_epi32 - #define _mm512_cvtepi8_epi32(a) simde_mm512_cvtepi8_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_cvtepi8_epi32 (simde__m512i src, simde__mmask16 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi8_epi32(src, k, a); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_cvtepi8_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtepi8_epi32(src, k, a) simde_mm512_mask_cvtepi8_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_cvtepi8_epi32 (simde__mmask16 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi8_epi32(k, a); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_cvtepi8_epi32(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtepi8_epi32(k, a) simde_mm512_maskz_cvtepi8_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_cvtepi8_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi8_epi64(a); - #else - 
simde__m512i_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i8); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i8[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi8_epi64 - #define _mm512_cvtepi8_epi64(a) simde_mm512_cvtepi8_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_cvtepi8_epi64 (simde__m512i src, simde__mmask8 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi8_epi64(src, k, a); -#else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_cvtepi8_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtepi8_epi64(src, k, a) simde_mm512_mask_cvtepi8_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_cvtepi8_epi64 (simde__mmask8 k, simde__m128i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi8_epi64(k, a); -#else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_cvtepi8_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtepi8_epi64(k, a) simde_mm512_maskz_cvtepi8_epi64(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtepi32_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi32_epi8(a); - #else - simde__m128i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i8, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i32[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi32_epi8 - #define _mm512_cvtepi32_epi8(a) simde_mm512_cvtepi32_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtepi32_epi8 (simde__m128i src, simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi32_epi8(src, k, a); -#else - simde__m128i_private r_; - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = ( (k>>i) & 1 ) ? HEDLEY_STATIC_CAST(int8_t, a_.i32[i]) : src_.i8[i]; - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtepi32_epi8(src, k, a) simde_mm512_mask_cvtepi32_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtepi32_epi8 (simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi32_epi8(k, a); -#else - simde__m128i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = ( (k>>i) & 1 ) ? 
HEDLEY_STATIC_CAST(int8_t, a_.i32[i]) : INT8_C(0); - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtepi32_epi8(k, a) simde_mm512_maskz_cvtepi32_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtepi32_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi32_epi16(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i32); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi32_epi16 - #define _mm512_cvtepi32_epi16(a) simde_mm512_cvtepi32_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtepi32_epi16 (simde__m256i src, simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi32_epi16(src, k, a); -#else - simde__m256i_private r_; - simde__m256i_private src_ = simde__m256i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = ( (k>>i) & 1 ) ? HEDLEY_STATIC_CAST(int16_t, a_.i32[i]) : src_.i16[i]; - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtepi32_epi16(src, k, a) simde_mm512_mask_cvtepi32_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtepi32_epi16 (simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi32_epi16(k, a); -#else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = ( (k>>i) & 1 ) ? 
HEDLEY_STATIC_CAST(int16_t, a_.i32[i]) : INT16_C(0); - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtepi32_epi16(k, a) simde_mm512_maskz_cvtepi32_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtepi64_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi64_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].i8, a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi64_epi8 - #define _mm512_cvtepi64_epi8(a) simde_mm512_cvtepi64_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtepi64_epi8 (simde__m128i src, simde__mmask8 k,simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi64_epi8(src, k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = ( (k>>i) & 1 ) ? HEDLEY_STATIC_CAST(int8_t, a_.i64[i]) : src_.i8[i] ; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtepi64_epi8 - #define _mm512_mask_cvtepi64_epi8(src, k, a) simde_mm512_mask_cvtepi64_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtepi64_epi8 (simde__mmask8 k,simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi64_epi8(k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = ( (k>>i) & 1 ) ? 
HEDLEY_STATIC_CAST(int8_t, a_.i64[i]) : INT8_C(0) ; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtepi64_epi8 - #define _mm512_maskz_cvtepi64_epi8(k, a) simde_mm512_maskz_cvtepi64_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtepi64_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi64_epi16(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i16, a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i64[i]); - } - #endif - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi64_epi16 - #define _mm512_cvtepi64_epi16(a) simde_mm512_cvtepi64_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtepi64_epi16 (simde__m128i src, simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi64_epi16(src, k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = ( (k>>i) & 1 ) ? HEDLEY_STATIC_CAST(int16_t, a_.i64[i]) : src_.i16[i]; - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtepi64_epi16 - #define _mm512_mask_cvtepi64_epi16(src, k, a) simde_mm512_mask_cvtepi64_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtepi64_epi16 (simde__mmask8 k, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi64_epi16(k, a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = ( (k>>i) & 1 ) ? 
HEDLEY_STATIC_CAST(int16_t, a_.i64[i]) : INT16_C(0); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_cvtepi64_epi16 - #define _mm512_maskz_cvtepi64_epi16(k, a) simde_mm512_maskz_cvtepi64_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtepi64_epi32 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtepi64_epi32(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.i64); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i64[i]); - } - #endif - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtepi64_epi32 - #define _mm512_cvtepi64_epi32(a) simde_mm512_cvtepi64_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtepi64_epi32 (simde__m256i src, simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtepi64_epi32(src, k, a); -#else - simde__m256i_private src_= simde__m256i_to_private(src); - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = ( (k>>i) & 1 ) ? HEDLEY_STATIC_CAST(int32_t, a_.i64[i]) : src_.i32[i]; - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtepi64_epi32(src, k, a) simde_mm512_mask_cvtepi64_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtepi64_epi32 (simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtepi64_epi32(k, a); -#else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = ( (k>>i) & 1 ) ? HEDLEY_STATIC_CAST(int32_t, a_.i64[i]) : INT32_C(0); - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtepi64_epi32(k, a) simde_mm512_maskz_cvtepi64_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtsepi32_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi32_epi8(a); - #else - simde__m128i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = - (a_.i32[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i32[i] > INT8_MAX) - ? 
(INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi32_epi8 - #define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtsepi32_epi8 (simde__m128i src, simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi32_epi8(src, k, a); -#else - simde__m128i_private r_; - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.i32[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i32[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]))) : src_.i8[i] ; - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtsepi32_epi8(src, k, a) simde_mm512_mask_cvtsepi32_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtsepi32_epi8 (simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi32_epi8(k, a); -#else - simde__m128i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.i32[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i32[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]))) : INT8_C(0) ; - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtsepi32_epi8(k, a) simde_mm512_maskz_cvtsepi32_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtsepi32_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi32_epi16(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = - (a_.i32[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i32[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi32_epi16 - #define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtsepi32_epi16 (simde__m256i src, simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi32_epi16(src, k, a); -#else - simde__m256i_private r_; - simde__m256i_private src_ = simde__m256i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = ((k>>i) &1 ) ? - ((a_.i32[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i32[i] > INT16_MAX) - ? 
(INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : src_.i16[i]; - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtsepi32_epi16(src, k, a) simde_mm512_mask_cvtsepi32_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtsepi32_epi16 (simde__mmask16 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi32_epi16(k, a); -#else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = ((k>>i) &1 ) ? - ((a_.i32[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i32[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : INT16_C(0); - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtsepi32_epi16(k, a) simde_mm512_maskz_cvtsepi32_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtsepi64_epi8 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi64_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = - (a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi64_epi8 - #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtsepi64_epi8 (simde__m128i src, simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi64_epi8(src, k, a); -#else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : src_.i8[i]; - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi64_epi8 - #define _mm512_mask_cvtsepi64_epi8(src, k, a) simde_mm512_mask_cvtsepi64_epi8(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtsepi64_epi8 (simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi64_epi8(k, a); -#else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = ((k>>i) &1 ) ? - ((a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? 
(INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : INT8_C(0); - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtsepi64_epi8(k, a) simde_mm512_maskz_cvtsepi64_epi8(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_cvtsepi64_epi16 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi64_epi16(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = - (a_.i64[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i64[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i64[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi64_epi16 - #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_mask_cvtsepi64_epi16 (simde__m128i src, simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi64_epi16(src, k, a); -#else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private src_ = simde__m128i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i64[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : src_.i16[i]; - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_cvtsepi64_epi16 - #define _mm512_mask_cvtsepi64_epi16(src, k, a) simde_mm512_mask_cvtsepi64_epi16(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_maskz_cvtsepi64_epi16 (simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi64_epi16(k, a); -#else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i16[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i64[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : INT16_C(0); - } - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtsepi64_epi16(k, a) simde_mm512_maskz_cvtsepi64_epi16(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_cvtsepi64_epi32 (simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_cvtsepi64_epi32(a); - #else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = - (a_.i64[i] < INT32_MIN) - ? (INT32_MIN) - : ((a_.i64[i] > INT32_MAX) - ? 
(INT32_MAX) - : HEDLEY_STATIC_CAST(int32_t, a_.i64[i])); - } - - return simde__m256i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_cvtsepi64_epi32 - #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_mask_cvtsepi64_epi32 (simde__m256i src, simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_cvtsepi64_epi32(src, k, a); -#else - simde__m256i_private r_; - simde__m256i_private src_ = simde__m256i_to_private(src); - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT32_MIN) - ? (INT32_MIN) - : ((a_.i64[i] > INT32_MAX) - ? (INT32_MAX) - : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : src_.i32[i]; - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_mask_cvtsepi64_epi32(src, k, a) simde_mm512_mask_cvtsepi64_epi32(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_maskz_cvtsepi64_epi32 (simde__mmask8 k, simde__m512i a) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_cvtsepi64_epi32(k, a); -#else - simde__m256i_private r_; - simde__m512i_private a_ = simde__m512i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i32[i] = ((k>>i) & 1) ? - ((a_.i64[i] < INT32_MIN) - ? (INT32_MIN) - : ((a_.i64[i] > INT32_MAX) - ? (INT32_MAX) - : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : INT32_C(0); - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -#define _mm512_maskz_cvtsepi64_epi32(k, a) simde_mm512_maskz_cvtsepi64_epi32(k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_div_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_ps - #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_div_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_div_ps - #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_div_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_div_ps - #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d 
-simde_mm512_div_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_div_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_div_pd - #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_div_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_div_pd - #define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_div_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_div_pd - #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm512_extractf32x4_ps (simde__m512 a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512_private a_ = simde__m512_to_private(a); - - return a_.m128[imm8 & 3]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) - #define simde_mm512_extractf32x4_ps(a, imm8) _mm512_extractf32x4_ps(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf32x4_ps - #define _mm512_extractf32x4_ps(a, imm8) simde_mm512_extractf32x4_ps(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) _mm512_mask_extractf32x4_ps(src, k, a, imm8) -#else - #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm_mask_mov_ps(src, k, simde_mm512_extractf32x4_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extractf32x4_ps - #define _mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) _mm512_maskz_extractf32x4_ps(k, a, imm8) -#else - #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm_maskz_mov_ps(k, simde_mm512_extractf32x4_ps(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extractf32x4_ps - #define _mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm512_maskz_extractf32x4_ps(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256d -simde_mm512_extractf64x4_pd (simde__m512d a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - return a_.m256d[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_extractf64x4_pd(a, imm8) _mm512_extractf64x4_pd(a, imm8) -#endif -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extractf64x4_pd - #define _mm512_extractf64x4_pd(a, imm8) simde_mm512_extractf64x4_pd(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) _mm512_mask_extractf64x4_pd(src, k, a, imm8) -#else - #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm512_extractf64x4_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extractf64x4_pd - #define _mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) _mm512_maskz_extractf64x4_pd(k, a, imm8) -#else - #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm512_extractf64x4_pd(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extractf64x4_pd - #define _mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm512_maskz_extractf64x4_pd(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm512_extracti32x4_epi32 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m128i[imm8 & 3]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_extracti32x4_epi32(a, imm8) _mm512_extracti32x4_epi32(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti32x4_epi32 - #define _mm512_extracti32x4_epi32(a, imm8) simde_mm512_extracti32x4_epi32(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) _mm512_mask_extracti32x4_epi32(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm512_extracti32x4_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti32x4_epi32 - #define _mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) _mm512_maskz_extracti32x4_epi32(k, a, imm8) -#else - #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm512_extracti32x4_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti32x4_epi32 - #define _mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm512_extracti64x4_epi64 (simde__m512i a, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - return a_.m256i[imm8 & 1]; -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_extracti64x4_epi64(a, imm8) _mm512_extracti64x4_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_extracti64x4_epi64 - #define _mm512_extracti64x4_epi64(a, imm8) simde_mm512_extracti64x4_epi64(a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) _mm512_mask_extracti64x4_epi64(src, k, a, imm8) -#else - #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm512_extracti64x4_epi64(a, imm8)) -#endif 
-#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_extracti64x4_epi64 - #define _mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) _mm512_maskz_extracti64x4_epi64(k, a, imm8) -#else - #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm512_extracti64x4_epi64(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_extracti64x4_epi64 - #define _mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_fmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fmadd_ps(a, b, c); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - c_ = simde__m512_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_fmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = (a_.f32 * b_.f32) + c_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m32) / sizeof(r_.m32)) ; i++) { - r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fmadd_ps - #define _mm512_fmadd_ps(a, b, c) simde_mm512_fmadd_ps(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_fmadd_ps(simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_fmadd_ps(a, k, b, c); - #else - return simde_mm512_mask_mov_ps(a, k, simde_mm512_fmadd_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_fmadd_ps - #define _mm512_mask_fmadd_ps(a, k, b, c) simde_mm512_mask_fmadd_ps(a, k, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_fmadd_ps(simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512 c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_fmadd_ps(k, a, b, c); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_fmadd_ps(a, b, c)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_fmadd_ps - #define _mm512_maskz_fmadd_ps(k, a, b, c) simde_mm512_maskz_fmadd_ps(k, a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_fmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_fmadd_pd(a, b, c); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - c_ = simde__m512d_to_private(c); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_fmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = (a_.f64 * b_.f64) + c_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m64) / sizeof(r_.m64)) ; i++) { - r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_fmadd_pd - 
#define _mm512_fmadd_pd(a, b, c) simde_mm512_fmadd_pd(a, b, c) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_insertf32x4 (simde__m512 a, simde__m128 b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512_private a_ = simde__m512_to_private(a); - - a_.m128[imm8 & 3] = b; - - return simde__m512_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_insertf32x4(a, b, imm8) _mm512_insertf32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf32x4 - #define _mm512_insertf32x4(a, b, imm8) simde_mm512_insertf32x4(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - #define simde_mm512_mask_insertf32x4(src, k, a, b, imm8) _mm512_mask_insertf32x4(src, k, a, b, imm8) -#else - #define simde_mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_insertf32x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf32x4 - #define _mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_insertf32x4(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - #define simde_mm512_maskz_insertf32x4(k, a, b, imm8) _mm512_maskz_insertf32x4(k, a, b, imm8) -#else - #define simde_mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_insertf32x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf32x4 - #define _mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_insertf32x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_insertf64x4 (simde__m512d a, simde__m256d b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512d_private a_ = simde__m512d_to_private(a); - - a_.m256d[imm8 & 1] = b; - - return simde__m512d_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_insertf64x4(a, b, imm8) _mm512_insertf64x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_insertf64x4 - #define _mm512_insertf64x4(a, b, imm8) simde_mm512_insertf64x4(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_insertf64x4(src, k, a, b, imm8) _mm512_mask_insertf64x4(src, k, a, b, imm8) -#else - #define simde_mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_insertf64x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_insertf64x4 - #define _mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_insertf64x4(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_insertf64x4(k, a, b, imm8) _mm512_maskz_insertf64x4(k, a, b, imm8) -#else - #define simde_mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_insertf64x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_insertf64x4 - #define _mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_insertf64x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti32x4 (simde__m512i a, simde__m128i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m128i[imm8 & 3] = b; - - return simde__m512i_from_private(a_); -} -#if 
defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_inserti32x4(a, b, imm8) _mm512_inserti32x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti32x4 - #define _mm512_inserti32x4(a, b, imm8) simde_mm512_inserti32x4(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - #define simde_mm512_mask_inserti32x4(src, k, a, b, imm8) _mm512_mask_inserti32x4(src, k, a, b, imm8) -#else - #define simde_mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_inserti32x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti32x4 - #define _mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_inserti32x4(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) - #define simde_mm512_maskz_inserti32x4(k, a, b, imm8) _mm512_maskz_inserti32x4(k, a, b, imm8) -#else - #define simde_mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_inserti32x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti32x4 - #define _mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_inserti32x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_inserti64x4 (simde__m512i a, simde__m256i b, int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { - simde__m512i_private a_ = simde__m512i_to_private(a); - - a_.m256i[imm8 & 1] = b; - - return simde__m512i_from_private(a_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_inserti64x4(a, b, imm8) _mm512_inserti64x4(a, b, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_inserti64x4 - #define _mm512_inserti64x4(a, b, imm8) simde_mm512_inserti64x4(a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_mask_inserti64x4(src, k, a, b, imm8) _mm512_mask_inserti64x4(src, k, a, b, imm8) -#else - #define simde_mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_inserti64x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_inserti64x4 - #define _mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_inserti64x4(src, k, a, b, imm8) -#endif - -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_maskz_inserti64x4(k, a, b, imm8) _mm512_maskz_inserti64x4(k, a, b, imm8) -#else - #define simde_mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_inserti64x4(a, b, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_inserti64x4 - #define _mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_inserti64x4(k, a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_min_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_min_epi32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] < b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epi32 - #define _mm512_min_epi32(a, b) simde_mm512_min_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi32 - #define _mm512_mask_min_epi32(src, k, a, b) simde_mm512_mask_min_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epi32(k, a, b); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epi32(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi32 - #define _mm512_maskz_min_epi32(k, a, b) simde_mm512_maskz_min_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_min_epu32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_min_epu32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu32 - #define _mm512_min_epu32(a, b) simde_mm512_min_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu32 - #define _mm512_mask_min_epu32(src, k, a, b) simde_mm512_mask_min_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epu32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu32 - #define _mm512_maskz_min_epu32(k, a, b) simde_mm512_maskz_min_epu32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] < b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epi64 - #define _mm512_min_epi64(a, b) simde_mm512_min_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epi64 - #define _mm512_mask_min_epi64(src, k, a, b) simde_mm512_mask_min_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epi64 - #define _mm512_maskz_min_epi64(k, a, b) simde_mm512_maskz_min_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_min_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? a_.u64[i] : b_.u64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_epu64 - #define _mm512_min_epu64(a, b) simde_mm512_min_epu64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_min_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_epu64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_epu64 - #define _mm512_mask_min_epu64(src, k, a, b) simde_mm512_mask_min_epu64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_min_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_epu64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_epu64 - #define _mm512_maskz_min_epu64(k, a, b) simde_mm512_maskz_min_epu64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_min_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256[0] = simde_mm256_min_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_min_ps(a_.m256[1], b_.m256[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] < b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_ps - #define _mm512_min_ps(a, b) simde_mm512_min_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_min_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_min_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_ps - #define _mm512_mask_min_ps(src, k, a, b) simde_mm512_mask_min_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_min_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_ps(k, a, b); -#else - return simde_mm512_maskz_mov_ps(k, simde_mm512_min_ps(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_ps - #define _mm512_maskz_min_ps(k, a, b) simde_mm512_maskz_min_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_min_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_min_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] < b_.f64[i] ? a_.f64[i] : b_.f64[i]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_min_pd - #define _mm512_min_pd(a, b) simde_mm512_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_min_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_min_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_min_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_min_pd - #define _mm512_mask_min_pd(src, k, a, b) simde_mm512_mask_min_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_min_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_min_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_min_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_min_pd - #define _mm512_maskz_min_pd(k, a, b) simde_mm512_maskz_min_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_max_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_max_epi32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epi32 - #define _mm512_max_epi32(a, b) simde_mm512_max_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi32 - #define _mm512_mask_max_epi32(src, k, a, b) simde_mm512_mask_max_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epi32(k, a, b); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epi32(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi32 - #define _mm512_maskz_max_epi32(k, a, b) simde_mm512_maskz_max_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_max_epu32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_max_epu32(a_.m256i[1], b_.m256i[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu32 - #define _mm512_max_epu32(a, b) simde_mm512_max_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu32 - #define _mm512_mask_max_epu32(src, k, a, b) simde_mm512_mask_max_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epu32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu32 - #define _mm512_maskz_max_epu32(k, a, b) simde_mm512_maskz_max_epu32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] > b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epi64 - #define _mm512_max_epi64(a, b) simde_mm512_max_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epi64 - #define _mm512_mask_max_epi64(src, k, a, b) simde_mm512_mask_max_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epi64 - #define _mm512_maskz_max_epi64(k, a, b) simde_mm512_maskz_max_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_max_epu64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_epu64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? a_.u64[i] : b_.u64[i]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_epu64 - #define _mm512_max_epu64(a, b) simde_mm512_max_epu64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_max_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_epu64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_epu64 - #define _mm512_mask_max_epu64(src, k, a, b) simde_mm512_mask_max_epu64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_max_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_epu64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epu64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_epu64 - #define _mm512_maskz_max_epu64(k, a, b) simde_mm512_maskz_max_epu64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_max_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256[0] = simde_mm256_max_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_max_ps(a_.m256[1], b_.m256[1]); - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[i] > b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_ps - #define _mm512_max_ps(a, b) simde_mm512_max_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_max_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_max_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_ps - #define _mm512_mask_max_ps(src, k, a, b) simde_mm512_mask_max_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_max_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_ps(k, a, b); -#else - return simde_mm512_maskz_mov_ps(k, simde_mm512_max_ps(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_ps - #define _mm512_maskz_max_ps(k, a, b) simde_mm512_maskz_max_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_max_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_max_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[i] > b_.f64[i] ? a_.f64[i] : b_.f64[i]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_max_pd - #define _mm512_max_pd(a, b) simde_mm512_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_max_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_max_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_max_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_max_pd - #define _mm512_mask_max_pd(src, k, a, b) simde_mm512_mask_max_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_max_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_max_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_max_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_max_pd - #define _mm512_maskz_max_pd(k, a, b) simde_mm512_maskz_max_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mul_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_ps - #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_ps - #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_ps - #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mul_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_pd - #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_pd - #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_pd - #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) - simde__m512i_private x; - __typeof__(r_.i64) ta, tb; - - /* Get even numbered 32-bit values */ - x.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - /* Cast to 64 bits */ - SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].i32); - SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].i32); - r_.i64 = ta * tb; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]); - } - #endif - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_epi32 - #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b) 
-#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_epi32 - #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_epi32 - #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mul_epu32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) - simde__m512i_private x; - __typeof__(r_.u64) ta, tb; - - x.u32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].u32); - SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].u32); - r_.u64 = ta * tb; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mul_epu32 - #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mul_epu32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mul_epu32 - #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mul_epu32(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mul_epu32 - #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mullo_epi32 (simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mullo_epi32(a, b); -#else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); - } - - return simde__m512i_from_private(r_); -#endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mullo_epi32 - #define _mm512_mullo_epi32(a, b) simde_mm512_mullo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_mullo_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_mullo_epi32(src, k, a, b); -#else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_mullo_epi32(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_mullo_epi32 - #define _mm512_mask_mullo_epi32(src, k, a, b) simde_mm512_mask_mullo_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_mullo_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { -#if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_mullo_epi32(k, a, b); -#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_mullo_epi32(a, b)); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_mullo_epi32 - #define _mm512_maskz_mullo_epi32(k, a, b) simde_mm512_maskz_mullo_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_or_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_or_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 | b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] | b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_epi32 - #define _mm512_or_epi32(a, b) simde_mm512_or_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_or_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_or_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_or_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_or_epi32 - #define _mm512_mask_or_epi32(src, k, v2, v3) simde_mm512_mask_or_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_or_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_or_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_or_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_or_epi32 - #define _mm512_maskz_or_epi32(k, a, b) simde_mm512_maskz_or_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_or_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_or_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_or_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] | b_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_epi64 - #define _mm512_or_epi64(a, b) simde_mm512_or_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_or_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_or_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_or_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_or_epi64 - #define _mm512_mask_or_epi64(src, k, a, b) simde_mm512_mask_or_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_or_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_or_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_or_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_or_epi64 - #define _mm512_maskz_or_epi64(k, a, b) simde_mm512_maskz_or_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_or_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_or_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_or_si512 - #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutexvar_epi32 (simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_epi32(idx, a); - #else - simde__m512i_private - idx_ = simde__m512i_to_private(idx), - a_ = simde__m512i_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[idx_.i32[i] & 0x0F]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_epi32 - #define _mm512_permutexvar_epi32(idx, a) simde_mm512_permutexvar_epi32(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutexvar_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_epi32(src, k, idx, a); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_permutexvar_epi32(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_epi32 - #define _mm512_mask_permutexvar_epi32(src, k, idx, a) simde_mm512_mask_permutexvar_epi32(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutexvar_epi32 (simde__mmask16 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_epi32(k, idx, a); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutexvar_epi32(idx, 
a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_epi32 - #define _mm512_maskz_permutexvar_epi32(k, idx, a) simde_mm512_maskz_permutexvar_epi32(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutexvar_epi64 (simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_epi64(idx, a); - #else - simde__m512i_private - idx_ = simde__m512i_to_private(idx), - a_ = simde__m512i_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[idx_.i64[i] & 7]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_epi64 - #define _mm512_permutexvar_epi64(idx, a) simde_mm512_permutexvar_epi64(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutexvar_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_epi64(src, k, idx, a); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_permutexvar_epi64(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_epi64 - #define _mm512_mask_permutexvar_epi64(src, k, idx, a) simde_mm512_mask_permutexvar_epi64(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m512i idx, simde__m512i a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_epi64(k, idx, a); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutexvar_epi64(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_epi64 - #define _mm512_maskz_permutexvar_epi64(k, idx, a) simde_mm512_maskz_permutexvar_epi64(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_permutexvar_pd (simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_pd(idx, a); - #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512d_private - a_ = simde__m512d_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = a_.f64[idx_.i64[i] & 7]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_pd - #define _mm512_permutexvar_pd(idx, a) simde_mm512_permutexvar_pd(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_permutexvar_pd (simde__m512d src, simde__mmask8 k, simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_pd(src, k, idx, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_permutexvar_pd(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_pd - #define _mm512_mask_permutexvar_pd(src, k, idx, a) simde_mm512_mask_permutexvar_pd(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_permutexvar_pd (simde__mmask8 k, simde__m512i idx, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_pd(k, idx, a); - #else - return simde_mm512_maskz_mov_pd(k, 
simde_mm512_permutexvar_pd(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_pd - #define _mm512_maskz_permutexvar_pd(k, idx, a) simde_mm512_maskz_permutexvar_pd(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_permutexvar_ps (simde__m512i idx, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_permutexvar_ps(idx, a); - #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512_private - a_ = simde__m512_to_private(a), - r_; - - #if !defined(__INTEL_COMPILER) - SIMDE_VECTORIZE - #endif - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = a_.f32[idx_.i32[i] & 0x0F]; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutexvar_ps - #define _mm512_permutexvar_ps(idx, a) simde_mm512_permutexvar_ps(idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_permutexvar_ps (simde__m512 src, simde__mmask16 k, simde__m512i idx, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_permutexvar_ps(src, k, idx, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_permutexvar_ps(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutexvar_ps - #define _mm512_mask_permutexvar_ps(src, k, idx, a) simde_mm512_mask_permutexvar_ps(src, k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_permutexvar_ps (simde__mmask16 k, simde__m512i idx, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_permutexvar_ps(k, idx, a); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_permutexvar_ps(idx, a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutexvar_ps - #define _mm512_maskz_permutexvar_ps(k, idx, a) simde_mm512_maskz_permutexvar_ps(k, idx, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_epi32(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = ((idx_.i32[i] & 0x10) ? 
b_ : a_).i32[idx_.i32[i] & 0x0F]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi32 - #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutex2var_epi32(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi32 -#define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask2_permutex2var_epi32(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi32 -#define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutex2var_epi32(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi32 -#define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_epi64(a, idx, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - idx_ = simde__m512i_to_private(idx), - b_ = simde__m512i_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = ((idx_.i64[i] & 0x08) ? 
b_ : a_).i64[idx_.i64[i] & 0x07]; - } - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_epi64 - #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutex2var_epi64(a, k, idx, b); - #else - return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_epi64 -#define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask2_permutex2var_epi64(a, idx, k, b); - #else - return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_epi64 -#define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutex2var_epi64(k, a, idx, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutex2var_epi64(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_epi64 -#define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_pd(a, idx, b); - #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512d_private - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = ((idx_.i64[i] & 0x08) ? 
b_ : a_).f64[idx_.i64[i] & 0x07]; - } - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_pd - #define _mm512_permutex2var_pd(a, idx, b) simde_mm512_permutex2var_pd(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutex2var_pd(a, k, idx, b); - #else - return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_pd -#define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask2_permutex2var_pd(a, idx, k, b); - #else - return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_pd -#define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutex2var_pd(k, a, idx, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_pd -#define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_permutex2var_ps(a, idx, b); - #else - simde__m512i_private idx_ = simde__m512i_to_private(idx); - simde__m512_private - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b), - r_; - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = ((idx_.i32[i] & 0x10) ? 
b_ : a_).f32[idx_.i32[i] & 0x0F]; - } - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_permutex2var_ps - #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask_permutex2var_ps(a, k, idx, b); - #else - return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_permutex2var_ps -#define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_mask2_permutex2var_ps(a, idx, k, b); - #else - return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask2_permutex2var_ps -#define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { - #if defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm512_maskz_permutex2var_ps(k, a, idx, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); - #endif -} -#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_permutex2var_ps -#define _mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sqrt_ps (simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sqrt_ps(a); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256[0] = simde_mm256_sqrt_ps(a_.m256[0]); - r_.m256[1] = simde_mm256_sqrt_ps(a_.m256[1]); - #elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_sqrt_ps(a) simde_mm512_sqrt_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sqrt_ps(src, k, a); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_sqrt_ps(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sqrt_ps - #define _mm512_mask_sqrt_ps(src, k, a) simde_mm512_mask_sqrt_ps(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sqrt_pd (simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sqrt_pd(a); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a); - - #if defined(SIMDE_X86_AVX_NATIVE) - r_.m256d[0] = simde_mm256_sqrt_pd(a_.m256d[0]); - r_.m256d[1] = simde_mm256_sqrt_pd(a_.m256d[1]); - #elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / 
sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } - #else - HEDLEY_UNREACHABLE(); - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) -# define _mm512_sqrt_pd(a) simde_mm512_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sqrt_pd(src, k, a); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_sqrt_pd(a)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sqrt_pd - #define _mm512_mask_sqrt_pd(src, k, a) simde_mm512_mask_sqrt_pd(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sub_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_sub_epi32 - #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sub_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sub_epi32 - #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_sub_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sub_epi32 - #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sub_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]); - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_sub_epi64 - #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sub_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b)); - #endif -} -#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sub_epi64 - #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_sub_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sub_epi64 - #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_sub_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sub_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { - r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]); - } - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_sub_ps - #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sub_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sub_ps - #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_sub_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sub_ps - #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_sub_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_sub_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { - r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]); - } - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_sub_pd - #define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_sub_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_sub_pd - #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_sub_pd(simde__mmask8 k, 
simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_sub_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_sub_pd - #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_slli_epi32 (simde__m512i a, unsigned int imm8) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are - * used. In this case we should do "imm8 &= 0xff". However in - * practice all bits are used. */ - if (imm8 > 31) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_slli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_slli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_slli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_slli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_slli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_slli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] << imm8; - } - #endif - } - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_slli_epi32(a, imm8) SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi32(a, imm8)) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_slli_epi32 - #define _mm512_slli_epi32(a, imm8) simde_mm512_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_slli_epi64 (simde__m512i a, unsigned int imm8) { - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are - * used. In this case we should do "imm8 &= 0xff". However in - * practice all bits are used. 
*/ - if (imm8 > 63) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_slli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_slli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_slli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_slli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_slli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_slli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 << imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] << imm8; - } - #endif - } - - return simde__m512i_from_private(r_); -} -#if defined(SIMDE_X86_AVX512F_NATIVE) - #define simde_mm512_slli_epi64(a, imm8) _mm512_slli_epi64(a, imm8) -#endif -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_slli_epi64 - #define _mm512_slli_epi64(a, imm8) simde_mm512_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi32(a, imm8)); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #else - if (imm8 > 31) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] >> imm8; - } - #endif - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srli_epi32 - #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_srli_epi64(a, imm8); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); - r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); - #else - /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are - * used. In this case we should do "imm8 &= 0xff" here. However in - * practice all bits are used. 
*/ - if (imm8 > 63) { - simde_memset(&r_, 0, sizeof(r_)); - } else { - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> imm8; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } - #endif - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_srli_epi64 - #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask16 -simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_test_epi32_mask(k1, a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask16 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); - } - - return r & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi32_mask - #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__mmask8 -simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_test_epi64_mask(k1, a, b); - #else - simde__m512i_private - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - simde__mmask8 r = 0; - - SIMDE_VECTORIZE_REDUCTION(|:r) - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r |= !!(a_.i64[i] & b_.i64[i]) << i; - } - - return r & k1; - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_test_epi64_mask - #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpacklo_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, - 0, 16, 1 , 17, 4, 20, 5, 21, - 8, 24, 9, 25, 12, 28, 13, 29); - #else - r_.m256i[0] = simde_mm256_unpacklo_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpacklo_epi32(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_epi32 - #define _mm512_unpacklo_epi32(a, b) simde_mm512_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpacklo_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_unpacklo_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_epi32 - #define _mm512_mask_unpacklo_epi32(src, k, a, b) simde_mm512_mask_unpacklo_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpacklo_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_epi32(k, a, b); - 
#else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_unpacklo_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_epi32 - #define _mm512_maskz_unpacklo_epi32(k, a, b) simde_mm512_maskz_unpacklo_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpacklo_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 0, 8, 2, 10, 4, 12, 6, 14); - #else - r_.m256i[0] = simde_mm256_unpacklo_epi64(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpacklo_epi64(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_epi64 - #define _mm512_unpacklo_epi64(a, b) simde_mm512_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpacklo_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_epi64 - #define _mm512_mask_unpacklo_epi64(src, k, a, b) simde_mm512_mask_unpacklo_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpacklo_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpacklo_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_epi64 - #define _mm512_maskz_unpacklo_epi64(k, a, b) simde_mm512_maskz_unpacklo_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_unpacklo_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, - 0, 16, 1 , 17, 4, 20, 5, 21, - 8, 24, 9, 25, 12, 28, 13, 29); - #else - r_.m256[0] = simde_mm256_unpacklo_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_unpacklo_ps(a_.m256[1], b_.m256[1]); - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_ps - #define _mm512_unpacklo_ps(a, b) simde_mm512_unpacklo_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_unpacklo_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_ps - #define _mm512_mask_unpacklo_ps(src, k, a, b) simde_mm512_mask_unpacklo_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_maskz_unpacklo_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if 
defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_unpacklo_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_ps - #define _mm512_maskz_unpacklo_ps(k, a, b) simde_mm512_maskz_unpacklo_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_unpacklo_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpacklo_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 0, 8, 2, 10, 4, 12, 6, 14); - #else - r_.m256d[0] = simde_mm256_unpacklo_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_unpacklo_pd(a_.m256d[1], b_.m256d[1]); - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpacklo_pd - #define _mm512_unpacklo_pd(a, b) simde_mm512_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_unpacklo_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpacklo_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpacklo_pd - #define _mm512_mask_unpacklo_pd(src, k, a, b) simde_mm512_mask_unpacklo_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_unpacklo_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpacklo_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_unpacklo_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpacklo_pd - #define _mm512_maskz_unpacklo_pd(k, a, b) simde_mm512_maskz_unpacklo_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, - 2, 18, 3 , 19, 6, 22, 7, 23, - 10, 26, 11, 27, 14, 30, 15, 31); - #else - r_.m256i[0] = simde_mm256_unpackhi_epi32(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi32(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi32 - #define _mm512_unpackhi_epi32(a, b) simde_mm512_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpackhi_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_epi32(src, k, a, b); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_epi32 - #define _mm512_mask_unpackhi_epi32(src, k, a, b) simde_mm512_mask_unpackhi_epi32(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpackhi_epi32(simde__mmask16 k, 
simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_unpackhi_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_epi32 - #define _mm512_maskz_unpackhi_epi32(k, a, b) simde_mm512_maskz_unpackhi_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_unpackhi_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 1, 9, 3, 11, 5, 13, 7, 15); - #else - r_.m256i[0] = simde_mm256_unpackhi_epi64(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_unpackhi_epi64(a_.m256i[1], b_.m256i[1]); - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_epi64 - #define _mm512_unpackhi_epi64(a, b) simde_mm512_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_unpackhi_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_epi64 - #define _mm512_mask_unpackhi_epi64(src, k, a, b) simde_mm512_mask_unpackhi_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_unpackhi_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpackhi_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_epi64 - #define _mm512_maskz_unpackhi_epi64(k, a, b) simde_mm512_maskz_unpackhi_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_unpackhi_ps (simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_ps(a, b); - #else - simde__m512_private - r_, - a_ = simde__m512_to_private(a), - b_ = simde__m512_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, - 2, 18, 3 , 19, 6, 22, 7, 23, - 10, 26, 11, 27, 14, 30, 15, 31); - #else - r_.m256[0] = simde_mm256_unpackhi_ps(a_.m256[0], b_.m256[0]); - r_.m256[1] = simde_mm256_unpackhi_ps(a_.m256[1], b_.m256[1]); - #endif - - return simde__m512_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_ps - #define _mm512_unpackhi_ps(a, b) simde_mm512_unpackhi_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512 -simde_mm512_mask_unpackhi_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_ps(src, k, a, b); - #else - return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_ps - #define _mm512_mask_unpackhi_ps(src, k, a, b) simde_mm512_mask_unpackhi_ps(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES 
-simde__m512 -simde_mm512_maskz_unpackhi_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_ps(k, a, b); - #else - return simde_mm512_maskz_mov_ps(k, simde_mm512_unpackhi_ps(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_ps - #define _mm512_maskz_unpackhi_ps(k, a, b) simde_mm512_maskz_unpackhi_ps(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_unpackhi_pd (simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_unpackhi_pd(a, b); - #else - simde__m512d_private - r_, - a_ = simde__m512d_to_private(a), - b_ = simde__m512d_to_private(b); - - #if defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 1, 9, 3, 11, 5, 13, 7, 15); - #else - r_.m256d[0] = simde_mm256_unpackhi_pd(a_.m256d[0], b_.m256d[0]); - r_.m256d[1] = simde_mm256_unpackhi_pd(a_.m256d[1], b_.m256d[1]); - #endif - - return simde__m512d_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_unpackhi_pd - #define _mm512_unpackhi_pd(a, b) simde_mm512_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_mask_unpackhi_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_unpackhi_pd(src, k, a, b); - #else - return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_unpackhi_pd - #define _mm512_mask_unpackhi_pd(src, k, a, b) simde_mm512_mask_unpackhi_pd(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512d -simde_mm512_maskz_unpackhi_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_unpackhi_pd(k, a, b); - #else - return simde_mm512_maskz_mov_pd(k, simde_mm512_unpackhi_pd(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_unpackhi_pd - #define _mm512_maskz_unpackhi_pd(k, a, b) simde_mm512_maskz_unpackhi_pd(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_epi32(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ b_.i32; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] ^ b_.i32[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_epi32 - #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_xor_epi32(src, k, v2, v3); - #else - return simde_mm512_mask_mov_epi32(src, k, simde_mm512_xor_epi32(v2, v3)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_epi32 - #define _mm512_mask_xor_epi32(src, k, v2, v3) 
simde_mm512_mask_xor_epi32(src, k, v2, v3) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_xor_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_xor_epi32(k, a, b); - #else - return simde_mm512_maskz_mov_epi32(k, simde_mm512_xor_epi32(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_epi32 - #define _mm512_maskz_xor_epi32(k, a, b) simde_mm512_maskz_xor_epi32(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_epi64 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_epi64(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) - for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { - r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); - } - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ b_.i64; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] ^ b_.i64[i]; - } - #endif - - return simde__m512i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_epi64 - #define _mm512_xor_epi64(a, b) simde_mm512_xor_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_mask_xor_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_mask_xor_epi64(src, k, a, b); - #else - return simde_mm512_mask_mov_epi64(src, k, simde_mm512_xor_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_mask_xor_epi64 - #define _mm512_mask_xor_epi64(src, k, a, b) simde_mm512_mask_xor_epi64(src, k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_maskz_xor_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_maskz_xor_epi64(k, a, b); - #else - return simde_mm512_maskz_mov_epi64(k, simde_mm512_xor_epi64(a, b)); - #endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_maskz_xor_epi64 - #define _mm512_maskz_xor_epi64(k, a, b) simde_mm512_maskz_xor_epi64(k, a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m512i -simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) { - #if defined(SIMDE_X86_AVX512F_NATIVE) - return _mm512_xor_si512(a, b); - #else - simde__m512i_private - r_, - a_ = simde__m512i_to_private(a), - b_ = simde__m512i_to_private(b); - - #if defined(SIMDE_X86_AVX2_NATIVE) - r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]); - r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]); - #elif defined(SIMDE_X86_SSE2_NATIVE) - r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); - r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); - r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]); - r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } - #endif - - return simde__m512i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm512_xor_si512 - #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b) -#endif - 
-SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512F_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512.h 2021-04-17 01:19:49.000000000 +0000 @@ -55,6 +55,7 @@ #include "avx512/fnmadd.h" #include "avx512/fnmsub.h" #include "avx512/insert.h" +#include "avx512/kshift.h" #include "avx512/load.h" #include "avx512/loadu.h" #include "avx512/lzcnt.h" diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512vl.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512vl.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx512vl.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx512vl.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,307 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - * 2020 Himanshi Mathur - * 2020 Christopher Moore - */ - -#if !defined(SIMDE_X86_AVX512VL_H) -#define SIMDE_X86_AVX512VL_H - -#include "avx512f.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsepi16_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cvtsepi16_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { - r_.i8[i] = - (a_.i16[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i16[i] > INT8_MAX) - ? 
(INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtsepi16_epi8 - #define _mm_cvtsepi16_epi8(a) simde_mm_cvtsepi16_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtsepi16_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cvtsepi16_epi8(a); - #else - simde__m128i_private r_; - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = - (a_.i16[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i16[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsepi16_epi8 - #define _mm256_cvtsepi16_epi8(a) simde_mm256_cvtsepi16_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsepi32_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cvtsepi32_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = - (a_.i32[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i32[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtsepi32_epi8 - #define _mm_cvtsepi32_epi8(a) simde_mm_cvtsepi32_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtsepi32_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cvtsepi32_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i8[i] = - (a_.i32[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i32[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsepi32_epi8 - #define _mm256_cvtsepi32_epi8(a) simde_mm256_cvtsepi32_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsepi32_epi16 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cvtsepi32_epi16(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = - (a_.i32[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i32[i] > INT16_MAX) - ? 
(INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtsepi32_epi16 - #define _mm_cvtsepi32_epi16(a) simde_mm_cvtsepi32_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtsepi32_epi16 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cvtsepi32_epi16(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { - r_.i16[i] = - (a_.i32[i] < INT16_MIN) - ? (INT16_MIN) - : ((a_.i32[i] > INT16_MAX) - ? (INT16_MAX) - : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsepi32_epi16 - #define _mm256_cvtsepi32_epi16(a) simde_mm256_cvtsepi32_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm_cvtsepi64_epi8 (simde__m128i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm_cvtsepi64_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m128i_private a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = - (a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm_cvtsepi64_epi8 - #define _mm_cvtsepi64_epi8(a) simde_mm_cvtsepi64_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i -simde_mm256_cvtsepi64_epi8 (simde__m256i a) { - #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) - return _mm256_cvtsepi64_epi8(a); - #else - simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - simde__m256i_private a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { - r_.i8[i] = - (a_.i64[i] < INT8_MIN) - ? (INT8_MIN) - : ((a_.i64[i] > INT8_MAX) - ? (INT8_MAX) - : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); - } - - return simde__m128i_from_private(r_); - #endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_cvtsepi64_epi8 - #define _mm256_cvtsepi64_epi8(a) simde_mm256_cvtsepi64_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_abs_epi64(simde__m256i a) { -#if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_abs_epi64(a); -#else - simde__m256i_private - r_, - a_ = simde__m256i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; - } - - return simde__m256i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm256_abs_epi64 - #define _mm256_abs_epi64(a) simde_mm256_abs_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_mask_abs_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a) { -#if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_mask_abs_epi64(src, k, a); -#else - return simde_mm256_mask_mov_epi64(src, k, simde_mm256_abs_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) -#define _mm256_mask_abs_epi64(src, k, a) simde_mm256_mask_abs_epi64(src, k, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m256i -simde_mm256_maskz_abs_epi64(simde__mmask8 k, simde__m256i a) { -#if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm256_maskz_abs_epi64(k, a); -#else - return simde_mm256_maskz_mov_epi64(k, simde_mm256_abs_epi64(a)); -#endif -} -#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) -#define _mm256_maskz_abs_epi64(k, a) simde_mm256_maskz_abs_epi64(k, a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_AVX512VL_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/avx.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/avx.h 2021-04-17 01:19:49.000000000 +0000 @@ -22,6 +22,7 @@ * * Copyright: * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe */ #include "sse.h" @@ -513,9 +514,8 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = ~a_.i32; #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_x_mm_not_ps(a_.m128[i]); - } + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { @@ -549,9 +549,8 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_x_mm_select_ps(a_.m128[i], b_.m128[i], mask_.m128[i]); - } + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { @@ -573,9 +572,8 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = ~a_.i64; #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_x_mm_not_pd(a_.m128d[i]); - } + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { @@ -609,9 +607,8 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_x_mm_select_pd(a_.m128d[i], b_.m128d[i], mask_.m128d[i]); - } + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / 
sizeof(r_.i64[0])) ; i++) { @@ -1258,9 +1255,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for(size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); #else @@ -1286,9 +1282,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for(size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); #else @@ -1314,9 +1309,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for(size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); #else @@ -1342,9 +1336,8 @@ b_ = simde__m256i_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for(size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { - r_.m128i[i] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[i], b_.m128i[i]); - } + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); #else @@ -1370,9 +1363,8 @@ b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_x_mm_deinterleaveeven_ps(a_.m128[i], b_.m128[i]); - } + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); #else @@ -1398,9 +1390,8 @@ b_ = simde__m256_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { - r_.m128[i] = simde_x_mm_deinterleaveodd_ps(a_.m128[i], b_.m128[i]); - } + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); #else @@ -1426,9 +1417,8 @@ b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_x_mm_deinterleaveeven_pd(a_.m128d[i], b_.m128d[i]); - } + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); #else @@ -1454,9 +1444,8 @@ b_ = simde__m256d_to_private(b); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { - r_.m128d[i] = simde_x_mm_deinterleaveodd_pd(a_.m128d[i], b_.m128d[i]); - } + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); #else @@ -4328,7 +4317,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m256 simde_mm256_moveldup_ps (simde__m256 a) { - #if defined(SIMDE_X86_AVX_NATIVE) && 0 + #if defined(SIMDE_X86_AVX_NATIVE) return _mm256_moveldup_ps(a); #else simde__m256_private @@ -4786,9 +4775,8 @@ a_ = simde__m256_to_private(a); #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) - for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128_private[0])) ; i++) { - r_.m128[i] = simde_mm_rcp_ps(a_.m128[i]); - } + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/clmul.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/clmul.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/clmul.h 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/clmul.h 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,414 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2016 Thomas Pornin + */ + +/* The portable version is based on the implementation in BearSSL, + * which is MIT licensed, constant-time / branch-free, and documented + * at https://www.bearssl.org/constanttime.html (specifically, we use + * the implementation from ghash_ctmul64.c). 
*/ + +#if !defined(SIMDE_X86_CLMUL_H) +#define SIMDE_X86_CLMUL_H + +#include "avx512/set.h" +#include "avx512/setzero.h" + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_x_clmul_u64(uint64_t x, uint64_t y) { + uint64_t x0, x1, x2, x3; + uint64_t y0, y1, y2, y3; + uint64_t z0, z1, z2, z3; + + x0 = x & UINT64_C(0x1111111111111111); + x1 = x & UINT64_C(0x2222222222222222); + x2 = x & UINT64_C(0x4444444444444444); + x3 = x & UINT64_C(0x8888888888888888); + y0 = y & UINT64_C(0x1111111111111111); + y1 = y & UINT64_C(0x2222222222222222); + y2 = y & UINT64_C(0x4444444444444444); + y3 = y & UINT64_C(0x8888888888888888); + + z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); + z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); + z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); + z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); + + z0 &= UINT64_C(0x1111111111111111); + z1 &= UINT64_C(0x2222222222222222); + z2 &= UINT64_C(0x4444444444444444); + z3 &= UINT64_C(0x8888888888888888); + + return z0 | z1 | z2 | z3; +} + +static uint64_t +simde_x_bitreverse_u64(uint64_t v) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t bytes = vreinterpret_u8_u64(vmov_n_u64(v)); + bytes = vrbit_u8(bytes); + bytes = vrev64_u8(bytes); + return vget_lane_u64(vreinterpret_u64_u8(bytes), 0); + #elif defined(SIMDE_X86_GFNI_NATIVE) + /* I don't think there is (or likely will ever be) a CPU with GFNI + * but not pclmulq, but this may be useful for things other than + * _mm_clmulepi64_si128. */ + __m128i vec = _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, v)); + + /* Reverse bits within each byte */ + vec = _mm_gf2p8affine_epi64_epi8(vec, _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); + + /* Reverse bytes */ + #if defined(SIMDE_X86_SSSE3_NATIVE) + vec = _mm_shuffle_epi8(vec, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + #else + vec = _mm_or_si128(_mm_slli_epi16(vec, 8), _mm_srli_epi16(vec, 8)); + vec = _mm_shufflelo_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); + vec = _mm_shufflehi_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); + #endif + + return HEDLEY_STATIC_CAST(uint64_t, _mm_cvtsi128_si64(vec)); + #elif HEDLEY_HAS_BUILTIN(__builtin_bitreverse64) + return __builtin_bitreverse64(v); + #else + v = ((v >> 1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) << 1); + v = ((v >> 2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) << 2); + v = ((v >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); + v = ((v >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) << 8); + v = ((v >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((v & UINT64_C(0x0000FFFF0000FFFF)) << 16); + return (v >> 32) | (v << 32); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT(imm8) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) + uint64x1_t A = ((imm8) & 0x01) ? vget_high_u64(a_.neon_u64) : vget_low_u64(a_.neon_u64); + uint64x1_t B = ((imm8) & 0x10) ? 
vget_high_u64(b_.neon_u64) : vget_low_u64(b_.neon_u64); + #if defined(SIMDE_BUG_CLANG_48257) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif + poly64_t A_ = vget_lane_p64(vreinterpret_p64_u64(A), 0); + poly64_t B_ = vget_lane_p64(vreinterpret_p64_u64(B), 0); + #if defined(SIMDE_BUG_CLANG_48257) + HEDLEY_DIAGNOSTIC_POP + #endif + poly128_t R = vmull_p64(A_, B_); + r_.neon_u64 = vreinterpretq_u64_p128(R); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #if defined(SIMDE_SHUFFLE_VECTOR_) + switch (imm8 & 0x11) { + case 0x00: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); + break; + case 0x01: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); + break; + case 0x10: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); + break; + case 0x11: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); + break; + } + #else + { + const uint64_t A = a_.u64[(imm8 ) & 1]; + const uint64_t B = b_.u64[(imm8 >> 4) & 1]; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + a_.u64[i] = A; + b_.u64[i] = B; + } + } + #endif + + simde__m128i_private reversed_; + { + #if defined(SIMDE_SHUFFLE_VECTOR_) + reversed_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, b_.u64, 1, 3); + #else + reversed_.u64[0] = a_.u64[1]; + reversed_.u64[1] = b_.u64[1]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { + reversed_.u64[i] = simde_x_bitreverse_u64(reversed_.u64[i]); + } + } + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, reversed_.u64, 0, 2); + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, reversed_.u64, 1, 3); + #else + a_.u64[1] = reversed_.u64[0]; + b_.u64[1] = reversed_.u64[1]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { + r_.u64[i] = simde_x_clmul_u64(a_.u64[i], b_.u64[i]); + } + + r_.u64[1] = simde_x_bitreverse_u64(r_.u64[1]) >> 1; + #else + r_.u64[0] = simde_x_clmul_u64( a_.u64[imm8 & 1], b_.u64[(imm8 >> 4) & 1]); + r_.u64[1] = simde_x_bitreverse_u64(simde_x_clmul_u64(simde_x_bitreverse_u64(a_.u64[imm8 & 1]), simde_x_bitreverse_u64(b_.u64[(imm8 >> 4) & 1]))) >> 1; + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_PCLMUL_NATIVE) + #define simde_mm_clmulepi64_si128(a, b, imm8) _mm_clmulepi64_si128(a, b, imm8) +#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) + #define simde_mm_clmulepi64_si128(a, b, imm8) \ + simde__m128i_from_neon_u64( \ + vreinterpretq_u64_p128( \ + vmull_p64( \ + vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(a)), (imm8 ) & 1), \ + vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(b)), (imm8 >> 4) & 1) \ + ) \ + ) \ + ) +#endif +#if defined(SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES) + #undef _mm_clmulepi64_si128 + #define _mm_clmulepi64_si128(a, b, imm8) simde_mm_clmulepi64_si128(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_clmulepi64_epi128 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT(imm8) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X86_PCLMUL_NATIVE) 
+ switch (imm8 & 0x11) { + case 0x00: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x00); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x00); + break; + case 0x01: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x01); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x01); + break; + case 0x10: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x10); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x10); + break; + case 0x11: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x11); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x11); + break; + } + #else + simde__m128i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + switch (imm8 & 0x01) { + case 0x00: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2); + break; + case 0x01: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3); + break; + } + switch (imm8 & 0x10) { + case 0x00: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2); + break; + case 0x10: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3); + break; + } + #else + a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; + a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; + b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; + b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { + a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); + b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); + + r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); + r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); + + r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; + } + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 2, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_ = simde__m256i_to_private(simde_mm256_set_m128i(simde__m128i_from_private(r_hi_), simde__m128i_from_private(r_lo_))); + r_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 32, r_.u64, r_.u64, 0, 2, 1, 3); + #else + r_.u64[0] = r_lo_.u64[0]; + r_.u64[1] = r_hi_.u64[0]; + r_.u64[2] = r_lo_.u64[1]; + r_.u64[3] = r_hi_.u64[1]; + #endif + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_clmulepi64_epi128(a, b, imm8) _mm256_clmulepi64_epi128(a, b, imm8) +#endif +#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_clmulepi64_epi128 + #define _mm256_clmulepi64_epi128(a, b, imm8) simde_mm256_clmulepi64_epi128(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_clmulepi64_epi128 (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT(imm8) { + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(HEDLEY_MSVC_VERSION) + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); + #endif + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + switch (imm8 & 0x11) { + case 0x00: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x00); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x00); + break; + case 0x01: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x01); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x01); + break; + 
case 0x10: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x10); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x10); + break; + case 0x11: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x11); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x11); + break; + } + #else + simde__m256i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + switch (imm8 & 0x01) { + case 0x00: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2, 4, 6); + break; + case 0x01: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3, 5, 7); + break; + } + switch (imm8 & 0x10) { + case 0x00: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2, 4, 6); + break; + case 0x10: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3, 5, 7); + break; + } + #else + a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; + a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; + a_lo_.u64[2] = a_.u64[((imm8 >> 0) & 1) + 4]; + a_lo_.u64[3] = a_.u64[((imm8 >> 0) & 1) + 6]; + b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; + b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; + b_lo_.u64[2] = b_.u64[((imm8 >> 4) & 1) + 4]; + b_lo_.u64[3] = b_.u64[((imm8 >> 4) & 1) + 6]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { + a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); + b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); + + r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); + r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); + + r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; + } + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 4, 1, 5, 2, 6, 3, 7); + #else + r_.u64[0] = r_lo_.u64[0]; + r_.u64[1] = r_hi_.u64[0]; + r_.u64[2] = r_lo_.u64[1]; + r_.u64[3] = r_hi_.u64[1]; + r_.u64[4] = r_lo_.u64[2]; + r_.u64[5] = r_hi_.u64[2]; + r_.u64[6] = r_lo_.u64[3]; + r_.u64[7] = r_hi_.u64[3]; + #endif + #endif + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define simde_mm512_clmulepi64_epi128(a, b, imm8) _mm512_clmulepi64_epi128(a, b, imm8) +#endif +#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_clmulepi64_epi128 + #define _mm512_clmulepi64_epi128(a, b, imm8) simde_mm512_clmulepi64_epi128(a, b, imm8) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_CLMUL_H) */ diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/gfni.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/gfni.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/gfni.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/gfni.h 2021-04-17 01:19:49.000000000 +0000 @@ -339,7 +339,7 @@ SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) +#if defined(SIMDE_X86_GFNI_NATIVE) #define simde_mm_gf2p8affine_epi64_epi8(x, A, b) _mm_gf2p8affine_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) @@ -353,7 +353,7 @@ SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x, A), 
simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_gf2p8affine_epi64_epi8(x, A, b) _mm256_gf2p8affine_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) @@ -441,7 +441,7 @@ SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) +#if defined(SIMDE_X86_GFNI_NATIVE) #define simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) _mm_gf2p8affineinv_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) @@ -455,7 +455,7 @@ SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); } -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) _mm256_gf2p8affineinv_epi64_epi8(x, A, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) @@ -596,7 +596,7 @@ return simde__m128i_from_private(r_); #endif } -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) +#if defined(SIMDE_X86_GFNI_NATIVE) #define simde_mm_gf2p8mul_epi8(a, b) _mm_gf2p8mul_epi8(a, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) @@ -649,7 +649,7 @@ return simde__m256i_from_private(r_); #endif } -#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) #define simde_mm256_gf2p8mul_epi8(a, b) _mm256_gf2p8mul_epi8(a, b) #endif #if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/mmx.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/mmx.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/mmx.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/mmx.h 2021-04-17 01:19:49.000000000 +0000 @@ -1390,6 +1390,11 @@ #endif r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << count_.u64[0]; #else @@ -1462,8 +1467,14 @@ simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); - #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << count; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) @@ -1583,7 +1594,12 @@ simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); - #if 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> count_.u64[0]; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); @@ -1775,7 +1791,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> (count & 0xff); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)); + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psrah_s(a_.mmi_i16, count); #else diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/sse2.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/sse2.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/sse2.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/sse2.h 2021-04-17 01:19:49.000000000 +0000 @@ -433,28 +433,33 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_not_pd(simde__m128d a) { - simde__m128d_private - r_, - a_ = simde__m128d_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); - return simde__m128d_from_private(r_); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif } SIMDE_FUNCTION_ATTRIBUTES @@ -1160,8 +1165,8 @@ vec_sro #endif (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); - #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) && 0 - r_.u128[0] = a_.u128[0] << s; + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); #else r_ = simde__m128i_to_private(simde_mm_setzero_si128()); for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { @@ -2352,7 +2357,11 @@ return _mm_cvtsd_f64(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); - return 
a_.f64[0]; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #else + return a_.f64[0]; + #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) @@ -2611,32 +2620,21 @@ simde__m128i_private r_; simde__m128_private a_ = simde__m128_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* The default rounding mode on SSE is 'round to even', which ArmV7 - does not support! It is supported on ARMv8 however. */ - #if defined(SIMDE_ARCH_AARCH64) - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); - #else - uint32x4_t signmask = vdupq_n_u32(0x80000000); - float32x4_t half = vbslq_f32(signmask, a_.neon_f32, vdupq_n_f32(0.5f)); /* +/- 0.5 */ - int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(a_.neon_f32, half)); /* round to integer: [a + 0.5]*/ - int32x4_t r_trunc = vcvtq_s32_f32(a_.neon_f32); /* truncate to integer: [a] */ - int32x4_t plusone = vshrq_n_s32(vnegq_s32(r_trunc), 31); /* 1 or 0 */ - int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */ - float32x4_t delta = vsubq_f32(a_.neon_f32, vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */ - uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */ - r_.neon_i32 = vbslq_s32(is_delta_half, r_even, r_normal); - #endif - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_ROUND_TIES) + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_ROUND_TIES) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(vec_round(a_.altivec_f32), 0); + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); HEDLEY_DIAGNOSTIC_POP #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(simde__m128_from_private(a_), SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_roundf(a_.f32[i])); + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f32[i]); } #endif @@ -2737,6 +2735,26 @@ #endif SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsi128_si32 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) @@ -2748,7 +2766,7 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s32(a_.neon_i32, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_i32x4_extract_lane(a_.wasm_v128, 0); + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; @@ -2779,7 +2797,7 @@ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s64(a_.neon_i64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_i64x2_extract_lane(a_.wasm_v128, 0); + return HEDLEY_STATIC_CAST(int64_t, 
wasm_i64x2_extract_lane(a_.wasm_v128, 0)); #endif return a_.i64[0]; #endif @@ -2815,6 +2833,29 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i simde_mm_cvtsi32_si128 (int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi32_si128(a); @@ -4102,7 +4143,11 @@ a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif return simde__m64_from_private(r_); #endif @@ -4501,6 +4546,24 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi32(e3, e2, e1, e0); @@ -4528,6 +4591,24 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si32(mem_addr); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_set_epi64(e1, e0); @@ -4575,6 +4656,24 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, @@ -4780,7 +4879,7 @@ 
simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vmovq_n_s64(a); + r_.neon_i64 = vdupq_n_s64(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) @@ -5010,6 +5109,23 @@ } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) \ + __extension__({ \ + int32x4_t ret; \ + ret = vmovq_n_s32( \ + vgetq_lane_s32(vreinterpretq_s32_s64(a), (imm8) & (0x3))); \ + ret = vsetq_lane_s32( \ + vgetq_lane_s32(vreinterpretq_s32_s64(a), ((imm8) >> 2) & 0x3), \ + ret, 1); \ + ret = vsetq_lane_s32( \ + vgetq_lane_s32(vreinterpretq_s32_s64(a), ((imm8) >> 4) & 0x3), \ + ret, 2); \ + ret = vsetq_lane_s32( \ + vgetq_lane_s32(vreinterpretq_s32_s64(a), ((imm8) >> 6) & 0x3), \ + ret, 3); \ + vreinterpretq_s64_s32(ret); \ + }) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ @@ -5075,6 +5191,20 @@ } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define _mm_shufflehi_epi16(a, imm8) \ + __extension__({ \ + int16x8_t ret = vreinterpretq_s16_s64(a); \ + int16x4_t highBits = vget_high_s16(ret); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm8) & (0x3)), ret, 4); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm8) >> 2) & 0x3), ret, \ + 5); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm8) >> 4) & 0x3), ret, \ + 6); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm8) >> 6) & 0x3), ret, \ + 7); \ + vreinterpretq_s64_s16(ret); \ + }) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ @@ -5112,6 +5242,20 @@ } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define _mm_shufflelo_epi16(a, imm8) \ + __extension__({ \ + int16x8_t ret = vreinterpretq_s16_s64(a); \ + int16x4_t lowBits = vget_low_s16(ret); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm8) & (0x3)), ret, 0); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm8) >> 2) & 0x3), ret, \ + 1); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm8) >> 4) & 0x3), ret, \ + 2); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm8) >> 6) & 0x3), ret, \ + 3); \ + vreinterpretq_s64_s16(ret); \ + }) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde__tmp_a_ = simde__m128i_to_private(a); \ @@ -6029,6 +6173,57 @@ SIMDE_FUNCTION_ATTRIBUTES void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + 
SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_stream_pd(mem_addr, a); @@ -7100,26 +7295,30 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_not_si128 (simde__m128i a) { - simde__m128i_private - r_, - a_ = simde__m128i_to_private(a); - - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); - #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); - #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } - #endif + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); - return simde__m128i_from_private(r_); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif } #define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/sse4.1.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/sse4.1.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/sse4.1.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/sse4.1.h 2021-04-17 01:19:49.000000000 +0000 @@ -397,7 +397,7 @@ case SIMDE_MM_FROUND_CUR_DIRECTION: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndiq_f64(a_.neon_f64); #elif defined(simde_math_nearbyint) SIMDE_VECTORIZE @@ -412,12 +412,12 @@ case SIMDE_MM_FROUND_TO_NEAREST_INT: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 0 + #elif 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndaq_f64(a_.neon_f64); - #elif defined(simde_math_round) + #elif defined(simde_math_roundeven) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_round(a_.f64[i]); + r_.f64[i] = simde_math_roundeven(a_.f64[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); @@ -427,7 +427,7 @@ case SIMDE_MM_FROUND_TO_NEG_INF: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_floor(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndmq_f64(a_.neon_f64); #else SIMDE_VECTORIZE @@ -440,7 +440,7 @@ case SIMDE_MM_FROUND_TO_POS_INF: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndpq_f64(a_.neon_f64); #elif defined(simde_math_ceil) SIMDE_VECTORIZE @@ -455,7 +455,7 @@ case SIMDE_MM_FROUND_TO_ZERO: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); - #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vrndq_f64(a_.neon_f64); #else SIMDE_VECTORIZE @@ -972,7 +972,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtepi32_epi64 (simde__m128i a) { - #if defined(SIMDE_X86_SSE4_1_NATIVE) && 0 + #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_cvtepi32_epi64(a); #elif defined(SIMDE_X86_SSE2_NATIVE) __m128i tmp = _mm_shuffle_epi32(a, 0x50); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/sse.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/sse.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/sse.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/sse.h 2021-04-17 01:19:49.000000000 +0000 @@ -385,12 +385,15 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128 -simde_mm_round_ps (simde__m128 a, int rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { simde__m128_private r_, a_ = simde__m128_to_private(a); + (void) lax_rounding; + /* For architectures which lack a current direction SIMD instruction. 
* * Note that NEON actually has a current rounding mode instruction, @@ -408,7 +411,7 @@ case SIMDE_MM_FROUND_CUR_DIRECTION: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_f32 = vrndiq_f32(a_.neon_f32); #elif defined(simde_math_nearbyintf) SIMDE_VECTORIZE @@ -421,14 +424,14 @@ break; case SIMDE_MM_FROUND_TO_NEAREST_INT: - #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 0 - r_.neon_f32 = vrndaq_f32(a_.neon_f32); - #elif defined(simde_math_roundf) + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(simde_math_roundevenf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_roundf(a_.f32[i]); + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); @@ -438,7 +441,7 @@ case SIMDE_MM_FROUND_TO_NEG_INF: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndmq_f32(a_.neon_f32); #elif defined(simde_math_floorf) SIMDE_VECTORIZE @@ -453,7 +456,7 @@ case SIMDE_MM_FROUND_TO_POS_INF: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndpq_f32(a_.neon_f32); #elif defined(simde_math_ceilf) SIMDE_VECTORIZE @@ -468,7 +471,7 @@ case SIMDE_MM_FROUND_TO_ZERO: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); - #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 0 + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndq_f32(a_.neon_f32); #elif defined(simde_math_truncf) SIMDE_VECTORIZE @@ -487,10 +490,12 @@ return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) - #define simde_mm_round_ps(a, rounding) _mm_round_ps(a, rounding) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) - #define _mm_round_ps(a, rounding) simde_mm_round_ps(a, rounding) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -779,7 +784,10 @@ SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_not_ps(simde__m128 a) { - #if defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) /* Note: we use ints instead of floats because we don't want cmpeq * to return false for (NaN, NaN) */ __m128i ai = _mm_castps_si128(a); @@ -1443,9 +1451,13 @@ r_.neon_u32 = 
vmvnq_u32(vandq_u32(ceqaa, ceqbb)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); - #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); #elif defined(simde_math_isnanf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { @@ -1728,7 +1740,8 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); - #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(__clang__) && 0 + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); #else a_ = simde__m128_to_private(a); @@ -1797,8 +1810,8 @@ simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && 0 /* TODO */ - r_.neon_f32 = vmovl_s16(vget_low_s16(vuzp1q_s16(a_.neon_i16, vmovq_n_s16(0)))); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); #else @@ -2318,8 +2331,7 @@ return a_.i16[imm8]; } #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) -# if HEDLEY_HAS_WARNING("-Wvector-conversion") - /* https://bugs.llvm.org/show_bug.cgi?id=44589 */ +# if defined(SIMDE_BUG_CLANG_44589) # define simde_mm_extract_pi16(a, imm8) ( \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \ @@ -2352,8 +2364,7 @@ return simde__m64_from_private(r_); } #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) -# if HEDLEY_HAS_WARNING("-Wvector-conversion") - /* https://bugs.llvm.org/show_bug.cgi?id=44589 */ +# if defined(SIMDE_BUG_CLANG_44589) # define ssimde_mm_insert_pi16(a, i, imm8) ( \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") \ @@ -3079,6 +3090,50 @@ # define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) #endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ETNA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ETNA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_mm_prefetch (char const* p, int i) { @@ -3091,10 +3146,20 @@ (void) i; } #if defined(SIMDE_X86_SSE_NATIVE) -# define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -# define _mm_prefetch(p, i) simde_mm_prefetch(p, i) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -3526,7 +3591,23 @@ #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) # define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && 0 +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) \ + __extension__({ \ + float32x4_t ret; \ + ret = vmovq_n_f32( \ + vgetq_lane_f32(a, (imm8) & (0x3))); \ + ret = vsetq_lane_f32( \ + vgetq_lane_f32(a, ((imm8) >> 2) & 0x3), \ + ret, 1); \ + ret = vsetq_lane_f32( \ + vgetq_lane_f32(b, ((imm8) >> 4) & 0x3), \ + ret, 2); \ + ret = vsetq_lane_f32( \ + vgetq_lane_f32(b, ((imm8) >> 6) & 0x3), \ + ret, 3); \ + }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ simde__m128_from_private((simde__m128_private) { .f32 = \ SIMDE_SHUFFLE_VECTOR_(32, 16, \ @@ -3638,7 +3719,7 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr, a_.neon_f32); - #elif defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(a_.altivec_f32, 0, mem_addr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store(mem_addr, a_.wasm_v128); diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/ssse3.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/ssse3.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/ssse3.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/ssse3.h 2021-04-17 01:19:49.000000000 +0000 @@ -630,7 +630,7 @@ a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && 0 + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); #else diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/svml.h lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/svml.h --- lightzone-4.2.2/lightcrafts/jnisrc/include/simde/x86/svml.h 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/include/simde/x86/svml.h 2021-04-17 01:19:49.000000000 +0000 @@ 
-292,7 +292,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_acosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_acoshf4_u10(a); + return Sleef_acoshf4_u10(a); #else simde__m128_private r_, @@ -317,7 +317,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_acosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_acoshd2_u10(a); + return Sleef_acoshd2_u10(a); #else simde__m128d_private r_, @@ -342,7 +342,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_acosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_acoshf8_u10(a); + return Sleef_acoshf8_u10(a); #else simde__m256_private r_, @@ -374,7 +374,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_acosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_acoshd4_u10(a); + return Sleef_acoshd4_u10(a); #else simde__m256d_private r_, @@ -405,7 +405,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_acosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_acoshf16_u10(a); + return Sleef_acoshf16_u10(a); #else simde__m512_private r_, @@ -436,7 +436,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_acosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_acoshd8_u10(a); + return Sleef_acoshd8_u10(a); #else simde__m512d_private r_, @@ -722,7 +722,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_asinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_asinhf4_u10(a); + return Sleef_asinhf4_u10(a); #else simde__m128_private r_, @@ -747,7 +747,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_asinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_asinhd2_u10(a); + return Sleef_asinhd2_u10(a); #else simde__m128d_private r_, @@ -772,7 +772,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_asinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_asinhf8_u10(a); + return Sleef_asinhf8_u10(a); #else simde__m256_private r_, @@ -804,7 +804,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_asinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_asinhd4_u10(a); + return Sleef_asinhd4_u10(a); #else simde__m256d_private r_, @@ -835,7 +835,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_asinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_asinhf16_u10(a); + return Sleef_asinhf16_u10(a); #else simde__m512_private r_, @@ -866,7 +866,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_asinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_asinhd8_u10(a); + return Sleef_asinhd8_u10(a); #else simde__m512d_private r_, @@ -1385,7 +1385,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_SSE_NATIVE) - return Sleef_atanhf4_u10(a); + return Sleef_atanhf4_u10(a); #else simde__m128_private r_, @@ -1410,7 +1410,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_atanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_atanhd2_u10(a); + return Sleef_atanhd2_u10(a); #else simde__m128d_private r_, @@ -1435,7 +1435,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_atanhf8_u10(a); + return Sleef_atanhf8_u10(a); #else simde__m256_private r_, @@ -1467,7 +1467,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_atanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_atanhd4_u10(a); + return Sleef_atanhd4_u10(a); #else simde__m256d_private r_, @@ -1498,7 +1498,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_atanhf16_u10(a); + return Sleef_atanhf16_u10(a); #else simde__m512_private r_, @@ -1529,7 +1529,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_atanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_atanhd8_u10(a); + return Sleef_atanhd8_u10(a); #else simde__m512d_private r_, @@ -1588,7 +1588,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cbrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_cbrtf4_u10(a); + return Sleef_cbrtf4_u10(a); #else simde__m128_private r_, @@ -1613,7 +1613,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cbrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_cbrtd2_u10(a); + return Sleef_cbrtd2_u10(a); #else simde__m128d_private r_, @@ -1638,7 +1638,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cbrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_cbrtf8_u10(a); + return Sleef_cbrtf8_u10(a); #else simde__m256_private r_, @@ -1670,7 +1670,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cbrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_cbrtd4_u10(a); + return Sleef_cbrtd4_u10(a); #else simde__m256d_private r_, @@ -1701,7 +1701,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cbrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_cbrtf16_u10(a); + return Sleef_cbrtf16_u10(a); #else simde__m512_private r_, @@ -1732,7 +1732,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cbrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_cbrtd8_u10(a); + return Sleef_cbrtd8_u10(a); #else simde__m512d_private r_, @@ -2075,9 +2075,9 @@ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PI_OVER_180F); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; + const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; r_.f32 = a_.f32 * tmp; #else SIMDE_VECTORIZE @@ -2103,9 +2103,9 @@ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI_OVER_180); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; r_.f64 = a_.f64 * tmp; #else SIMDE_VECTORIZE @@ -2133,9 +2133,9 @@ r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f32) tmp = { + const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; @@ -2166,9 +2166,9 @@ r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; r_.f64 = a_.f64 * tmp; #else SIMDE_VECTORIZE @@ -2196,9 +2196,9 @@ r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f32) tmp = { + const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, @@ -2231,9 +2231,9 @@ r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]); } #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) - r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - const __typeof__(r_.f64) tmp = { + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; @@ -2481,7 +2481,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_coshf4_u10(a); + return Sleef_coshf4_u10(a); #else simde__m128_private r_, @@ -2506,7 +2506,7 @@ 
#if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_cosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_coshd2_u10(a); + return Sleef_coshd2_u10(a); #else simde__m128d_private r_, @@ -2531,7 +2531,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_coshf8_u10(a); + return Sleef_coshf8_u10(a); #else simde__m256_private r_, @@ -2563,7 +2563,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_cosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_coshd4_u10(a); + return Sleef_coshd4_u10(a); #else simde__m256d_private r_, @@ -2594,7 +2594,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cosh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_coshf16_u10(a); + return Sleef_coshf16_u10(a); #else simde__m512_private r_, @@ -2625,7 +2625,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_cosh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_coshd8_u10(a); + return Sleef_coshd8_u10(a); #else simde__m512d_private r_, @@ -2692,7 +2692,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 / b_.i8; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { @@ -2722,7 +2722,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 / b_.i16; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { @@ -2752,7 +2752,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 / b_.i32; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { @@ -2785,7 +2785,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 / b_.i64; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { @@ -2815,7 +2815,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 / b_.u8; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { @@ -2845,7 +2845,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 / b_.u16; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { @@ -2875,7 +2875,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 / b_.u32; #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { @@ -2908,7 +2908,7 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 / b_.u64; #elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { @@ -3077,10 +3077,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 / b_.u8; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] / b_.u8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -3105,10 +3111,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 / b_.u16; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] / b_.u16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -3133,10 +3145,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 / b_.u32; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] / b_.u32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -3164,10 +3182,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 / b_.u64; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] / b_.u64[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -3342,10 +3366,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 / b_.u8; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] / b_.u8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif #endif return 
simde__m512i_from_private(r_); @@ -3370,10 +3400,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 / b_.u16; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] / b_.u16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -3398,10 +3434,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 / b_.u32; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] / b_.u32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -3440,10 +3482,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 / b_.u64; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] / b_.u64[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -3460,7 +3508,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erf_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erff4_u10(a); + return Sleef_erff4_u10(a); #else simde__m128_private r_, @@ -3485,7 +3533,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erf_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erfd2_u10(a); + return Sleef_erfd2_u10(a); #else simde__m128d_private r_, @@ -3510,7 +3558,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erf_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erff8_u10(a); + return Sleef_erff8_u10(a); #else simde__m256_private r_, @@ -3542,7 +3590,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erf_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erfd4_u10(a); + return Sleef_erfd4_u10(a); #else simde__m256d_private r_, @@ -3573,7 +3621,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erf_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erff16_u10(a); + return Sleef_erff16_u10(a); #else simde__m512_private r_, @@ -3604,7 +3652,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erf_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erfd8_u10(a); + return Sleef_erfd8_u10(a); #else simde__m512d_private r_, @@ -3663,7 +3711,7 @@ #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erfcf4_u15(a); + return Sleef_erfcf4_u15(a); #else simde__m128_private r_, @@ -3688,7 +3736,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_erfc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_erfcd2_u15(a); + return Sleef_erfcd2_u15(a); #else simde__m128d_private r_, @@ -3713,7 +3761,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erfcf8_u15(a); + return Sleef_erfcf8_u15(a); #else simde__m256_private r_, @@ -3745,7 +3793,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_erfc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_erfcd4_u15(a); + return Sleef_erfcd4_u15(a); #else simde__m256d_private r_, @@ -3776,7 +3824,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erfcf16_u15(a); + return Sleef_erfcf16_u15(a); #else simde__m512_private r_, @@ -3807,7 +3855,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_erfc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_erfcd8_u15(a); + return Sleef_erfcd8_u15(a); #else simde__m512d_private r_, @@ -3866,7 +3914,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expf4_u10(a); + return Sleef_expf4_u10(a); #else simde__m128_private r_, @@ -3891,7 +3939,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expd2_u10(a); + return Sleef_expd2_u10(a); #else simde__m128d_private r_, @@ -3916,7 +3964,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expf8_u10(a); + return Sleef_expf8_u10(a); #else simde__m256_private r_, @@ -3948,7 +3996,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expd4_u10(a); + return Sleef_expd4_u10(a); #else simde__m256d_private r_, @@ -3979,7 +4027,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expf16_u10(a); + return Sleef_expf16_u10(a); #else simde__m512_private r_, @@ -4010,7 +4058,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expd8_u10(a); + return Sleef_expd8_u10(a); #else simde__m512d_private r_, @@ -4069,7 +4117,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_expm1_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expm1f4_u10(a); + return 
Sleef_expm1f4_u10(a); #else simde__m128_private r_, @@ -4094,7 +4142,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_expm1_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_expm1d2_u10(a); + return Sleef_expm1d2_u10(a); #else simde__m128d_private r_, @@ -4119,7 +4167,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_expm1_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expm1f8_u10(a); + return Sleef_expm1f8_u10(a); #else simde__m256_private r_, @@ -4151,7 +4199,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_expm1_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_expm1d4_u10(a); + return Sleef_expm1d4_u10(a); #else simde__m256d_private r_, @@ -4182,7 +4230,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_expm1_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expm1f16_u10(a); + return Sleef_expm1f16_u10(a); #else simde__m512_private r_, @@ -4213,7 +4261,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_expm1_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_expm1d8_u10(a); + return Sleef_expm1d8_u10(a); #else simde__m512d_private r_, @@ -4272,7 +4320,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp2f4_u10(a); + return Sleef_exp2f4_u10(a); #else simde__m128_private r_, @@ -4297,7 +4345,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp2d2_u10(a); + return Sleef_exp2d2_u10(a); #else simde__m128d_private r_, @@ -4322,7 +4370,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp2f8_u10(a); + return Sleef_exp2f8_u10(a); #else simde__m256_private r_, @@ -4354,7 +4402,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp2d4_u10(a); + return Sleef_exp2d4_u10(a); #else simde__m256d_private r_, @@ -4385,7 +4433,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp2_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp2f16_u10(a); + return Sleef_exp2f16_u10(a); #else simde__m512_private r_, @@ -4416,7 +4464,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp2_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp2d8_u10(a); + return Sleef_exp2d8_u10(a); #else simde__m512d_private r_, @@ -4475,7 +4523,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp10f4_u10(a); + return Sleef_exp10f4_u10(a); #else simde__m128_private r_, @@ -4500,7 +4548,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_exp10_pd(a); 
#elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_exp10d2_u10(a); + return Sleef_exp10d2_u10(a); #else simde__m128d_private r_, @@ -4525,7 +4573,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp10f8_u10(a); + return Sleef_exp10f8_u10(a); #else simde__m256_private r_, @@ -4557,7 +4605,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_exp10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_exp10d4_u10(a); + return Sleef_exp10d4_u10(a); #else simde__m256d_private r_, @@ -4588,7 +4636,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp10f16_u10(a); + return Sleef_exp10f16_u10(a); #else simde__m512_private r_, @@ -4619,7 +4667,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_exp10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_exp10d8_u10(a); + return Sleef_exp10d8_u10(a); #else simde__m512d_private r_, @@ -5163,10 +5211,16 @@ a_ = simde__m256_to_private(a), b_ = simde__m256_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]); } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + #endif return simde__m256_from_private(r_); #endif @@ -5194,10 +5248,16 @@ a_ = simde__m256d_to_private(a), b_ = simde__m256d_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + #endif return simde__m256d_from_private(r_); #endif @@ -5224,10 +5284,16 @@ a_ = simde__m512_to_private(a), b_ = simde__m512_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { - r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + #endif return simde__m512_from_private(r_); #endif @@ -5254,10 +5320,16 @@ a_ = simde__m512d_to_private(a), b_ = simde__m512d_to_private(b); - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { - r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = 
simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + #endif return simde__m512d_from_private(r_); #endif @@ -5468,8 +5540,6 @@ simde_mm_log_ps (simde__m128 a) { #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log_ps(a); - #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && 0 - return vec_loge(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) #if SIMDE_ACCURACY_PREFERENCE > 1 return Sleef_logf4_u10(a); @@ -8068,7 +8138,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log1p_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_log1pf4_u10(a); + return Sleef_log1pf4_u10(a); #else simde__m128_private r_, @@ -8093,7 +8163,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log1p_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_log1pd2_u10(a); + return Sleef_log1pd2_u10(a); #else simde__m128d_private r_, @@ -8118,7 +8188,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log1p_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_log1pf8_u10(a); + return Sleef_log1pf8_u10(a); #else simde__m256_private r_, @@ -8150,7 +8220,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log1p_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_log1pd4_u10(a); + return Sleef_log1pd4_u10(a); #else simde__m256d_private r_, @@ -8181,7 +8251,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log1p_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_log1pf16_u10(a); + return Sleef_log1pf16_u10(a); #else simde__m512_private r_, @@ -8212,7 +8282,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log1p_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_log1pd8_u10(a); + return Sleef_log1pd8_u10(a); #else simde__m512d_private r_, @@ -8271,7 +8341,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_log10f4_u10(a); + return Sleef_log10f4_u10(a); #else simde__m128_private r_, @@ -8296,7 +8366,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_log10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_log10d2_u10(a); + return Sleef_log10d2_u10(a); #else simde__m128d_private r_, @@ -8321,7 +8391,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_log10f8_u10(a); + return Sleef_log10f8_u10(a); #else simde__m256_private r_, @@ -8353,7 +8423,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_log10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_log10d4_u10(a); + return Sleef_log10d4_u10(a); #else simde__m256d_private r_, @@ -8384,7 +8454,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return 
_mm512_log10_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_log10f16_u10(a); + return Sleef_log10f16_u10(a); #else simde__m512_private r_, @@ -8415,7 +8485,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_log10_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_log10d8_u10(a); + return Sleef_log10d8_u10(a); #else simde__m512d_private r_, @@ -8548,7 +8618,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_pow_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_powf4_u10(a, b); + return Sleef_powf4_u10(a, b); #else simde__m128_private r_, @@ -8574,7 +8644,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_pow_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_powd2_u10(a, b); + return Sleef_powd2_u10(a, b); #else simde__m128d_private r_, @@ -8600,7 +8670,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_pow_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_powf8_u10(a, b); + return Sleef_powf8_u10(a, b); #else simde__m256_private r_, @@ -8627,7 +8697,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_pow_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_powd4_u10(a, b); + return Sleef_powd4_u10(a, b); #else simde__m256d_private r_, @@ -8653,7 +8723,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_pow_ps(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_powf16_u10(a, b); + return Sleef_powf16_u10(a, b); #else simde__m512_private r_, @@ -8679,7 +8749,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_pow_pd(a, b); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_powd8_u10(a, b); + return Sleef_powd8_u10(a, b); #else simde__m512d_private r_, @@ -9081,10 +9151,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 % b_.i8; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] % b_.i8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9109,10 +9185,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 % b_.i16; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] % b_.i16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9137,10 +9219,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 % b_.i32; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / 
sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] % b_.i32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9168,10 +9256,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 % b_.i64; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9196,10 +9290,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 % b_.u8; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] % b_.u8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9224,10 +9324,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 % b_.u16; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] % b_.u16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9252,10 +9358,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 % b_.u32; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] % b_.u32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9283,10 +9395,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 % b_.u64; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] % b_.u64[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif #endif return simde__m256i_from_private(r_); @@ -9311,10 +9429,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 % b_.i8; #else - SIMDE_VECTORIZE - 
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { - r_.i8[i] = a_.i8[i] % b_.i8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9339,10 +9463,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 % b_.i16; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { - r_.i16[i] = a_.i16[i] % b_.i16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9367,10 +9497,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 % b_.i32; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { - r_.i32[i] = a_.i32[i] % b_.i32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9409,10 +9545,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 % b_.i64; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9437,10 +9579,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u8 = a_.u8 % b_.u8; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { - r_.u8[i] = a_.u8[i] % b_.u8[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9465,10 +9613,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u16 = a_.u16 % b_.u16; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { - r_.u16[i] = a_.u16[i] % b_.u16[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9493,10 +9647,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) 
r_.u32 = a_.u32 % b_.u32; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { - r_.u32[i] = a_.u32[i] % b_.u32[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9535,10 +9695,16 @@ #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 % b_.u64; #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { - r_.u64[i] = a_.u64[i] % b_.u64[i]; - } + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif #endif return simde__m512i_from_private(r_); @@ -9611,7 +9777,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_rint_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_rintf16(a); + return Sleef_rintf16(a); #else simde__m512_private r_, @@ -9636,7 +9802,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_rint_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_rintd8(a); + return Sleef_rintd8(a); #else simde__m512d_private r_, @@ -10357,7 +10523,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_sinhf4_u10(a); + return Sleef_sinhf4_u10(a); #else simde__m128_private r_, @@ -10382,7 +10548,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_sinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_sinhd2_u10(a); + return Sleef_sinhd2_u10(a); #else simde__m128d_private r_, @@ -10407,7 +10573,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_sinhf8_u10(a); + return Sleef_sinhf8_u10(a); #else simde__m256_private r_, @@ -10439,7 +10605,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_sinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_sinhd4_u10(a); + return Sleef_sinhd4_u10(a); #else simde__m256d_private r_, @@ -10470,7 +10636,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sinh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_sinhf16_u10(a); + return Sleef_sinhf16_u10(a); #else simde__m512_private r_, @@ -10501,7 +10667,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_sinh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_sinhd8_u10(a); + return Sleef_sinhd8_u10(a); #else simde__m512d_private r_, @@ -10560,7 +10726,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_ceil_ps(a); #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_ceilf4(a); + return Sleef_ceilf4(a); #else return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); #endif @@ -10576,7 +10742,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_ceil_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_ceild2(a); + return Sleef_ceild2(a); #else return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); #endif @@ -10592,7 +10758,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_ceil_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_ceilf8(a); + return Sleef_ceilf8(a); #else return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); #endif @@ -10608,7 +10774,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_ceil_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_ceild4(a); + return Sleef_ceild4(a); #else return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); #endif @@ -10624,7 +10790,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_ceil_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_ceilf16(a); + return Sleef_ceilf16(a); #else simde__m512_private r_, @@ -10655,7 +10821,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_ceil_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_ceild8(a); + return Sleef_ceild8(a); #else simde__m512d_private r_, @@ -10714,7 +10880,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_floor_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_floorf4(a); + return Sleef_floorf4(a); #else return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif @@ -10730,7 +10896,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_floor_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_floord2(a); + return Sleef_floord2(a); #else return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif @@ -10746,7 +10912,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_floor_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_floorf8(a); + return Sleef_floorf8(a); #else return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif @@ -10762,7 +10928,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_floor_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_floord4(a); + return Sleef_floord4(a); #else return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); #endif @@ -10778,7 +10944,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_floor_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_floorf16(a); + return Sleef_floorf16(a); #else simde__m512_private r_, @@ -10809,7 +10975,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_floor_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_floord8(a); + return Sleef_floord8(a); #else simde__m512d_private r_, 
@@ -10868,7 +11034,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_round_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_roundf4(a); + return Sleef_roundf4(a); #else simde__m128_private r_, @@ -10893,7 +11059,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_round_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_roundd2(a); + return Sleef_roundd2(a); #else simde__m128d_private r_, @@ -10918,7 +11084,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_round_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_roundf8(a); + return Sleef_roundf8(a); #else simde__m256_private r_, @@ -10950,7 +11116,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_round_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_roundd4(a); + return Sleef_roundd4(a); #else simde__m256d_private r_, @@ -10981,7 +11147,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_svml_round_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_roundd8(a); + return Sleef_roundd8(a); #else simde__m512d_private r_, @@ -11026,7 +11192,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_sqrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_sqrtf4(a); + return Sleef_sqrtf4(a); #else return simde_mm_sqrt_ps(a); #endif @@ -11042,7 +11208,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_svml_sqrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_sqrtd2(a); + return Sleef_sqrtd2(a); #else return simde_mm_sqrt_pd(a); #endif @@ -11058,7 +11224,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_sqrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_sqrtf8(a); + return Sleef_sqrtf8(a); #else return simde_mm256_sqrt_ps(a); #endif @@ -11074,7 +11240,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_svml_sqrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_sqrtd4(a); + return Sleef_sqrtd4(a); #else return simde_mm256_sqrt_pd(a); #endif @@ -11090,7 +11256,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_svml_sqrt_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_sqrtf16(a); + return Sleef_sqrtf16(a); #else return simde_mm512_sqrt_ps(a); #endif @@ -11106,7 +11272,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_svml_sqrt_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_sqrtd8(a); + return Sleef_sqrtd8(a); #else return simde_mm512_sqrt_pd(a); #endif @@ -11575,7 +11741,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_tanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_tanhf4_u10(a); + return Sleef_tanhf4_u10(a); #else simde__m128_private r_, @@ -11600,7 +11766,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_tanh_pd(a); #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_tanhd2_u10(a); + return Sleef_tanhd2_u10(a); #else simde__m128d_private r_, @@ -11625,7 +11791,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_tanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_tanhf8_u10(a); + return Sleef_tanhf8_u10(a); #else simde__m256_private r_, @@ -11657,7 +11823,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_tanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_tanhd4_u10(a); + return Sleef_tanhd4_u10(a); #else simde__m256d_private r_, @@ -11688,7 +11854,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_tanh_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_tanhf16_u10(a); + return Sleef_tanhf16_u10(a); #else simde__m512_private r_, @@ -11719,7 +11885,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) return _mm512_tanh_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) - return Sleef_tanhd8_u10(a); + return Sleef_tanhd8_u10(a); #else simde__m512d_private r_, @@ -11778,7 +11944,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_trunc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_truncf4(a); + return Sleef_truncf4(a); #else return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); #endif @@ -11794,7 +11960,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) return _mm_trunc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) - return Sleef_truncd2(a); + return Sleef_truncd2(a); #else return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO); #endif @@ -11810,7 +11976,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_trunc_ps(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_truncf8(a); + return Sleef_truncf8(a); #else return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); #endif @@ -11826,7 +11992,7 @@ #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) return _mm256_trunc_pd(a); #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) - return Sleef_truncd4(a); + return Sleef_truncd4(a); #else return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO); #endif diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/jni.mk lightzone-4.2.3/lightcrafts/jnisrc/jni.mk --- lightzone-4.2.2/lightcrafts/jnisrc/jni.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/jni.mk 2021-04-17 01:19:49.000000000 +0000 @@ -33,8 +33,8 @@ # platform only, the makefile can replace "EXTRA" with one of "MACOSX", # "WINDOWS" or "LINUX". # -# In addition to the above, there are also JNI_PPC_CFLAGS, JNI_PPC_DEFINES, and -# JNI_PPC_LDFLAGS for PowerPC-specific directives, and JNI_X86_CFLAGS, +# In addition to the above, there are also JNI_ARM_CFLAGS, JNI_ARM_DEFINES, and +# JNI_ARM_LDFLAGS for arm64-specific directives, and JNI_X86_CFLAGS, # JNI_X86_DEFINES, and JNI_X86_LDFLAGS for Intel-specific directives. 
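The svml.h hunks above all apply the same pattern: where the old code fell back to a scalar SIMDE_VECTORIZE loop, the new code checks SIMDE_NATURAL_VECTOR_SIZE_LE(128) (or LE(256) for the 512-bit forms) and, when only narrower vectors are natively available, loops over the m128i/m256i sub-vectors of the private representation and delegates each half to the already-implemented narrower routine (simde_mm_div_epu32, simde_mm256_hypot_ps, simde_mm_rem_epi8, and so on). Below is a minimal stand-alone sketch of that split-and-delegate shape in plain C; the vec128_u32/vec256_u32 types and div_* helpers are hypothetical stand-ins, not SIMDE's actual types or functions.

    #include <stddef.h>
    #include <stdint.h>

    typedef struct { uint32_t u32[4]; } vec128_u32;           /* stand-in for a 128-bit lane   */
    typedef struct { vec128_u32 m128[2]; } vec256_u32;        /* stand-in for a 256-bit vector */

    /* Narrow (128-bit) routine: element-wise unsigned division. */
    static vec128_u32 div_u32x4(vec128_u32 a, vec128_u32 b) {
        vec128_u32 r;
        for (size_t i = 0; i < sizeof(r.u32) / sizeof(r.u32[0]); i++)
            r.u32[i] = a.u32[i] / b.u32[i];
        return r;
    }

    /* Wide (256-bit) routine built by splitting the operands into two 128-bit
     * halves and reusing the narrow routine for each half -- the same shape as
     * the SIMDE_NATURAL_VECTOR_SIZE_LE(128) branches added in the hunks above. */
    static vec256_u32 div_u32x8(vec256_u32 a, vec256_u32 b) {
        vec256_u32 r;
        for (size_t i = 0; i < sizeof(r.m128) / sizeof(r.m128[0]); i++)
            r.m128[i] = div_u32x4(a.m128[i], b.m128[i]);
        return r;
    }

    int main(void) {
        vec256_u32 a = {{ {{8, 9, 10, 11}}, {{12, 13, 14, 15}} }};
        vec256_u32 b = {{ {{2, 3,  5,  1}}, {{ 4, 13,  7,  5}} }};
        vec256_u32 r = div_u32x8(a, b);
        return (int) r.m128[0].u32[0];   /* 8 / 2 == 4 */
    }

The point of the change is that on targets whose natural vector width is 128 (or 256) bits, the wide operations still go through whatever SIMD the narrower implementations provide instead of dropping straight to element-by-element scalar arithmetic.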
# # If a makefile needs to override how the TARGET is build, it can do: @@ -57,7 +57,7 @@ include $(COMMON_DIR)/mk/platform.mk ifeq ($(UNIVERSAL),1) - CFLAGS_PPC:= $(PLATFORM_CFLAGS_PPC) $(JNI_EXTRA_CFLAGS) + CFLAGS_ARM:= $(PLATFORM_CFLAGS_ARM) $(JNI_EXTRA_CFLAGS) CFLAGS_X86:= $(PLATFORM_CFLAGS_X86) $(JNI_EXTRA_CFLAGS) else CFLAGS:= $(PLATFORM_CFLAGS) $(JNI_EXTRA_CFLAGS) @@ -82,7 +82,7 @@ ifeq ($(PLATFORM),MacOSX) DEFINES+= $(JNI_MACOSX_DEFINES) INCLUDES:= $(MACOSX_ISYSROOT) $(INCLUDES) $(JNI_MACOSX_INCLUDES) - LDFLAGS+= -dynamiclib -framework JavaVM $(JNI_MACOSX_LDFLAGS) + LDFLAGS+= -dynamiclib $(JNI_MACOSX_LDFLAGS) LINK+= $(JNI_MACOSX_LINK) ifdef JNI_MACOSX_DYLIB JNILIB_EXT:= $(DYLIB_EXT) @@ -93,15 +93,15 @@ endif endif ifeq ($(UNIVERSAL),1) - CFLAGS_PPC+= $(JNI_MACOSX_CFLAGS) $(JNI_PPC_CFLAGS) + CFLAGS_ARM+= $(JNI_MACOSX_CFLAGS) $(JNI_ARM_CFLAGS) CFLAGS_X86+= $(JNI_MACOSX_CFLAGS) $(JNI_X86_CFLAGS) else CFLAGS+= $(JNI_MACOSX_CFLAGS) - ifeq ($(PROCESSOR),powerpc) - CFLAGS+= $(JNI_PPC_CFLAGS) - DEFINES+= $(JNI_PPC_DEFINES) - LDFLAGS+= $(JNI_PPC_LDFLAGS) - LINK+= $(JNI_PPC_LINK) + ifeq ($(PROCESSOR),arm64) + CFLAGS+= $(JNI_ARM_CFLAGS) + DEFINES+= $(JNI_ARM_DEFINES) + LDFLAGS+= $(JNI_ARM_LDFLAGS) + LINK+= $(JNI_ARM_LINK) endif ifeq ($(PROCESSOR),x86_64) CFLAGS+= $(JNI_X86_CFLAGS) @@ -147,9 +147,9 @@ endif ifeq ($(UNIVERSAL),1) - CFLAGS_PPC+= $(DEFINES) $(JNI_PPC_DEFINES) + CFLAGS_ARM+= $(DEFINES) $(JNI_ARM_DEFINES) CFLAGS_X86+= $(DEFINES) $(JNI_X86_DEFINES) - INCLUDES_PPC:= $(INCLUDES) $(JNI_PPC_INCLUDES) + INCLUDES_ARM:= $(INCLUDES) $(JNI_ARM_INCLUDES) INCLUDES_X86:= $(INCLUDES) $(JNI_X86_INCLUDES) else CFLAGS+= $(DEFINES) @@ -174,7 +174,7 @@ # These are always defined even when UNIVERSAL is not set so a "make disclean" # will remove them. ## -TARGET_PPC:= $(JNILIB_PREFIX)$(TARGET_BASE)-ppc$(JNILIB_EXT) +TARGET_ARM:= $(JNILIB_PREFIX)$(TARGET_BASE)-arm64$(JNILIB_EXT) TARGET_X86:= $(JNILIB_PREFIX)$(TARGET_BASE)-x86$(JNILIB_EXT) ## @@ -213,21 +213,21 @@ ifeq ($(UNIVERSAL),1) -$(TARGET): $(TARGET_PPC) $(TARGET_X86) +$(TARGET): $(TARGET_ARM) $(TARGET_X86) -$(MKDIR) $(TARGET_DIR) - $(LIPO) -create $(TARGET_PPC) $(TARGET_X86) -output $@ + $(LIPO) -create $(TARGET_ARM) $(TARGET_X86) -output $@ ifeq ($(PLATFORM),MacOSX) cp -p $@ $(TARGET_DIR) endif ifndef JNI_MANUAL_TARGET ifdef USE_AR_RANLIB -$(TARGET_PPC): $(OBJECTS_PPC) $(BUILT_LIBS) - ar -rc $@ *-ppc.o +$(TARGET_ARM): $(OBJECTS_ARM) $(BUILT_LIBS) + ar -rc $@ *-arm64.o -ranlib $@ else -$(TARGET_PPC): $(OBJECTS_PPC) $(LOCAL_RANLIBS) $(BUILT_LIBS) - $(CC_LINK) $(CFLAGS_PPC) $(LDFLAGS) -o $@ *-ppc.o $(LINK) +$(TARGET_ARM): $(OBJECTS_ARM) $(LOCAL_RANLIBS) $(BUILT_LIBS) + $(CC_LINK) $(CFLAGS_ARM) $(LDFLAGS) -o $@ *-arm64.o $(LINK) endif ifdef USE_AR_RANLIB @@ -274,6 +274,6 @@ $(RM) *.o .*.d javah *-ranlib.a *.dSYM *.res $(TARGET).dSYM $(JNI_EXTRA_CLEAN) distclean mostlyclean: clean - $(RM) $(TARGET) $(TARGET_IMPLIB) $(TARGET_PPC) $(TARGET_X86) $(POST_TARGET) $(JNI_EXTRA_DISTCLEAN) + $(RM) $(TARGET) $(TARGET_IMPLIB) $(TARGET_ARM) $(TARGET_X86) $(POST_TARGET) $(JNI_EXTRA_DISTCLEAN) # vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/jpeg/GNUmakefile lightzone-4.2.3/lightcrafts/jnisrc/jpeg/GNUmakefile --- lightzone-4.2.2/lightcrafts/jnisrc/jpeg/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/jpeg/GNUmakefile 2021-04-17 01:19:49.000000000 +0000 @@ -1,7 +1,3 @@ -ROOT:= ../../.. 
-COMMON_DIR:= $(ROOT)/lightcrafts -include $(COMMON_DIR)/mk/platform.mk - ## # Build rules ## diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/jpeg/lcjpeg.mk lightzone-4.2.3/lightcrafts/jnisrc/jpeg/lcjpeg.mk --- lightzone-4.2.2/lightcrafts/jnisrc/jpeg/lcjpeg.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/jpeg/lcjpeg.mk 2021-04-17 01:19:49.000000000 +0000 @@ -1,7 +1,3 @@ -ROOT:= ../../.. -COMMON_DIR:= $(ROOT)/lightcrafts -include $(COMMON_DIR)/mk/platform.mk - HIGH_PERFORMANCE:= 1 TARGET_BASE:= LCJPEG @@ -10,9 +6,7 @@ #DEBUG:= true JNI_EXTRA_CFLAGS:= -fexceptions -std=c++0x -JNI_EXTRA_INCLUDES:= $(shell $(PKGCFG) --cflags libjpeg) -JNI_EXTRA_LINK:= $(shell $(PKGCFG) --libs-only-l libjpeg) -JNI_EXTRA_LDFLAGS:= $(shell $(PKGCFG) --libs-only-L libjpeg) +JNI_EXTRA_PKGCFG:= libjpeg JNI_WINDOWS_LINK:= -lLCJNI JNI_LINUX_LINK:= -lLCJNI JNI_MACOSX_LINK:= ../jniutils/libLCJNI.a @@ -22,6 +16,7 @@ JAVAH_CLASSES:= com.lightcrafts.image.libs.LCJPEGReader \ com.lightcrafts.image.libs.LCJPEGWriter +ROOT:= ../../.. include ../jni.mk # vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/lensfun/GNUmakefile lightzone-4.2.3/lightcrafts/jnisrc/lensfun/GNUmakefile --- lightzone-4.2.2/lightcrafts/jnisrc/lensfun/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/lensfun/GNUmakefile 2021-04-17 01:19:49.000000000 +0000 @@ -1,19 +1,9 @@ HIGH_PERFORMANCE:= 1 -ROOT:= ../../.. -COMMON_DIR:= $(ROOT)/lightcrafts -include $(COMMON_DIR)/mk/platform.mk - TARGET_BASE:= LCLENSFUN JNI_EXTRA_CFLAGS:= -std=c++14 -JNI_EXTRA_INCLUDES:= $(shell $(PKGCFG) --cflags lensfun) -JNI_EXTRA_LINK:= $(shell $(PKGCFG) --libs-only-l lensfun) -ifneq ($(PLATFORM),MacOSX) - JNI_EXTRA_LDFLAGS:= $(shell $(PKGCFG) --libs-only-L lensfun) -endif -JNI_MACOSX_INCLUDES:= -I/usr/local/include -JNI_MACOSX_LDFLAGS:= -L/usr/local/lib +JNI_EXTRA_PKGCFG:= lensfun JAVAH_CLASSES:= com.lightcrafts.utils.Lensfun diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/lensfun/LC_lensfun.cpp lightzone-4.2.3/lightcrafts/jnisrc/lensfun/LC_lensfun.cpp --- lightzone-4.2.2/lightcrafts/jnisrc/lensfun/LC_lensfun.cpp 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/lensfun/LC_lensfun.cpp 2021-04-17 01:19:49.000000000 +0000 @@ -173,15 +173,17 @@ } #if (LF_VERSION >= 0x00035f00) // 0.3.95 - lfLensCalibDistortion dc = {LF_DIST_MODEL_POLY5, focal, focal, false, {k1, k2}}; + lfLensCalibAttributes attr = {0, 0, 1, fullWidth / float(fullHeight)}; + lfLensCalibDistortion dc = {LF_DIST_MODEL_POLY5, focal, focal, 0, {k1, k2}, attr}; + lfLensCalibTCA tcac = {LF_TCA_MODEL_LINEAR, focal, {kr, kb}, attr}; + lens->RemoveCalibrations(); #else lfLensCalibDistortion dc = {LF_DIST_MODEL_POLY5, focal, {k1, k2}}; + lfLensCalibTCA tcac = {LF_TCA_MODEL_LINEAR, focal, {kr, kb}}; #endif - // FIXME: Wrong autoscale, cf. https://github.com/lensfun/lensfun/issues/945 lens->AddCalibDistortion(&dc); - - lfLensCalibTCA tcac = {LF_TCA_MODEL_LINEAR, focal, {kr, kb}}; lens->AddCalibTCA(&tcac); + // FIXME: Wrong autoscale, cf. https://github.com/lensfun/lensfun/issues/945 lf->initModifier(fullWidth, fullHeight, 1, lens, focal, aperture); } @@ -281,6 +283,10 @@ delete ldb; ldb = nullptr; } + if (default_lens) { + delete default_lens; + default_lens = nullptr; + } } const lfCamera* LC_lensfun::findCamera( @@ -368,7 +374,11 @@ std::cout << "Lensfun: fallback to the default lens" << std::endl; } +#if (LF_VERSION >= 0x00035f00) // 0.3.95 + const float crop = camera ? 
camera->CropFactor : 1.0f; +#else const float crop = camera ? camera->CropFactor : lens->CropFactor; +#endif initModifier(fullWidth, fullHeight, crop, lens, focal, aperture); } @@ -553,4 +563,3 @@ srcRectParams[2] = bottomMost - topMost; srcRectParams[3] = rightMost - leftMost; } - diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/tiff/GNUmakefile lightzone-4.2.3/lightcrafts/jnisrc/tiff/GNUmakefile --- lightzone-4.2.2/lightcrafts/jnisrc/tiff/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/tiff/GNUmakefile 2021-04-17 01:19:49.000000000 +0000 @@ -1,7 +1,3 @@ -ROOT:= ../../.. -COMMON_DIR:= $(ROOT)/lightcrafts -include $(COMMON_DIR)/mk/platform.mk - ## # Build rules ## diff -Nru lightzone-4.2.2/lightcrafts/jnisrc/tiff/lctiff.mk lightzone-4.2.3/lightcrafts/jnisrc/tiff/lctiff.mk --- lightzone-4.2.2/lightcrafts/jnisrc/tiff/lctiff.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/jnisrc/tiff/lctiff.mk 2021-04-17 01:19:49.000000000 +0000 @@ -1,7 +1,3 @@ -ROOT:= ../../.. -COMMON_DIR:= $(ROOT)/lightcrafts -include $(COMMON_DIR)/mk/platform.mk - HIGH_PERFORMANCE:= 1 ifeq ($(PLATFORM),MacOSX) USE_ICC_HERE:= 1 @@ -12,9 +8,7 @@ # Uncomment to compile in debug mode. #DEBUG:= true -JNI_EXTRA_INCLUDES:= $(shell $(PKGCFG) --cflags libtiff-4) -JNI_EXTRA_LINK:= $(shell $(PKGCFG) --libs-only-l libtiff-4) -JNI_EXTRA_LDFLAGS:= $(shell $(PKGCFG) --libs-only-L libtiff-4) +JNI_EXTRA_PKGCFG:= libtiff-4 JNI_WINDOWS_LINK:= -Wl,-Bdynamic -lLCJNI -Wl,-Bstatic -lstdc++ JNI_LINUX_LINK:= -lLCJNI -lstdc++ JNI_MACOSX_LINK:= ../jniutils/libLCJNI.a @@ -27,6 +21,7 @@ com.lightcrafts.image.libs.LCTIFFReader \ com.lightcrafts.image.libs.LCTIFFWriter +ROOT:= ../../.. include ../jni.mk # vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/lightcrafts/lightcrafts.iml lightzone-4.2.3/lightcrafts/lightcrafts.iml --- lightzone-4.2.2/lightcrafts/lightcrafts.iml 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/lightcrafts.iml 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff -Nru lightzone-4.2.2/lightcrafts/mk/auto_dep.mk lightzone-4.2.3/lightcrafts/mk/auto_dep.mk --- lightzone-4.2.2/lightcrafts/mk/auto_dep.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/mk/auto_dep.mk 2021-04-17 01:19:49.000000000 +0000 @@ -14,12 +14,12 @@ ## # We need to use an architecture-specific INCLUDES, but since dependencies # are generated once regardless of the number of architectures, we have to - # pick one, so we pick PPC. Strictly speaking, this isn't the right thing do - # do since it means the X86 compile will depend on PPC includes, but in + # pick one, so we pick ARM. Strictly speaking, this isn't the right thing do + # do since it means the X86 compile will depend on ARM includes, but in # practice it's OK because this is only for dependency generation, not code # generation. ## - AUTO_DEP_FLAGS+= $(INCLUDES_PPC) + AUTO_DEP_FLAGS+= $(INCLUDES_ARM) else AUTO_DEP_FLAGS+= $(INCLUDES) endif @@ -37,15 +37,7 @@ AUTO_DEP_FLAGS+= -maltivec endif -ifeq ($(findstring MacOSX10.2.8,$(AUTO_DEP_FLAGS)),MacOSX10.2.8) - ## - # There aren't gcc 4.0 headers for the 10.2.8 SDK, so use gcc 3.3 to generate - # the dependencies. 
- ## - AUTO_DEP_CC:= gcc-3.3 -else - AUTO_DEP_CC:= $(CC) -endif +AUTO_DEP_CC:= $(CC) MAKEDEPEND:= $(AUTO_DEP_CC) $(AUTO_DEP_FLAGS) diff -Nru lightzone-4.2.2/lightcrafts/mk/executable.mk lightzone-4.2.3/lightcrafts/mk/executable.mk --- lightzone-4.2.2/lightcrafts/mk/executable.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/mk/executable.mk 2021-04-17 01:19:49.000000000 +0000 @@ -26,8 +26,8 @@ # platform only, the makefile can replace "EXTRA" with one of "MACOSX", # "WINDOWS" or "LINUX". # -# In addition to the above, there are also EXEC_PPC_CFLAGS, EXEC_PPC_DEFINES, -# and EXEC_PPC_LDFLAGS for PowerPC-specific directives, and EXEC_X86_CFLAGS, +# In addition to the above, there are also EXEC_ARM_CFLAGS, EXEC_ARM_DEFINES, +# and EXEC_ARM_LDFLAGS for arm64-specific directives, and EXEC_X86_CFLAGS, # EXEC_X86_DEFINES, and EXEC_X86_LDFLAGS for Intel-specific directives. # # If a makefile needs to override how the TARGET is build, it can do: @@ -52,7 +52,7 @@ include $(COMMON_DIR)/mk/platform.mk ifeq ($(UNIVERSAL),1) - CFLAGS_PPC:= $(PLATFORM_CFLAGS_PPC) $(EXEC_EXTRA_CFLAGS) + CFLAGS_ARM:= $(PLATFORM_CFLAGS_ARM) $(EXEC_EXTRA_CFLAGS) CFLAGS_X86:= $(PLATFORM_CFLAGS_X86) $(EXEC_EXTRA_CFLAGS) else CFLAGS:= $(PLATFORM_CFLAGS) $(EXEC_EXTRA_CFLAGS) @@ -70,16 +70,16 @@ LDFLAGS+= $(EXEC_MACOSX_LDFLAGS) LINK+= $(EXEC_MACOSX_LINK) ifeq ($(UNIVERSAL),1) - CFLAGS_PPC+= $(EXEC_MACOSX_CFLAGS) $(EXEC_PPC_CFLAGS) + CFLAGS_ARM+= $(EXEC_MACOSX_CFLAGS) $(EXEC_ARM_CFLAGS) CFLAGS_X86+= $(EXEC_MACOSX_CFLAGS) $(EXEC_X86_CFLAGS) else CFLAGS+= $(EXEC_MACOSX_CFLAGS) - ifeq ($(PROCESSOR),powerpc) - CFLAGS+= $(EXEC_PPC_CFLAGS) - DEFINES+= $(EXEC_PPC_DEFINES) - LDFLAGS+= $(EXEC_PPC_LDFLAGS) + ifeq ($(PROCESSOR),arm64) + CFLAGS+= $(EXEC_ARM_CFLAGS) + DEFINES+= $(EXEC_ARM_DEFINES) + LDFLAGS+= $(EXEC_ARM_LDFLAGS) endif - ifeq ($(PROCESSOR),i386) + ifeq ($(PROCESSOR),x86_64) CFLAGS+= $(EXEC_X86_CFLAGS) DEFINES+= $(EXEC_X86_DEFINES) LDFLAGS+= $(EXEC_X86_LDFLAGS) @@ -112,9 +112,9 @@ endif ifeq ($(UNIVERSAL),1) - CFLAGS_PPC+= $(DEFINES) $(EXEC_PPC_DEFINES) + CFLAGS_ARM+= $(DEFINES) $(EXEC_ARM_DEFINES) CFLAGS_X86+= $(DEFINES) $(EXEC_X86_DEFINES) - INCLUDES_PPC:= $(INCLUDES) $(EXEC_PPC_INCLUDES) + INCLUDES_ARM:= $(INCLUDES) $(EXEC_ARM_INCLUDES) INCLUDES_X86:= $(INCLUDES) $(EXEC_X86_INCLUDES) else CFLAGS+= $(DEFINES) @@ -133,7 +133,7 @@ # These are always defined even when UNIVERSAL is not set so a "make disclean" # will remove them. 
## -TARGET_PPC:= $(TARGET_BASE)-ppc$(EXEC_EXT) +TARGET_ARM:= $(TARGET_BASE)-arm$(EXEC_EXT) TARGET_X86:= $(TARGET_BASE)-x86$(EXEC_EXT) ## @@ -147,13 +147,13 @@ ifeq ($(UNIVERSAL),1) -$(TARGET): $(TARGET_PPC) $(TARGET_X86) +$(TARGET): $(TARGET_ARM) $(TARGET_X86) -$(MKDIR) $(TARGET_DIR) - $(LIPO) -create $(TARGET_PPC) $(TARGET_X86) -output $@ + $(LIPO) -create $(TARGET_ARM) $(TARGET_X86) -output $@ ifndef JNI_MANUAL_TARGET -$(TARGET_PPC): $(OBJECTS_PPC) $(LOCAL_RANLIBS) $(BUILT_LIBS) - $(CC_LINK) $(CFLAGS_PPC) $(LDFLAGS) -o $@ *-ppc.o $(LINK) +$(TARGET_ARM): $(OBJECTS_ARM) $(LOCAL_RANLIBS) $(BUILT_LIBS) + $(CC_LINK) $(CFLAGS_ARM) $(LDFLAGS) -o $@ *-arm.o $(LINK) $(TARGET_X86): $(OBJECTS_X86) $(LOCAL_RANLIBS) $(BUILT_LIBS) $(CC_LINK) $(CFLAGS_X86) $(LDFLAGS) -o $@ *-x86.o $(LINK) @@ -177,6 +177,6 @@ $(RM) *.o .*.d distclean mostlyclean: clean - $(RM) $(TARGET) $(TARGET_PPC) $(TARGET_X86) $(POST_TARGET) + $(RM) $(TARGET) $(TARGET_ARM) $(TARGET_X86) $(POST_TARGET) # vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/lightcrafts/mk/platform.mk lightzone-4.2.3/lightcrafts/mk/platform.mk --- lightzone-4.2.2/lightcrafts/mk/platform.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/mk/platform.mk 2021-04-17 01:19:49.000000000 +0000 @@ -16,17 +16,20 @@ ## # Target architecture ## -ifdef TARGET - PROCESSOR:= $(TARGET) +ifdef TARGET_ARCH + PROCESSOR:= $(TARGET_ARCH) else PROCESSOR:= $(shell uname -m) endif + ifeq ($(PROCESSOR),$(filter $(PROCESSOR),i486 i586 i686 i86pc)) PROCESSOR:= i386 -else ifeq ($(PROCESSOR),amd64) +endif +ifeq ($(PROCESSOR),amd64) PROCESSOR:= x86_64 -else ifeq ($(PROCESSOR),"Power Macintosh") - PROCESSOR:= powerpc +endif +ifeq ($(PROCESSOR),$(filter $(PROCESSOR),aarch64 armv8l arm64)) + PROCESSOR:= arm64 endif TOOLS_BIN:= $(abspath $(ROOT)/lightcrafts/tools/bin) @@ -37,7 +40,6 @@ # The default C and C++ compilers for Linux, FreeBSD, or OpenIndiana CC?= gcc CXX?= g++ -PKGCFG:= pkg-config # Unset USE_ICC_HERE if the overall USE_ICC flags != 1. ifneq ($(USE_ICC),1) @@ -62,17 +64,8 @@ # Mac OS X ## ifeq ($(PLATFORM),MacOSX) - MACOSX_MINOR_VERSION:= $(shell sw_vers -productVersion | cut -d. -f2-2) - ifeq ($(MACOSX_MINOR_VERSION),6) # Snow Leopard - CC:= gcc - CXX:= g++ - else ifeq ($(shell expr $(MACOSX_MINOR_VERSION) \>= 12),1) # Sierra - CC:= clang - CXX:= clang++ - else - CC:= clang-omp - CXX:= clang-omp++ - endif + CC:= clang + CXX:= clang++ MACOSX_DEPLOYMENT_TARGET:= $(shell sw_vers -productVersion | cut -d. -f-2) ifndef EXECUTABLE @@ -80,11 +73,15 @@ endif ALTIVEC_CFLAGS:= -DLC_USE_ALTIVEC - ifdef USE_ICC_HERE - ICC_ROOT:= /opt/intel/Compiler/11.1/067 - ICC:= $(ICC_ROOT)/bin/ia32/icc - XIAR:= $(ICC_ROOT)/bin/ia32/xiar + ifeq ($(PROCESSOR),arm64) + BREW_DIR?= /opt/homebrew + else + BREW_DIR?= /usr/local endif + PKGCFG:= $(BREW_DIR)/bin/pkg-config + LIBOMP_PATH?= $(shell $(BREW_DIR)/bin/brew --prefix libomp) + PLATFORM_INCLUDES+= -I$(LIBOMP_PATH)/include + PLATFORM_LDFLAGS+= -L$(LIBOMP_PATH)/lib ## # Don't use := here so other makefiles can override SDKROOT. @@ -94,7 +91,7 @@ MACOSX_ISYSROOT= -isysroot $(SDKROOT) MACOSX_SYSLIBROOT= -Wl,-syslibroot,$(SDKROOT) else - SDKROOT:= + SDKROOT?= MACOSX_ISYSROOT= MACOSX_SYSLIBROOT= endif @@ -104,56 +101,39 @@ # These are to be only the bare minimum architecture-specific CFLAGS. High- # performance CFLAGS go in the FAST_CFLAGS_* variables below. 
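The platform.mk hunk above replaces the old TARGET/powerpc handling: the architecture override is now taken from TARGET_ARCH, and the value reported by uname -m is normalized before any flags are chosen (i486/i586/i686/i86pc become i386, amd64 becomes x86_64, and aarch64/armv8l/arm64 all become arm64). The same normalization is written out here as a small illustrative C helper; the function name and tables are hypothetical, not part of the build system.

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors the PROCESSOR normalization added to platform.mk. */
    static const char *normalize_processor(const char *uname_m) {
        static const char *i386_names[]  = { "i486", "i586", "i686", "i86pc", NULL };
        static const char *arm64_names[] = { "aarch64", "armv8l", "arm64", NULL };

        for (const char **p = i386_names; *p; p++)
            if (strcmp(uname_m, *p) == 0) return "i386";
        if (strcmp(uname_m, "amd64") == 0) return "x86_64";
        for (const char **p = arm64_names; *p; p++)
            if (strcmp(uname_m, *p) == 0) return "arm64";
        return uname_m;   /* already canonical, e.g. x86_64, armhf, armv7l */
    }

    int main(void) {
        const char *samples[] = { "aarch64", "armv8l", "amd64", "i686", "x86_64" };
        for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
            printf("%s -> %s\n", samples[i], normalize_processor(samples[i]));
        return 0;
    }

Folding the three 64-bit ARM spellings into a single arm64 value is what lets the rest of the makefiles (jni.mk, executable.mk, sources.mk) key every ARM-specific flag off one PROCESSOR value, just as the old files did for powerpc.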
## - MACOSX_CFLAGS_PPC:= -mcpu=G4 -mtune=G5 - MACOSX_CFLAGS_X86:= -march=core2 + MACOSX_CFLAGS_ARM:= -target arm64-apple-macos11 + MACOSX_CFLAGS_X86:= -target x86_64-apple-macos10.12 ifdef HIGH_PERFORMANCE ## # High-performance architecture-specific CFLAGS only. ## - FAST_CFLAGS_PPC:= -fast -Wstrict-aliasing -Wstrict-aliasing=2 - - ifdef USE_ICC_HERE - FAST_CFLAGS_X86:= -O3 -no-prec-div -xP -fp-model fast=2 -ipo -vec-report0 -fno-common # -fno-alias - ifeq ($(UNIVERSAL),1) - CC_X86:= $(ICC) - AR_X86:= $(XIAR) - CXX_X86:= $(ICC) - else - ifneq ($(PROCESSOR),powerpc) - AR:= $(XIAR) - CC:= $(ICC) - CXX:= $(ICC) - endif - endif - else - FAST_CFLAGS_X86:= -O3 \ + FAST_CFLAGS_ARM:= -O3 # TODO + FAST_CFLAGS_X86:= -O3 \ -fno-trapping-math \ -fomit-frame-pointer \ -msse2 -mfpmath=sse - endif - MACOSX_CFLAGS_PPC+= $(FAST_CFLAGS_PPC) + MACOSX_CFLAGS_ARM+= $(FAST_CFLAGS_ARM) MACOSX_CFLAGS_X86+= $(FAST_CFLAGS_X86) else PLATFORM_CFLAGS+= -Os endif ifeq ($(UNIVERSAL),1) - PLATFORM_CFLAGS_PPC:= $(PLATFORM_CFLAGS) -arch ppc7400 $(MACOSX_CFLAGS_PPC) - PLATFORM_CFLAGS_X86:= $(PLATFORM_CFLAGS) -arch i386 $(MACOSX_CFLAGS_X86) + PLATFORM_CFLAGS_ARM:= $(PLATFORM_CFLAGS) $(MACOSX_CFLAGS_ARM) + PLATFORM_CFLAGS_X86:= $(PLATFORM_CFLAGS) $(MACOSX_CFLAGS_X86) - ifeq ($(PROCESSOR),powerpc) - OTHER_PROCESSOR:= i386 + ifeq ($(PROCESSOR),arm64) + CONFIG_HOST:= $(MACOSX_CFLAGS_ARM) + CONFIG_TARGET:= $(MACOSX_CFLAGS_X86) else - OTHER_PROCESSOR:= powerpc + CONFIG_HOST:= $(MACOSX_CFLAGS_X86) + CONFIG_TARGET:= $(MACOSX_CFLAGS_ARM) endif - DARWIN_RELEASE:= $(shell uname -r) - CONFIG_HOST:= $(PROCESSOR)-apple-darwin$(DARWIN_RELEASE) - CONFIG_TARGET:= $(OTHER_PROCESSOR)-apple-darwin$(DARWIN_RELEASE) else - ifeq ($(PROCESSOR),powerpc) - PLATFORM_CFLAGS+= $(MACOSX_CFLAGS_PPC) - PLATFORM_CFLAGS_PPC:= $(PLATFORM_CFLAGS) + ifeq ($(PROCESSOR),arm64) + PLATFORM_CFLAGS+= $(MACOSX_CFLAGS_ARM) + PLATFORM_CFLAGS_ARM:= $(PLATFORM_CFLAGS) else PLATFORM_CFLAGS+= $(MACOSX_CFLAGS_X86) PLATFORM_CFLAGS_X86:= $(PLATFORM_CFLAGS) @@ -162,7 +142,7 @@ LIPO:= lipo - JAVA_INCLUDES= -I$(SDKROOT)/System/Library/Frameworks/JavaVM.framework/Versions/A/Headers + JAVA_INCLUDES+= -I"$(JAVA_HOME)/include" -I"$(JAVA_HOME)/include/darwin" JNILIB_PREFIX:= lib JNILIB_EXT:= .jnilib DYLIB_PREFIX:= $(JNILIB_PREFIX) @@ -244,7 +224,7 @@ else PLATFORM_CFLAGS+= -O3 \ -fno-trapping-math \ - -fomit-frame-pointer + -fomit-frame-pointer endif else PLATFORM_CFLAGS+= -Os @@ -273,7 +253,7 @@ ifeq ($(PROCESSOR),$(filter $(PROCESSOR),x86_64 i386)) PLATFORM_CFLAGS+= $(SSE_FLAGS) - else ifeq ($(PROCESSOR),$(filter $(PROCESSOR),aarch64 armv8l)) + else ifeq ($(PROCESSOR),arm64) PLATFORM_CFLAGS+= -march=armv8-a else ifeq ($(PROCESSOR),$(filter $(PROCESSOR),armhf armv7l)) PLATFORM_CFLAGS+= -march=armv7-a @@ -297,6 +277,8 @@ DYLIB_PREFIX:= $(JNILIB_PREFIX) DYLIB_EXT:= $(JNILIB_EXT) + PKGCFG:= pkg-config + ifeq ($(PLATFORM),Linux) JAVA_INCLUDES:= -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux NUM_PROCESSORS:= $(shell grep '^processor' /proc/cpuinfo | wc -l) diff -Nru lightzone-4.2.2/lightcrafts/mk/sources.mk lightzone-4.2.3/lightcrafts/mk/sources.mk --- lightzone-4.2.2/lightcrafts/mk/sources.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/mk/sources.mk 2021-04-17 01:19:49.000000000 +0000 @@ -10,41 +10,33 @@ # If architecture-specific versions of CC, CXX, and CFLAGS aren't set, just # copy them from the architecture-neutral values. 
## - ifndef CC_PPC - CC_PPC:= $(CC) + ifndef CC_ARM + CC_ARM:= $(CC) endif ifndef CC_X86 CC_X86:= $(CC) endif - ifndef CXX_PPC - CXX_PPC:= $(CXX) + ifndef CXX_ARM + CXX_ARM:= $(CXX) endif ifndef CXX_X86 CXX_X86:= $(CXX) endif - ifndef CFLAGS_PPC - CFLAGS_PPC= $(CFLAGS) + ifndef CFLAGS_ARM + CFLAGS_ARM= $(CFLAGS) endif ifndef CFLAGS_X86 CFLAGS_X86= $(CFLAGS) endif - ifndef INCLUDES_PPC - INCLUDES_PPC= $(INCLUDES) + ifndef INCLUDES_ARM + INCLUDES_ARM= $(INCLUDES) endif ifndef INCLUDES_X86 INCLUDES_X86= $(INCLUDES) endif - ## - # gcc-3.3 doesn't permit specifying -o with -c so we have to let it generate - # the default .o and then rename it. We set this variable to know whether - # we're dealing with gcc-3.3 and thus have to deal with this case. (We only - # need to use gcc-3.3 for the PowerPC-half of Mac OS X Universal builds.) - ## - GCC_33_PPC:= $(findstring 3.3,$(CC_PPC)) - endif # UNIVERSAL ## @@ -85,17 +77,17 @@ OBJECTS:= $(C_OBJECTS) $(OC_OBJECTS) \ $(CXX_OBJECTS) $(OCXX_OBJECTS) ifeq ($(UNIVERSAL),1) - C_OBJECTS_PPC:= $(C_SOURCES:.c=-ppc.o) + C_OBJECTS_ARM:= $(C_SOURCES:.c=-arm.o) C_OBJECTS_X86:= $(C_SOURCES:.c=-x86.o) - CXX_OBJECTS_PPC:= $(CXX_SOURCES:.cpp=-ppc.o) + CXX_OBJECTS_ARM:= $(CXX_SOURCES:.cpp=-arm.o) CXX_OBJECTS_X86:= $(CXX_SOURCES:.cpp=-x86.o) - OC_OBJECTS_PPC:= $(OC_SOURCES:.m=-ppc.o) + OC_OBJECTS_ARM:= $(OC_SOURCES:.m=-arm.o) OC_OBJECTS_X86:= $(OC_SOURCES:.m=-x86.o) - OCXX_OBJECTS_PPC:= $(OCXX_SOURCES:.mm=-ppc.o) + OCXX_OBJECTS_ARM:= $(OCXX_SOURCES:.mm=-arm.o) OCXX_OBJECTS_X86:= $(OCXX_SOURCES:.mm=-x86.o) - OBJECTS_PPC:= $(C_OBJECTS_PPC) $(OC_OBJECTS_PPC) \ - $(CXX_OBJECTS_PPC) $(OCXX_OBJECTS_PPC) + OBJECTS_ARM:= $(C_OBJECTS_ARM) $(OC_OBJECTS_ARM) \ + $(CXX_OBJECTS_ARM) $(OCXX_OBJECTS_ARM) OBJECTS_X86:= $(C_OBJECTS_X86) $(OC_OBJECTS_X86) \ $(CXX_OBJECTS_X86) $(OCXX_OBJECTS_X86) endif @@ -105,17 +97,9 @@ # given architecture. 
## ifeq ($(UNIVERSAL),1) - ifdef SDKROOT_PPC - %-ppc.o : SDKROOT:= $(SDKROOT_PPC) - %-ppc : SDKROOT:= $(SDKROOT_PPC) - ifdef GCC_33_PPC - %-ppc.o : export NEXT_ROOT:= $(SDKROOT_PPC) - %-ppc : export NEXT_ROOT:= $(SDKROOT_PPC) - %-ppc.o : MACOSX_ISYSROOT:= - %-ppc : MACOSX_ISYSROOT:= - %-ppc.o : MACOSX_SYSLIBROOT:= - %-ppc : MACOSX_SYSLIBROOT:= - endif + ifdef SDKROOT_ARM + %-arm.o : SDKROOT:= $(SDKROOT_ARM) + %-arm : SDKROOT:= $(SDKROOT_ARM) endif ifdef SDKROOT_X86 %-x86.o : SDKROOT:= $(SDKROOT_X86) @@ -131,21 +115,11 @@ ifeq ($(UNIVERSAL),1) -ifdef GCC_33_PPC -%-ppc.o : %.c - $(CC_PPC) -c $(CFLAGS_PPC) $(INCLUDES_PPC) $< && mv $*.o $@ -else -%-ppc.o : %.c - $(CC_PPC) -c $(CFLAGS_PPC) $(INCLUDES_PPC) -o $@ $< -endif +%-arm.o : %.c + $(CC_ARM) -c $(CFLAGS_ARM) $(INCLUDES_ARM) -o $@ $< -ifdef GCC_33_PPC -%-ppc.o : %.cpp - $(CXX_PPC) -c $(CFLAGS_PPC) $(INCLUDES_PPC) $< && mv $*.o $@ -else -%-ppc.o : %.cpp - $(CXX_PPC) -c $(CFLAGS_PPC) $(INCLUDES_PPC) -o $@ $< -endif +%-arm.o : %.cpp + $(CXX_ARM) -c $(CFLAGS_ARM) $(INCLUDES_ARM) -o $@ $< %-x86.o : %.c $(CC_X86) -c $(CFLAGS_X86) $(INCLUDES_X86) -o $@ $< @@ -166,21 +140,11 @@ ## ifeq ($(UNIVERSAL),1) -ifdef GCC_33_PPC -%-ppc.o : %.m - $(CC_PPC) -c $(filter-out -fast,$(CFLAGS_PPC)) $(INCLUDES_PPC) $< && mv $*.o $@ -else -%-ppc.o : %.m - $(CC_PPC) -c $(filter-out -fast,$(CFLAGS_PPC)) $(INCLUDES_PPC) -o $@ $< -endif +%-arm.o : %.m + $(CC_ARM) -c $(filter-out -fast,$(CFLAGS_ARM)) $(INCLUDES_ARM) -o $@ $< -ifdef GCC_33_PPC -%-ppc.o : %.mm - $(CXX_PPC) -c $(filter-out -fast,$(CFLAGS_PPC)) $(INCLUDES_PPC) $< && mv $*.o $@ -else -%-ppc.o : %.mm - $(CXX_PPC) -c $(filter-out -fast,$(CFLAGS_PPC)) $(INCLUDES_PPC) -o $@ $< -endif +%-arm.o : %.mm + $(CXX_ARM) -c $(filter-out -fast,$(CFLAGS_ARM)) $(INCLUDES_ARM) -o $@ $< %-x86.o : %.m $(CC_X86) -c $(filter-out -fast,$(CFLAGS_X86)) $(INCLUDES_X86) -o $@ $< @@ -204,27 +168,27 @@ ## ifeq ($(UNIVERSAL),1) - CC_LINK_PPC:= $(CC_PPC) + CC_LINK_ARM:= $(CC_ARM) CC_LINK_X86:= $(CC_X86) ifdef CXX_SOURCES - CC_LINK_PPC:= $(CXX_PPC) + CC_LINK_ARM:= $(CXX_ARM) CC_LINK_X86:= $(CXX_X86) - endif + endif ifdef OCXX_SOURCES - CC_LINK_PPC:= $(CXX_PPC) + CC_LINK_ARM:= $(CXX_ARM) CC_LINK_X86:= $(CXX_X86) endif - %-ppc : CFLAGS:= $(CFLAGS_PPC) + %-arm : CFLAGS:= $(CFLAGS_ARM) %-x86 : CFLAGS:= $(CFLAGS_X86) - %-ppc : CC_LINK:= $(CC_LINK_PPC) + %-arm : CC_LINK:= $(CC_LINK_ARM) %-x86 : CC_LINK:= $(CC_LINK_X86) - %-ppc$(JNILIB_EXT) : CFLAGS:= $(CFLAGS_PPC) + %-arm$(JNILIB_EXT) : CFLAGS:= $(CFLAGS_ARM) %-x86$(JNILIB_EXT) : CFLAGS:= $(CFLAGS_X86) - %-ppc$(JNILIB_EXT) : CC_LINK:= $(CC_LINK_PPC) + %-arm$(JNILIB_EXT) : CC_LINK:= $(CC_LINK_ARM) %-x86$(JNILIB_EXT) : CC_LINK:= $(CC_LINK_X86) else # UNIVERSAL diff -Nru lightzone-4.2.2/lightcrafts/resources/com/lightcrafts/ui/editor/assoc/resources/CompatibleCameras.properties lightzone-4.2.3/lightcrafts/resources/com/lightcrafts/ui/editor/assoc/resources/CompatibleCameras.properties --- lightzone-4.2.2/lightcrafts/resources/com/lightcrafts/ui/editor/assoc/resources/CompatibleCameras.properties 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/resources/com/lightcrafts/ui/editor/assoc/resources/CompatibleCameras.properties 2021-04-17 01:19:49.000000000 +0000 @@ -2,9 +2,11 @@ CANON\ EOS\ 200D = CANON\ EOS\ REBEL\ SL2 CANON\ EOS\ 800D = CANON\ EOS\ REBEL\ T7I CANON\ EOS\ 8000D = CANON\ EOS\ 760D +CANON\ EOS\ KISS\ X9I = CANON\ EOS\ REBEL\ T7I CANON\ EOS\ KISS\ X8I = CANON\ EOS\ 750D CANON\ EOS\ KISS\ X7I = CANON\ EOS\ 700D CANON\ EOS\ KISS\ X6I = CANON\ EOS\ 650D +CANON\ EOS\ 
KISS\ X9 = CANON\ EOS\ REBEL\ SL2 CANON\ EOS\ KISS\ X7 = CANON\ EOS\ 100D CANON\ EOS\ KISS\ X5 = CANON\ EOS\ 600D CANON\ EOS\ KISS\ X4 = CANON\ EOS\ 550D @@ -63,7 +65,6 @@ SONY\ DSC-RX100M5 = SONY\ DSC-RX100M2 SONY\ ILCE-6500 = SONY\ ILCE-6300 SONY\ ILCE-5000 = SONY\ ILCE-3000 -SONY\ ILCE-5000 = SONY\ ILCE-3000 SONY\ ILCE-5100 = SONY\ ILCE-3000 SONY\ ILCE-6000 = SONY\ ILCE-3000 SONY\ ILCE-QX1 = SONY\ ILCE-3000 diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/app/batch/BatchConfiguratorPresenter.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/app/batch/BatchConfiguratorPresenter.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/app/batch/BatchConfiguratorPresenter.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/app/batch/BatchConfiguratorPresenter.java 2021-04-17 01:19:49.000000000 +0000 @@ -8,7 +8,6 @@ import com.lightcrafts.image.export.ImageFileExportOptions; import com.lightcrafts.ui.base.BasePresenter; import lombok.Getter; -import lombok.val; import java.io.File; @@ -53,8 +52,12 @@ final File configDirectory = config.directory != null && config.directory.isDirectory() ? config.directory : new File(System.getProperty("java.io.tmpdir")); - val directory = mView.chooseDirectory(configDirectory); + var directory = mView.chooseDirectory(configDirectory); + if (directory == null) return; + if (!directory.isDirectory()) { + directory = directory.getParentFile(); + } if (directory != null) { config.directory = directory; dirLabelText = directory.getName(); diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/app/CheckForUpdate.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/app/CheckForUpdate.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/app/CheckForUpdate.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/app/CheckForUpdate.java 2021-04-17 01:19:49.000000000 +0000 @@ -565,7 +565,7 @@ public static void main( String[] args ) throws MalformedURLException { val isAvailable = checkIfUpdateIsAvailable( - "4.2.2", new URL("file:///tmp/lightzone/appcast.xml")); + "4.2.3", new URL("file:///tmp/lightzone/appcast.xml")); System.exit(isAvailable ? 0 : 1); } } diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/app/ComboFrame.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/app/ComboFrame.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/app/ComboFrame.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/app/ComboFrame.java 2021-04-17 01:19:49.000000000 +0000 @@ -46,7 +46,6 @@ import java.beans.PropertyChangeEvent; import java.beans.PropertyChangeListener; import java.io.File; -import java.lang.reflect.Method; import java.net.URL; import java.util.List; import java.util.*; @@ -155,11 +154,6 @@ LastActiveComboFrame = this; } - // Mac OS X 10.7 Lion Fullscreen Support - if (Platform.isMac()) { - enableFullScreenMode(this); - } - // Java 1.6 will just use a cofee cup otherwise... setIconImage(IconImage); @@ -1380,19 +1374,4 @@ }; // *** Helper interface implementations for use in the browser: end. 
*** - - public static void enableFullScreenMode(Window window) { - try { - Class clazz = Class.forName("com.apple.eawt.FullScreenUtilities"); - Class[] param = new Class[]{Window.class, Boolean.TYPE}; - Method method = clazz.getMethod("setWindowCanFullScreen", param); - method.invoke(clazz, window, true); - } - catch (ClassNotFoundException e0) { - // Just ignore it, may be the OS is older than 10.7 Lion - } - catch (Exception e) { - System.err.println("Could not enable OS X fullscreen mode " + e); - } - } } diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/image/metadata/GPSDirectory.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/image/metadata/GPSDirectory.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/image/metadata/GPSDirectory.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/image/metadata/GPSDirectory.java 2021-04-17 01:19:49.000000000 +0000 @@ -4,7 +4,7 @@ package com.lightcrafts.image.metadata; import com.lightcrafts.image.metadata.providers.GPSProvider; -import com.lightcrafts.image.metadata.values.UnsignedRationalMetaValue; +import com.lightcrafts.image.metadata.values.RationalMetaValue; import com.lightcrafts.utils.Rational; import com.lightcrafts.utils.tuple.Pair; import lombok.Getter; @@ -104,7 +104,7 @@ if (metaValue == null) { return null; } - val values = ((UnsignedRationalMetaValue) metaValue).getRationalValues(); + val values = ((RationalMetaValue) metaValue).getRationalValues(); if (values.length != 3) { return null; } diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/model/ImageEditor/BlendedOperation.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/model/ImageEditor/BlendedOperation.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/model/ImageEditor/BlendedOperation.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/model/ImageEditor/BlendedOperation.java 2021-04-17 01:19:49.000000000 +0000 @@ -135,7 +135,6 @@ @Override public RGBColorSelection getColorSelectionAt(Point2D p) { - System.out.println("setColorSelection(): " + p); this.clickPoint = p; settingsChanged(); diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/model/ImageEditor/ImageEditorDisplay.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/model/ImageEditor/ImageEditorDisplay.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/model/ImageEditor/ImageEditorDisplay.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/model/ImageEditor/ImageEditorDisplay.java 2021-04-17 01:19:49.000000000 +0000 @@ -316,8 +316,6 @@ } val g2d = (Graphics2D)g; - g2d.setBackground(backgroundColor); - g2d.clearRect(0, 0, getWidth(), getHeight()); HiDpi.resetTransformScaleOf(g2d); @@ -370,7 +368,7 @@ drawBackgroundTile(g2d, tileIndex, tileClipRect, tile); } g2d.setClip(originalClipBounds); // reset the clip rect - + repaint(); updateTileComputingStatus(tileIndices, originalClipBounds); } @@ -381,7 +379,7 @@ if (!validImageBackground[tx][ty] && tile != null) { validImageBackground[tx][ty] = true; - g2d.drawImage(getBackgroundTile(tile, tx, ty), null, tile.getMinX(), tile.getMinY()); + g2d.drawImage(getBackgroundTile(tile, tx, ty), tile.getMinX(), tile.getMinY(), this); return; } @@ -389,20 +387,20 @@ val backgroundTileCache = backgroundCache.get(new CacheKey(tx, ty)); if (backgroundTileCache != null) { // Recycle the background tile - g2d.drawImage(backgroundTileCache, null, source.tileXToX(tx), source.tileYToY(ty)); + g2d.drawImage(backgroundTileCache, 
source.tileXToX(tx), source.tileYToY(ty), this); return; } val cachedTiles = availableTiles(new Point(tx, ty)); if (cachedTiles.length == 1 && cachedTiles[0] != null) { val cachedTile = (WritableRaster) cachedTiles[0]; - g2d.drawImage(getBackgroundTile(cachedTile, tx, ty), null, - cachedTile.getMinX(), cachedTile.getMinY()); + g2d.drawImage(getBackgroundTile(cachedTile, tx, ty), + cachedTile.getMinX(), cachedTile.getMinY(), this); return; } if (backgroundImage instanceof BufferedImage) { - g2d.drawImage((BufferedImage) backgroundImage, null, tileClipRect.x, tileClipRect.y); + g2d.drawImage((BufferedImage) backgroundImage, tileClipRect.x, tileClipRect.y, this); return; } diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/platform/Platform.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/platform/Platform.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/platform/Platform.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/platform/Platform.java 2021-04-17 01:19:49.000000000 +0000 @@ -24,6 +24,8 @@ import java.io.IOException; import java.lang.management.ManagementFactory; import java.net.InetAddress; +import java.net.URL; +import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.LinkOption; import java.nio.file.Path; @@ -310,9 +312,17 @@ * an active internet connection and can reach the specified host. */ public boolean hasInternetConnectionTo( String hostName ) { + final int timeoutMs = 1000; try { final InetAddress address = InetAddress.getByName(hostName); - return address.isReachable(2000); + if (address.isReachable(timeoutMs)) return true; + + final var url = new URL("https://" + hostName); + final URLConnection conn = url.openConnection(); + conn.setConnectTimeout(timeoutMs); + conn.connect(); + conn.getInputStream().close(); + return true; } catch (Throwable t) { return false; @@ -330,21 +340,6 @@ } /** - * Detect whether the specified key is currently pressed. The purpose of - * a platform-specific implementation of this method is to distinguish - * the synthetic key events generated by auto-repeat. This default - * implementation just throws UnsupportedOperationException. - */ - @SuppressWarnings( { "UnusedDeclaration", "MethodMayBeStatic" } ) - public boolean isKeyPressed( int keyCode ) - throws UnsupportedOperationException - { - throw new UnsupportedOperationException( - "The current Platform does not implement isKeyPressed()" - ); - } - - /** * Checks whether the given {@link File} is special in some way on the * platform. * diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/editor/ModeManager.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/editor/ModeManager.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/editor/ModeManager.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/editor/ModeManager.java 2021-04-17 01:19:49.000000000 +0000 @@ -4,7 +4,6 @@ package com.lightcrafts.ui.editor; import com.lightcrafts.model.CropBounds; -import com.lightcrafts.platform.Platform; import com.lightcrafts.ui.crop.CropListener; import com.lightcrafts.ui.crop.CropMode; import com.lightcrafts.ui.mode.AbstractMode; @@ -27,6 +26,8 @@ import java.awt.event.KeyEvent; import java.awt.geom.AffineTransform; import java.awt.geom.NoninvertibleTransformException; +import java.util.HashMap; +import java.util.Map; /** * Handle switching among all the Modes. 
Lots of special mode-transition @@ -44,35 +45,41 @@ // Look for the special key events to enter and exit the pan mode, // taking care to filter out auto-repeat events: - private static int PanKeyCode = Platform.isMac() - ? KeyEvent.VK_META - : KeyEvent.VK_CONTROL; + private static final Map PanKeyCodeAndTime = new HashMap<>() {{ + put(KeyEvent.VK_SPACE, 0L); + put(KeyEvent.VK_META, 0L); + put(KeyEvent.VK_CONTROL, 0L); + }}; - private KeyEventPostProcessor panModeKeyProcessor = + private final KeyEventPostProcessor panModeKeyProcessor = new KeyEventPostProcessor() { private boolean isPanMode; @Override public boolean postProcessKeyEvent(KeyEvent e) { val wasPanMode = (overlay.peekMode() == transientPanMode); - if (e.getKeyCode() == PanKeyCode) { - if (e.getID() == KeyEvent.KEY_PRESSED) { - isPanMode = true; - } - if (e.getID() == KeyEvent.KEY_RELEASED) { - if (Platform.isMac()) { - isPanMode = false; - } - else { + val keyCode = e.getKeyCode(); + if (PanKeyCodeAndTime.containsKey(keyCode)) { + switch (e.getID()) { + case KeyEvent.KEY_PRESSED: + PanKeyCodeAndTime.replace(keyCode, e.getWhen()); + isPanMode = true; + break; + case KeyEvent.KEY_RELEASED: // Detect and ignore auto-repeat release events - isPanMode = Platform.getPlatform().isKeyPressed(PanKeyCode); - } + val lastPressed = PanKeyCodeAndTime.get(keyCode); + if (e.getWhen() > lastPressed + 1) { + PanKeyCodeAndTime.replace(keyCode, 0L); + isPanMode = PanKeyCodeAndTime.values().stream() + .anyMatch(t -> t > 0L); + } + break; } } - if (isPanMode && ! wasPanMode) { + if (! wasPanMode && isPanMode) { overlay.pushMode(transientPanMode); } - if (wasPanMode && ! isPanMode) { + else if (wasPanMode && ! isPanMode) { overlay.popMode(); } return false; // these key events have other interpretations diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/mode/DropperMode.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/mode/DropperMode.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/mode/DropperMode.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/mode/DropperMode.java 2021-04-17 01:19:49.000000000 +0000 @@ -2,6 +2,12 @@ package com.lightcrafts.ui.mode; +import com.lightcrafts.ui.operation.OpControl; +import com.lightcrafts.utils.WeakHashSet; +import com.lightcrafts.utils.awt.geom.HiDpi; + +import javax.swing.*; +import javax.swing.event.MouseInputListener; import java.awt.*; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; @@ -12,11 +18,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; -import javax.swing.*; -import javax.swing.event.MouseInputListener; - -import com.lightcrafts.ui.operation.OpControl; -import com.lightcrafts.utils.WeakHashSet; /** * A Mode that draws nothing but handles mouse events, designed to go with @@ -53,7 +54,8 @@ m_overlay.addMouseListener( new MouseAdapter() { public void mousePressed( MouseEvent me ) { - notifyPointSelected( me.getPoint() ); + Point p = HiDpi.imageSpacePointFrom(me.getPoint()); + notifyPointSelected(p); } } ); diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/region/ClonePointMode.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/region/ClonePointMode.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/region/ClonePointMode.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/region/ClonePointMode.java 2021-04-17 01:19:49.000000000 +0000 @@ -2,6 +2,8 @@ package com.lightcrafts.ui.region; +import 
com.lightcrafts.utils.awt.geom.HiDpi; + import java.awt.*; import java.awt.event.MouseEvent; @@ -22,7 +24,7 @@ } public void mouseReleased(MouseEvent event) { - Point p = event.getPoint(); + Point p = HiDpi.imageSpacePointFrom(event.getPoint()); update(p, false); model.setMajorMode(new EditCurveMode(this, curve)); if (! currentPoint.equals(startPoint)) { @@ -35,13 +37,13 @@ } public void mouseMoved(MouseEvent event) { - Point p = event.getPoint(); + Point p = HiDpi.imageSpacePointFrom(event.getPoint()); update(p, true); autoscroll(event); } public void mouseDragged(MouseEvent event) { - Point p = event.getPoint(); + Point p = HiDpi.imageSpacePointFrom(event.getPoint()); update(p, true); autoscroll(event); } diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/region/curves/ClonePoint.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/region/curves/ClonePoint.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/region/curves/ClonePoint.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/region/curves/ClonePoint.java 2021-04-17 01:19:49.000000000 +0000 @@ -2,6 +2,7 @@ package com.lightcrafts.ui.region.curves; +import com.lightcrafts.utils.awt.geom.HiDpi; import com.lightcrafts.utils.xml.XMLException; import com.lightcrafts.utils.xml.XmlNode; @@ -18,7 +19,9 @@ */ class ClonePoint implements Cloneable { - private static final int NominalRadius = 8; // rendering size at 1-1 + // rendering size at 1-1 + private static final float NominalRadius = + 8 * (float) Math.sqrt(HiDpi.defaultTransform.getDeterminant()); private Stroke foregroundStroke = new BasicStroke(1f); private Stroke backgroundStroke = new BasicStroke(3f); diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/region/curves/EllipticCurve.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/region/curves/EllipticCurve.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/ui/region/curves/EllipticCurve.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/ui/region/curves/EllipticCurve.java 2021-04-17 01:19:49.000000000 +0000 @@ -1,4 +1,5 @@ /* Copyright (C) 2005-2011 Fabio Riccardi */ +/* Copyright (C) 2021- Masahiro Kitagawa */ package com.lightcrafts.ui.region.curves; @@ -28,24 +29,25 @@ private final static Preferences Prefs = Preferences.userRoot().node( "/com/lightcrafts/ui/region/curves" ); + // Default values for new EllipticCurves. See EllipticCurve(Point2D). private final static String EllipticXTag = "SpotX"; private final static String EllipticYTag = "SpotY"; private final static String EllipticCloneXTag = "SpotCloneX"; private final static String EllipticCloneYTag = "SpotCloneY"; - // Minimum value for width and height of EllipticCurves - private static final double MIN_RADIUS = 0.5; - // Default width for new EllipticCurves. + // Minimum value for half width and half height of EllipticCurves + private static final double MIN_RADIUS = 0.5; + + // Default half width for new EllipticCurves. private static double EllipticX = Math.max(Prefs.getDouble(EllipticXTag, 30), MIN_RADIUS); - // Default height for new EllipticCurves. + + // Default half height for new EllipticCurves. private static double EllipticY = Math.max(Prefs.getDouble(EllipticYTag, 30), MIN_RADIUS); // Default clone point offsets for new EllipticCurves. 
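DropperMode and ClonePointMode above now translate mouse coordinates with HiDpi.imageSpacePointFrom() before using them, and ClonePoint scales its nominal radius by the square root of the default transform's determinant (the linear scale factor). The HiDpi utility itself is not shown in this patch; the sketch below only illustrates the presumed idea of mapping a component-space point back to image space through the inverse of a scale transform, with a stand-in transform value.

    // Assumed sketch of the image-space conversion; the transform here is a
    // stand-in, the real HiDpi.defaultTransform is defined elsewhere in the tree.
    import java.awt.Point;
    import java.awt.geom.AffineTransform;
    import java.awt.geom.NoninvertibleTransformException;
    import java.awt.geom.Point2D;

    final class HiDpiSketch {
        static final AffineTransform defaultTransform =
                AffineTransform.getScaleInstance(2.0, 2.0);   // e.g. a 200% HiDPI scale

        static Point imageSpacePointFrom(Point p) {
            try {
                Point2D q = defaultTransform.inverseTransform(p, null);
                return new Point((int) Math.round(q.getX()), (int) Math.round(q.getY()));
            } catch (NoninvertibleTransformException e) {
                return p;   // identity fallback for a degenerate transform
            }
        }

        static double linearScale() {
            // What ClonePoint multiplies its 1:1 radius by.
            return Math.sqrt(defaultTransform.getDeterminant());
        }
    }
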
- private static double EllipticCloneX = - Prefs.getDouble(EllipticCloneXTag, EllipticX); - private static double EllipticCloneY = - Prefs.getDouble(EllipticCloneYTag, 0); + private static double EllipticCloneX = Prefs.getDouble(EllipticCloneXTag, EllipticX); + private static double EllipticCloneY = Prefs.getDouble(EllipticCloneYTag, 0); // If the width is directly manipulated (user interaction, restore), then // stop updating it automatically when control points change. @@ -62,72 +64,66 @@ } public EllipticCurve(Point2D center) { - Point2D upperLeft = new Point2D.Double( + final var upperLeft = new Point2D.Double( center.getX() - EllipticX, center.getY() - EllipticY ); - Point2D lowerRight = new Point2D.Double( + final var lowerRight = new Point2D.Double( center.getX() + EllipticX, center.getY() + EllipticY ); addPoint(upperLeft); addPoint(lowerRight); - double scale = Math.min( - 2 * Math.abs(EllipticX), 2 * Math.abs(EllipticY) - ); + final var scale = Math.min(2 * Math.abs(EllipticX), 2 * Math.abs(EllipticY)); setWidth((float) scale / 6); - Point2D clonePt = new Point2D.Double( + final var clonePt = new Point2D.Double( center.getX() + EllipticCloneX, center.getY() + EllipticCloneY ); setClonePoint(clonePt); } + @Override public void movePoint(int n, Point2D p) { // Preserve the center: - Point2D p1 = (Point2D) points.get(0); - Point2D p2 = (Point2D) points.get(1); - double centerX = (p1.getX() + p2.getX()) / 2; - double centerY = (p1.getY() + p2.getY()) / 2; + final var center = getCenter(); + final var centerX = center.getX(); + final var centerY = center.getY(); // Update the control points to the complementary locations, to // preserve the center. - double dx = Math.max(p.getX() - centerX, MIN_RADIUS); - double dy = Math.max(p.getY() - centerY, MIN_RADIUS); - Point2D q1 = new Point2D.Double(centerX + dx, centerY + dy); - super.movePoint(n, q1); - Point2D q2 = new Point2D.Double(centerX - dx, centerY - dy); - super.movePoint(1 - n, q2); - - // Revise the default values. - p1 = (Point2D) points.get(0); - p2 = (Point2D) points.get(1); - EllipticX = (p1.getX() - p2.getX()) / 2; - EllipticY = (p1.getY() - p2.getY()) / 2; + final var sign = (n == 0) ? -1.0 : 1.0; + EllipticX = Math.max(sign * (p.getX() - centerX), MIN_RADIUS); + EllipticY = Math.max(sign * (p.getY() - centerY), MIN_RADIUS); + final var tl = new Point2D.Double(centerX - EllipticX, centerY - EllipticY); + final var br = new Point2D.Double(centerX + EllipticX, centerY + EllipticY); + super.movePoint(0, tl); + super.movePoint(1, br); // Adjust the inner curve width automatically. if (! isManualWidthSet) { Rectangle2D bounds = shape.getBounds(); - double width = bounds.getWidth(); - double height = bounds.getHeight(); - double scale = Math.min(width, height); + final var width = bounds.getWidth(); + final var height = bounds.getHeight(); + final var scale = Math.min(width, height); setWidth((float) scale / 6); isManualWidthSet = false; } savePrefs(); } + @Override public void setClonePoint(Point2D p) { super.setClonePoint(p); // Revise the default clone point offsets. 
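The rewritten movePoint() above keeps the ellipse centred: whichever corner is dragged, the opposite corner is mirrored through the midpoint, and the new half-extents become the persisted EllipticX/EllipticY defaults. A small worked example of that mirroring, with assumed coordinates:

    // Worked example (assumed values) of the centre-preserving corner update.
    import java.awt.geom.Point2D;

    final class MovePointExample {
        public static void main(String[] args) {
            final double MIN_RADIUS = 0.5;
            Point2D center  = new Point2D.Double(100, 100);
            Point2D dragged = new Point2D.Double(130, 120);   // new bottom-right corner
            double dx = Math.max(dragged.getX() - center.getX(), MIN_RADIUS);
            double dy = Math.max(dragged.getY() - center.getY(), MIN_RADIUS);
            Point2D tl = new Point2D.Double(center.getX() - dx, center.getY() - dy);
            Point2D br = new Point2D.Double(center.getX() + dx, center.getY() + dy);
            System.out.println(tl + " and " + br);   // (70, 80) and (130, 120); midpoint unchanged
        }
    }
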
- Point2D p1 = (Point2D) points.get(0); - Point2D p2 = (Point2D) points.get(1); - EllipticCloneX = p.getX() - (p1.getX() + p2.getX()) / 2; - EllipticCloneY = p.getY() - (p1.getY() + p2.getY()) / 2; + final var center = getCenter(); + EllipticCloneX = p.getX() - center.getX(); + EllipticCloneY = p.getY() - center.getY(); savePrefs(); } + @Override public void setInnerShape(Point2D p) { super.setInnerShape(p); isManualWidthSet = true; @@ -136,6 +132,7 @@ /** * This is the only Curve implementation with a fixed number of points. */ + @Override public boolean allowsAddRemovePoints() { return false; } @@ -143,47 +140,50 @@ /** * This is the only Curve that is valid with only two points. */ + @Override public boolean isValidShape() { - return (points.size() == 2); + if (points.size() != 2) return false; + var tl = points.get(0); + var br = points.get(1); + return br.getX() - tl.getX() >= MIN_RADIUS && br.getY() - tl.getY() >= MIN_RADIUS; } /** * Turn off automatic inner curve width updates if this Curve has ever * been saved. */ + @Override public void restore(XmlNode node) throws XMLException { super.restore(node); isManualWidthSet = true; } + @Override void updateShape() { if (points.size() != 2) { shape = new GeneralPath(); return; } - Point2D p1 = (Point2D) points.get(0); - Point2D p2 = (Point2D) points.get(1); - double minX = Math.min(p1.getX(), p2.getX()); - double minY = Math.min(p1.getY(), p2.getY()); - double maxX = Math.max(p1.getX(), p2.getX()); - double maxY = Math.max(p1.getY(), p2.getY()); - - double x = minX; - double y = minY; - double w = maxX - minX; - double h = maxY - minY; + final var tl = points.get(0); + final var br = points.get(1); + + final var minX = tl.getX(); + final var minY = tl.getY(); - Rectangle2D bounds = new Rectangle2D.Double(x, y, w, h); + final var w = br.getX() - minX; + final var h = br.getY() - minY; - Shape ne = new Arc2D.Double(bounds, -45, 180, Arc2D.OPEN); - Shape sw = new Arc2D.Double(bounds, 135, 180, Arc2D.OPEN); + final var bounds = new Rectangle2D.Double(minX, minY, w, h); + + final Shape ne = new Arc2D.Double(bounds, -45, 180, Arc2D.OPEN); + final Shape sw = new Arc2D.Double(bounds, 135, 180, Arc2D.OPEN); segments.clear(); segments.add(ne); segments.add(sw); - float[] start = new float[6]; + final var start = new float[6]; ne.getPathIterator(null).currentSegment(start); GeneralPath path = new GeneralPath(); @@ -201,4 +201,14 @@ Prefs.putDouble(EllipticCloneXTag, EllipticCloneX); Prefs.putDouble(EllipticCloneYTag, EllipticCloneY); } + + private Point2D getCenter() { + return midpoint(points.get(0), points.get(1)); + } + + private static Point2D midpoint(Point2D p1, Point2D p2) { + final var centerX = (p1.getX() + p2.getX()) / 2; + final var centerY = (p1.getY() + p2.getY()) / 2; + return new Point2D.Double(centerX, centerY); + } } diff -Nru lightzone-4.2.2/lightcrafts/src/com/lightcrafts/utils/LCArrays.java lightzone-4.2.3/lightcrafts/src/com/lightcrafts/utils/LCArrays.java --- lightzone-4.2.2/lightcrafts/src/com/lightcrafts/utils/LCArrays.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/src/com/lightcrafts/utils/LCArrays.java 2021-04-17 01:19:49.000000000 +0000 @@ -3,6 +3,7 @@ package com.lightcrafts.utils; import java.lang.reflect.Array; +import java.nio.ByteBuffer; /** * Various array utilities. @@ -49,8 +50,12 @@ * @param destPos The starting position in the destination array. * @param length The number of bytes to be copied. 
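updateShape(), shown a little earlier in this hunk, still builds the elliptic outline from two open 180-degree arcs so that each half can be kept as a separate segment. A compact, self-contained sketch of that construction, for illustration only:

    // Sketch of the two-arc outline used by EllipticCurve.updateShape().
    import java.awt.Shape;
    import java.awt.geom.Arc2D;
    import java.awt.geom.GeneralPath;
    import java.awt.geom.Rectangle2D;

    final class TwoArcOutline {
        static GeneralPath outline(Rectangle2D bounds) {
            Shape ne = new Arc2D.Double(bounds, -45, 180, Arc2D.OPEN);   // north-east half
            Shape sw = new Arc2D.Double(bounds, 135, 180, Arc2D.OPEN);   // south-west half
            GeneralPath path = new GeneralPath(ne);
            path.append(sw, true);   // connect the halves into one outline
            return path;
        }
    }
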
*/ - public static native void copy( int[] src, int srcPos, - byte[] dest, int destPos, int length ); + public static void copy( int[] src, int srcPos, + byte[] dest, int destPos, int length ) { + ByteBuffer.wrap(dest, destPos, length) + .asIntBuffer() + .put(src, srcPos, length / 8); + } /** * Copies the bytes from the source array to the destination array @@ -72,8 +77,12 @@ * @param destPos The starting position in the destination array. * @param length The number of bytes to be copied. */ - public static native void copy( short[] src, int srcPos, - byte[] dest, int destPos, int length ); + public static void copy( short[] src, int srcPos, + byte[] dest, int destPos, int length ) { + ByteBuffer.wrap(dest, destPos, length) + .asShortBuffer() + .put(src, srcPos, length / 4); + } /** * Copies the bytes from the source array to the destination array @@ -95,8 +104,12 @@ * @param destPos The starting position in the destination array. * @param length The number of bytes to be copied. */ - public static native void copy( byte[] src, int srcPos, - int[] dest, int destPos, int length ); + public static void copy( byte[] src, int srcPos, + int[] dest, int destPos, int length ) { + ByteBuffer.wrap(src, srcPos, length) + .asIntBuffer() + .get(dest, destPos, length / 8); + } /** * Copies the bytes from the source array to the destination array @@ -118,8 +131,12 @@ * @param destPos The starting position in the destination array. * @param length The number of bytes to be copied. */ - public static native void copy( byte[] src, int srcPos, - short[] dest, int destPos, int length ); + public static void copy( byte[] src, int srcPos, + short[] dest, int destPos, int length ) { + ByteBuffer.wrap(src, srcPos, length) + .asShortBuffer() + .get(dest, destPos, length / 4); + } /** * Resize an array. @@ -135,22 +152,18 @@ * and new lengths are the same, returns the old array as-is. 
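The native LCArrays.copy() overloads above are replaced by pure-Java implementations built on ByteBuffer views: wrap the byte[] side, view it as an IntBuffer or ShortBuffer, then bulk put/get. The sketch below shows the general wrap-and-view technique with lengths expressed in bytes; the element-count arithmetic, the big-endian default of a wrapped buffer, and all names are assumptions of this sketch, not a restatement of the patched methods.

    // Sketch of the ByteBuffer-view copy technique (lengths in bytes).
    import java.nio.ByteBuffer;

    final class ArrayCopySketch {
        static void intsToBytes(int[] src, byte[] dest, int lengthInBytes) {
            ByteBuffer.wrap(dest, 0, lengthInBytes)
                      .asIntBuffer()
                      .put(src, 0, lengthInBytes / Integer.BYTES);
        }

        static void bytesToInts(byte[] src, int[] dest, int lengthInBytes) {
            ByteBuffer.wrap(src, 0, lengthInBytes)
                      .asIntBuffer()
                      .get(dest, 0, lengthInBytes / Integer.BYTES);
        }

        public static void main(String[] args) {
            byte[] bytes = {1, 0, 0, 0, 0, 0, 0, 1};
            int[] ints = new int[2];
            bytesToInts(bytes, ints, bytes.length);
            System.out.printf("0x%08X 0x%08X%n", ints[0], ints[1]);   // 0x01000000 0x00000001
        }
    }
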
*/ public static Object resize( Object oldArray, int newLength ) { - final Class c = oldArray.getClass(); + final Class c = oldArray.getClass(); if ( !c.isArray() ) throw new IllegalArgumentException( "given non-array" ); final int oldLength = Array.getLength( oldArray ); if ( oldLength == newLength ) return oldArray; - final Class type = c.getComponentType(); + final Class type = c.getComponentType(); final Object newArray = Array.newInstance( type, newLength ); final int copyLength = Math.min( oldLength, newLength ); if ( copyLength > 0 ) System.arraycopy( oldArray, 0, newArray, 0, copyLength ); return newArray; } - - static { - System.loadLibrary( "LCArrays" ); - } } /* vim:set et sw=4 ts=4: */ diff -Nru lightzone-4.2.2/lightcrafts/test/src/com/lightcrafts/utils/LCArraysTest.java lightzone-4.2.3/lightcrafts/test/src/com/lightcrafts/utils/LCArraysTest.java --- lightzone-4.2.2/lightcrafts/test/src/com/lightcrafts/utils/LCArraysTest.java 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/test/src/com/lightcrafts/utils/LCArraysTest.java 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,50 @@ +/* Copyright (C) 2021- Masahiro Kitagawa */ + +package com.lightcrafts.utils; + +import static org.junit.Assert.assertArrayEquals; + +public class LCArraysTest { + private final byte[] byteArray = {1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}; + private final int[] intArray = {0x1000000, 0, 0x1000000, 0}; + private final short[] shortArray = {0x100, 0, 0, 0, 0x100, 0, 0, 0}; + + @org.junit.Test + public void testCopyIntArrayToByteArray() { + final int length = intArray.length * 4; + var dstByteArray = new byte[length]; + LCArrays.copy(intArray, 0, dstByteArray, 0, length); + assertArrayEquals(dstByteArray, byteArray); + } + + @org.junit.Test + public void testCopyShortArrayToByteArray() { + final int length = intArray.length * 2; + var dstByteArray = new byte[length]; + LCArrays.copy(shortArray, 0, dstByteArray, 0, length); + assertArrayEquals(dstByteArray, byteArray); + } + + @org.junit.Test + public void testCopyByteArrayToIntArray() { + final int length = byteArray.length; + var dstIntArray = new int[length]; + LCArrays.copy(byteArray, 0, dstIntArray, 0, length); + assertArrayEquals(dstIntArray, intArray); + } + + @org.junit.Test + public void testCopyByteArrayToShortArray() { + final int length = byteArray.length; + var dstShortArray = new short[length]; + LCArrays.copy(byteArray, 0, dstShortArray, 0, length); + assertArrayEquals(dstShortArray, shortArray); + } + + @org.junit.Test + public void testResize() { + assertArrayEquals((short[]) LCArrays.resize(shortArray, 4), + new short[]{0x100, 0, 0, 0}); + // TODO + } +} \ No newline at end of file diff -Nru lightzone-4.2.2/lightcrafts/test/src/test2.iml lightzone-4.2.3/lightcrafts/test/src/test2.iml --- lightzone-4.2.2/lightcrafts/test/src/test2.iml 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/test/src/test2.iml 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff -Nru lightzone-4.2.2/lightcrafts/version.txt lightzone-4.2.3/lightcrafts/version.txt --- lightzone-4.2.2/lightcrafts/version.txt 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/lightcrafts/version.txt 2021-04-17 01:19:49.000000000 +0000 @@ -1 +1 @@ -4.2.2 +4.2.3 diff -Nru lightzone-4.2.2/linux/build.xml lightzone-4.2.3/linux/build.xml --- lightzone-4.2.2/linux/build.xml 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/build.xml 2021-04-17 01:19:49.000000000 +0000 @@ -51,6 
+51,11 @@ + + + + + @@ -60,10 +65,6 @@ - - - - @@ -74,7 +75,7 @@ - + @@ -189,10 +190,6 @@ - - - - diff -Nru lightzone-4.2.2/linux/jnisrc/GNUmakefile lightzone-4.2.3/linux/jnisrc/GNUmakefile --- lightzone-4.2.2/linux/jnisrc/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/jnisrc/GNUmakefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -include ../../lightcrafts/mk/recurse.mk - -# vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/linux/jnisrc/jni-linux.mk lightzone-4.2.3/linux/jnisrc/jni-linux.mk --- lightzone-4.2.2/linux/jnisrc/jni-linux.mk 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/jnisrc/jni-linux.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,10 +0,0 @@ -## -# JNI Linux Makefile -## - -## -# ROOT is defined by the makefile including this one. -## -include $(ROOT)/lightcrafts/jnisrc/jni.mk - -# vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/linux/jnisrc/libLinux/GNUmakefile lightzone-4.2.3/linux/jnisrc/libLinux/GNUmakefile --- lightzone-4.2.2/linux/jnisrc/libLinux/GNUmakefile 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/jnisrc/libLinux/GNUmakefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -## -# libLinux Makefile -## - -TARGET_BASE:= Linux - -# Uncomment to compile in debug mode. -#DEBUG:= true - -ROOT:= ../../.. -JNI_LINUX_INCLUDES:= -I$(ROOT)/linux -JNI_LINUX_LINK:= -lLCJNI -lX11 - -JAVAH_CLASSES:= com.lightcrafts.platform.linux.LinuxKeyUtil - -include ../jni-linux.mk - -# vim:set noet sw=8 ts=8: diff -Nru lightzone-4.2.2/linux/jnisrc/libLinux/KeyUtil.cpp lightzone-4.2.3/linux/jnisrc/libLinux/KeyUtil.cpp --- lightzone-4.2.2/linux/jnisrc/libLinux/KeyUtil.cpp 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/jnisrc/libLinux/KeyUtil.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,97 +0,0 @@ -/* Copyright (C) 2005-2011 Fabio Riccardi */ - -// standard -#include -#include -#include -#ifdef DEBUG -#include -#endif - -// local -#include "LC_JNIUtils.h" -#ifndef AUTO_DEP -#include "javah/com_lightcrafts_platform_linux_LinuxKeyUtil.h" -#endif - -using namespace std; -using namespace LightCrafts; - -////////// JNI //////////////////////////////////////////////////////////////// - -#define LinuxKeyUtil_METHOD(method) \ - name4(Java_,com_lightcrafts_platform_linux_LinuxKeyUtil,_,method) - -/** - * Find the index of the first nonzero bit in the given char, where the least - * significant bit has index zero. Used in keysToKeycode(). - */ -int indexOfBit(char c) { -#ifdef DEBUG - assert(c != 0); -#endif - int n; - for (n=0; n<8; n++) { - if (c & 0x01) { - break; - } - c = c >> 1; - } - return n; -} - -/** - * Determine the KeyCode of the first pressed key in the 32-character keys - * array returned from XQueryKeymap. - */ -KeyCode keysToKeycode(char *keys) { - int n; - for (n=0; n<32; n++) { - if (keys[n] != 0) { - return 8 * n + indexOfBit(keys[n]); - } - } - return 0; -} - -/** - * The X11 Display reference is a global variable, initialized in the first - * call to isKeyPressed(). - */ -Display *display = NULL; - -/** - * Detect whether the key corresponding to the given virtual key code is - * currently pressed. (For ASCII characters, the virtual key code is just - * the ASCII code.) 
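The libLinux JNI sources being deleted here provided the X11-based isKeyPressed() helper, whose only purpose was to filter keyboard auto-repeat. With the per-key timestamp bookkeeping added to ModeManager earlier in this patch, the native helper is no longer needed. A standalone sketch of that bookkeeping follows; the 1 ms threshold mirrors the patched comparison, but this class is an illustration, not code from the patch.

    // Sketch: per-key press timestamps let auto-repeat KEY_RELEASED events be
    // ignored without a native isKeyPressed() call. A release whose timestamp is
    // within 1 ms of the recorded press is treated as synthetic auto-repeat.
    import java.awt.event.KeyEvent;
    import java.util.HashMap;
    import java.util.Map;

    final class AutoRepeatFilter {
        private final Map<Integer, Long> lastPressed = new HashMap<>();

        boolean isGenuineRelease(KeyEvent e) {
            int code = e.getKeyCode();
            if (e.getID() == KeyEvent.KEY_PRESSED) {
                lastPressed.put(code, e.getWhen());
                return false;
            }
            if (e.getID() == KeyEvent.KEY_RELEASED) {
                long pressedAt = lastPressed.getOrDefault(code, 0L);
                return e.getWhen() > pressedAt + 1;
            }
            return false;
        }
    }
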
- */ -JNIEXPORT jboolean JNICALL LinuxKeyUtil_METHOD(isKeyPressed) - ( JNIEnv *env, jclass, jint keyCode ) -{ - if (display == NULL) { - display = XOpenDisplay(NULL); - } - if (display == NULL) { - cerr << "LinuxPlatform cannot connect to X server " - << XDisplayName(NULL) - << endl; - return false; - } - char keys[32]; - XQueryKeymap(display, keys); - - KeyCode code = keysToKeycode(keys); - - KeySym sym = XkbKeycodeToKeysym(display, code, 0, 0); - - bool pressed = keyCode == sym; -#ifdef DEBUG - cout << "keyCode " << keyCode << " is "; - if ( ! pressed ) { - cout << "not "; - } - cout << "pressed" << endl; -#endif - return pressed; -} -/* vim:set et sw=4 ts=4: */ diff -Nru lightzone-4.2.2/linux/lightzone.changes lightzone-4.2.3/linux/lightzone.changes --- lightzone-4.2.2/linux/lightzone.changes 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/lightzone.changes 2021-04-17 01:19:49.000000000 +0000 @@ -1,3 +1,8 @@ +Sat Apr 17 2021 - KITAGAWA Masahiro + +- Upstream sync to 4.2.3 + +------------------------------------------------------------------- Tue Nov 24 2020 - KITAGAWA Masahiro - Upstream sync to 4.2.2 diff -Nru lightzone-4.2.2/linux/lightzone.spec lightzone-4.2.3/linux/lightzone.spec --- lightzone-4.2.2/linux/lightzone.spec 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/lightzone.spec 2021-04-17 01:19:49.000000000 +0000 @@ -5,7 +5,7 @@ Name: lightzone # Do not use hyphens in Version tag. OBS doesn't handle it properly. # Use 4.1.0.beta2 for betas and 4.1.0.0 for final, since RPM sorts A-Z before 0-9. -Version: 4.2.2 +Version: 4.2.3 Release: 0%{?dist} License: BSD-3-Clause Summary: Open-source professional-level digital darkroom software @@ -62,7 +62,7 @@ Requires: libgomp1 %endif -BuildRequires: javapackages-tools, %{libX11_devel}, ant, autoconf, gcc, gcc-c++, make, git, javahelp2, %{lcms2_devel}, lensfun-devel, %{libjpeg_devel}, libtiff-devel, %{pkg_config}, rsync +BuildRequires: javapackages-tools, java-11-openjdk-devel, %{libX11_devel}, ant, autoconf, gcc, gcc-c++, make, git, javahelp2, %{lcms2_devel}, lensfun-devel, %{libjpeg_devel}, libtiff-devel, %{pkg_config}, rsync Requires: javahelp2, lcms2, lensfun, %{xmllint} BuildRoot: %{_tmppath}/%{name}-%{version}-build diff -Nru lightzone-4.2.2/linux/linux.iml lightzone-4.2.3/linux/linux.iml --- lightzone-4.2.2/linux/linux.iml 1970-01-01 00:00:00.000000000 +0000 +++ lightzone-4.2.3/linux/linux.iml 2021-04-17 01:19:49.000000000 +0000 @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff -Nru lightzone-4.2.2/linux/PKGBUILD lightzone-4.2.3/linux/PKGBUILD --- lightzone-4.2.2/linux/PKGBUILD 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/PKGBUILD 2021-04-17 01:19:49.000000000 +0000 @@ -1,29 +1,45 @@ # Maintainer: KITAGAWA Masahiro -pkgname="lightzone" -pkgver=4.2.2 +pkgname=lightzone +pkgver=4.2.3 pkgrel=0 pkgdesc="Open-source professional-level digital darkroom software" url="http://lightzoneproject.org/" license=('custom:BSD-3-Clause') arch=('x86_64') -depends=('java-runtime=13' 'javahelp2' 'lensfun' 'lcms2' 'libjpeg-turbo' 'libtiff' 'libxml2') -makedepends=('java-environment=13' 'apache-ant' 'autoconf' 'gcc' 'make' 'git' 'libx11' 'pkgconf' 'rsync' 'javahelp2' 'lcms2' 'libjpeg-turbo' 'libtiff') +depends=('java-runtime=11' + 'javahelp2' + 'lcms2' + 'lensfun' + 'libjpeg-turbo' + 'libtiff' + 'libxml2') +makedepends=('java-environment=11' + 'ant' + 'autoconf' + 'gcc' + 'make' + 'git' + 'libx11' + 'pkgconf' + 'rsync' + 'javahelp2' + 'lcms2' + 'libjpeg-turbo' + 'libtiff') 
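The packaging above (lightzone.spec and PKGBUILD) now pins the build and runtime to Java 11 instead of 13. For illustration only, a launcher-side sanity check of the running JVM's feature version might look like the following; it is not part of the patch.

    // Illustrative only: verify the running JVM matches the Java 11 requirement.
    final class JavaVersionCheck {
        public static void main(String[] args) {
            int feature = Runtime.version().feature();   // e.g. 11
            if (feature < 11) {
                System.err.println("LightZone 4.2.3 expects Java 11 or newer, found " + feature);
                System.exit(1);
            }
        }
    }
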
-#vcsname=${pkgname}-${pkgver} -#source=("${vcsname}.tar.bz2") vcsname='LightZone' source=("git+https://github.com/ktgw0316/${vcsname}.git") md5sums=('SKIP') build() { # Set $JAVA_HOME and $ANT_HOME - if [ -d /usr/lib/jvm/java-13-jdk ]; then - export JAVA_HOME=/usr/lib/jvm/java-13-jdk - elif [ -d /usr/lib/jvm/java-13-openjdk ]; then - export JAVA_HOME=/usr/lib/jvm/java-13-openjdk + if [ -d /usr/lib/jvm/java-11-jdk ]; then + export JAVA_HOME=/usr/lib/jvm/java-11-jdk + elif [ -d /usr/lib/jvm/java-11-openjdk ]; then + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk else - export JAVA_HOME=/usr/lib/jvm/java-13-openjdk + export JAVA_HOME=/usr/lib/jvm/default fi # Parallel compilation fails, so disable it diff -Nru lightzone-4.2.2/linux/products/lightzone lightzone-4.2.3/linux/products/lightzone --- lightzone-4.2.2/linux/products/lightzone 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/products/lightzone 2021-04-17 01:19:49.000000000 +0000 @@ -2,7 +2,7 @@ # # LightZone startscript # -echo Starting LightZone version 4.2.2 ... +echo Starting LightZone version 4.2.3 ... echo with options : ${@} java -version diff -Nru lightzone-4.2.2/linux/src/com/lightcrafts/platform/linux/LinuxKeyUtil.java lightzone-4.2.3/linux/src/com/lightcrafts/platform/linux/LinuxKeyUtil.java --- lightzone-4.2.2/linux/src/com/lightcrafts/platform/linux/LinuxKeyUtil.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/src/com/lightcrafts/platform/linux/LinuxKeyUtil.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -/* Copyright (C) 2005-2011 Fabio Riccardi */ - -package com.lightcrafts.platform.linux; - -/** - * A JNI wrapper for the Linux implementation of Platform.isKeyPressed(). - */ -class LinuxKeyUtil { - - static native boolean isKeyPressed( int keyCode ); - - static { - System.loadLibrary( "Linux" ); - } -} diff -Nru lightzone-4.2.2/linux/src/com/lightcrafts/platform/linux/LinuxPlatform.java lightzone-4.2.3/linux/src/com/lightcrafts/platform/linux/LinuxPlatform.java --- lightzone-4.2.2/linux/src/com/lightcrafts/platform/linux/LinuxPlatform.java 2020-12-02 13:51:38.000000000 +0000 +++ lightzone-4.2.3/linux/src/com/lightcrafts/platform/linux/LinuxPlatform.java 2021-04-17 01:19:49.000000000 +0000 @@ -114,11 +114,6 @@ return getColorProfiles(); } - @Override - public boolean isKeyPressed(int keyCode) { - return LinuxKeyUtil.isKeyPressed(keyCode); - } - private static synchronized Collection getColorProfiles() { if (Profiles == null) { Profiles = new HashSet<>();