#include int main() { float a[] = { 1.0f, 2.0f, 3.0f, 4.0f }; float b[] = { 1.1f, 2.2f, 3.3f, 4.4f }; float s = 2.0f; for (int i = 0; i < 4; i++) printf("a[%d] = %f\n", i, a[i]); __asm { movups xmm1, a ;//load 4 floats = 128 bit movups xmm2, b ;//load 4 floats = 128 bit addps xmm1, xmm2 ;//add all 4 floats in parallel addss xmm1, s ;//add scalar s to 1st float, 2nd - 4th float remain unchanged movups a, xmm1 ;//store into a } for (int i = 0; i < 4; i++) printf("a[%d] = %f\n", i, a[i]); return 0; }