0001-AVX-Instruction-Set-Support.patch

adamsaka, 11/15/2012 06:18 AM


sys/config/LINT64
 # CPU_ENABLE_EST enables support for Enhanced SpeedStep technology
 # found in Pentium(tm) M processors.
 #
+# CPU_ENABLE_AVX enables AVX instruction set.
+# This option requires gcc version 4.7 or later.
+#
 #options 	CPU_DISABLE_SSE
 options		CPU_ENABLE_EST
+options 	CPU_ENABLE_AVX		#Requires gcc 4.7 or later.
 
 #####################################################################
 # COMPATIBILITY OPTIONS
sys/config/X86_64_GENERIC
 cpu		HAMMER_CPU
 ident		X86_64_GENERIC
 maxusers	0
+#options	CPU_ENABLE_AVX		#Support AVX Instructions (GCC4.7 Req)
 
 makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 
sys/cpu/x86_64/include/cpufunc.h
 }
 
 static __inline void
+xsetbv(u_int ecx, u_int eax, u_int edx)
+{
+	__asm __volatile("xsetbv"
+			 :
+			 : "a" (eax), "c" (ecx), "d" (edx));
+}
+
+static __inline void
 load_cr0(u_long data)
 {
 
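Not part of the attached patch: a matching read-side wrapper in the same style can be useful when code needs to inspect XCR0 after the xsetbv() above has programmed it. This is only a sketch; the name xgetbv() and its placement here are assumptions, not something the patch provides.

/* Sketch only -- not in the patch.  Reads the extended control register
 * selected by %ecx (XCR0 when ecx == 0) into %edx:%eax. */
static __inline uint64_t
xgetbv(u_int ecx)
{
	u_int eax, edx;

	__asm __volatile("xgetbv"
			 : "=a" (eax), "=d" (edx)
			 : "c" (ecx));
	return (((uint64_t)edx << 32) | eax);
}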
sys/cpu/x86_64/include/npx.h
 #ifndef _CPU_NPX_H_
 #define	_CPU_NPX_H_
 
+#include "opt_cpu.h"
+
 #ifndef _SYS_TYPES_H_
 #include <sys/types.h>
 #endif
......
 	u_char sv_pad[220];
 } __attribute__((aligned(16)));
 
+#ifdef  CPU_ENABLE_AVX
+struct	saveymm {
+	u_char xsavedata[832];
+} __attribute__((aligned(64)));
+
+#endif
+
 union	savefpu {
 	struct	save87	sv_87;
 	struct	savexmm	sv_xmm;
+#ifdef CPU_ENABLE_AVX
+	struct  saveymm sv_ymm;
+#endif
 };
 
 /*
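The 832 in struct saveymm is not arbitrary: the standard-form XSAVE area is the 512-byte legacy FXSAVE region, the 64-byte XSAVE header, and 256 bytes (16 x 16) of YMM_Hi128 state. Not part of the patch, but a compile-time check along these lines, placed in a .c file that can see both this header and CTASSERT() from <sys/systm.h>, would document that sizing:

/* Sketch only -- not in the patch.  832 = 512-byte legacy FXSAVE region
 * + 64-byte XSAVE header + 16 YMM_Hi128 halves of 16 bytes each. */
CTASSERT(sizeof(struct saveymm) == 512 + 64 + 16 * 16);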
sys/cpu/x86_64/include/specialreg.h
 #define	CR0_MP	0x00000002	/* "Math" (fpu) Present */
 #define	CR0_EM	0x00000004	/* EMulate FPU instructions. (trap ESC only) */
 #define	CR0_TS	0x00000008	/* Task Switched (if MP, trap ESC and WAIT) */
-#define	CR0_PG	0x80000000	/* PaGing enable */
+#define	CR0_PG	0x80000000	/* Paging enable */
 
 /*
  * Bits in 486 special registers:
  */
 #define	CR0_NE	0x00000020	/* Numeric Error enable (EX16 vs IRQ13) */
-#define	CR0_WP	0x00010000	/* Write Protect (honor page protect in
-							   all modes) */
+#define	CR0_WP	0x00010000	/* Write Protect (honor page protect in all modes) */
 #define	CR0_AM	0x00040000	/* Alignment Mask (set to enable AC flag) */
 #define	CR0_NW  0x20000000	/* Not Write-through */
 #define	CR0_CD  0x40000000	/* Cache Disable */
 
 /*
- * Bits in PPro special registers
+ * Bits in CR4 special register
  */
-#define	CR4_VME	0x00000001	/* Virtual 8086 mode extensions */
-#define	CR4_PVI	0x00000002	/* Protected-mode virtual interrupts */
-#define	CR4_TSD	0x00000004	/* Time stamp disable */
-#define	CR4_DE	0x00000008	/* Debugging extensions */
-#define	CR4_PSE	0x00000010	/* Page size extensions */
-#define	CR4_PAE	0x00000020	/* Physical address extension */
-#define	CR4_MCE	0x00000040	/* Machine check enable */
-#define	CR4_PGE	0x00000080	/* Page global enable */
-#define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
-#define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
-#define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
+#define	CR4_VME		0x00000001	/* Virtual 8086 mode extensions */
+#define	CR4_PVI		0x00000002	/* Protected-mode virtual interrupts */
+#define	CR4_TSD		0x00000004	/* Time stamp disable */
+#define	CR4_DE		0x00000008	/* Debugging extensions */
+#define	CR4_PSE		0x00000010	/* Page size extensions */
+#define	CR4_PAE		0x00000020	/* Physical address extension */
+#define	CR4_MCE		0x00000040	/* Machine check enable */
+#define	CR4_PGE		0x00000080	/* Page global enable */
+#define	CR4_PCE		0x00000100	/* Performance monitoring counter enable */
+#define	CR4_FXSR 	0x00000200	/* Fast FPU save/restore used by OS */
+#define	CR4_XMM	    	0x00000400	/* Enable SIMD/MMX2 to use except 16 */
+#define	CR4_XSAVE 	0x00040000	/* Enable XSave (for AVX Instructions)*/
 
 /*
  * Bits in x86_64 special registers.  EFER is 64 bits wide.
  */
-#define	EFER_SCE 0x000000001	/* System Call Extensions (R/W) */
-#define	EFER_LME 0x000000100	/* Long mode enable (R/W) */
-#define	EFER_LMA 0x000000400	/* Long mode active (R) */
-#define	EFER_NXE 0x000000800	/* PTE No-Execute bit enable (R/W) */
+#define	EFER_SCE 	0x000000001	/* System Call Extensions (R/W) */
+#define	EFER_LME 	0x000000100	/* Long mode enable (R/W) */
+#define	EFER_LMA 	0x000000400	/* Long mode active (R) */
+#define	EFER_NXE 	0x000000800	/* PTE No-Execute bit enable (R/W) */
 
 /*
  * CPUID instruction features register
......
 #define	CPUID2_SSE42	0x00100000
 #define	CPUID2_X2APIC	0x00200000
 #define	CPUID2_POPCNT	0x00800000
-#define	CPUID2_AESNI	0x02000000
-#define	CPUID2_RDRAND	0x40000000
+#define	CPUID2_AESNI	0x02000000	/* AES Instruction Set */
+#define	CPUID2_XSAVE    0x04000000	/* XSave supported by CPU */
+#define	CPUID2_OSXSAVE  0x08000000      /* XSave and AVX supported by OS */
+#define	CPUID2_AVX	0x10000000      /* AVX instruction set support */
+#define	CPUID2_F16C	0x20000000	/* CVT16 instruction set support */
+#define	CPUID2_RDRAND	0x40000000	/* RdRand. On chip random numbers */
 #define	CPUID2_VMM	0x80000000	/* AMD 25481 2.34 page 11 */
 
+/*Bits related to the XFEATURE_ENABLED_MASK control register*/
+#define	CPU_XFEATURE_X87	0x00000001
+#define	CPU_XFEATURE_SSE	0x00000002
+#define	CPU_XFEATURE_YMM	0x00000004
+
 /*
  * Important bits in the AMD extended cpuid flags
  */
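Not part of the patch: the new CPUID2_* and CPU_XFEATURE_* bits are normally combined into the usual three-step "is AVX usable" test (the CPU supports XSAVE and AVX, the OS has set CR4.OSXSAVE, and XCR0 enables the SSE and YMM state components). A sketch, assuming a feature word holding CPUID leaf 1 %ecx (cpu_feature2 in this kernel) and the hypothetical xgetbv() wrapper sketched under cpufunc.h above:

/* Sketch only -- not in the patch. */
static int
avx_usable(u_int feature2)
{
	if ((feature2 & (CPUID2_XSAVE | CPUID2_AVX)) !=
	    (CPUID2_XSAVE | CPUID2_AVX))
		return (0);	/* CPU lacks XSAVE or AVX */
	if ((feature2 & CPUID2_OSXSAVE) == 0)
		return (0);	/* OS has not set CR4_XSAVE */
	/* XCR0 must have both the SSE and YMM state components enabled. */
	return ((xgetbv(0) & (CPU_XFEATURE_SSE | CPU_XFEATURE_YMM)) ==
	    (CPU_XFEATURE_SSE | CPU_XFEATURE_YMM));
}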
sys/platform/pc64/conf/options
 
 # x86_64 SMP options
 CPU_ENABLE_EST		opt_cpu.h
+CPU_ENABLE_AVX		opt_cpu.h
 
 # The cpu type
 #
sys/platform/pc64/include/md_var.h
 extern	u_int	amd_feature2;
 extern	u_int	cpu_clflush_line_size;
 extern	u_int	cpu_fxsr;
+extern	u_int   cpu_xsave;
 extern	u_int	cpu_high;
 extern	u_int	cpu_id;
 extern	u_int	cpu_procinfo;
sys/platform/pc64/x86_64/initcpu.c
 char	cpu_vendor[20];		/* CPU Origin code */
 u_int	cpu_vendor_id;		/* CPU vendor ID */
 u_int	cpu_fxsr;		/* SSE enabled */
+u_int   cpu_xsave;		/* AVX enabled by OS*/
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;	/* Default CLFLUSH line size */
 
......
 {
 	uint64_t msr;
 
+	/*Check for FXSR and SSE support and enable if available.*/
 	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
 		load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
 		cpu_fxsr = hw_instruction_sse = 1;
 	}
 
+#if defined(CPU_ENABLE_AVX)
+	/*Check for XSAVE and AVX support and enable if available.*/
+	if ((cpu_feature2 & CPUID2_AVX) && (cpu_feature2 & CPUID2_XSAVE)
+	     && (cpu_feature & CPUID_SSE)){
+		load_cr4(rcr4() | CR4_XSAVE);
+
+		/* Adjust size of savefpu in npx.h before adding to mask.*/
+		xsetbv(0,CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM,0);
+		cpu_xsave = 1;
+	}
+#endif
+
 	if (cpu_vendor_id == CPU_VENDOR_AMD) {
 		switch((cpu_id & 0xFF0000)) {
 		case 0x100000:
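Once a CPU_ENABLE_AVX kernel has executed the block above, userland should see the OSXSAVE bit in CPUID leaf 1. Not part of the patch: a small verification program (the file name avxcheck.c is made up) built with the gcc 4.7 toolchain the patch assumes:

/* Sketch only -- not in the patch.  Prints whether the CPU advertises AVX
 * and whether the running kernel has enabled XSAVE (OSXSAVE).
 * Build: cc -o avxcheck avxcheck.c */
#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return (1);
	printf("AVX in CPU:       %s\n", (ecx & (1u << 28)) ? "yes" : "no");
	printf("OSXSAVE (kernel): %s\n", (ecx & (1u << 27)) ? "yes" : "no");
	return (0);
}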
sys/platform/pc64/x86_64/npx.c
  */
 
 #include "opt_debug_npx.h"
+#include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
......
 #define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*(addr)))
 #define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
 #endif
+#ifdef  CPU_ENABLE_AVX
+#define xsave(eax,edx,addr)     __asm __volatile("xsave %0" : "=m" (*(addr)) : "a" (eax),"d" (edx) )
+#define xrstor(eax,edx,addr)	__asm __volatile("xrstor %0" : : "m" (*(addr)), "a" (eax), "d" (edx))
+#endif
 #define start_emulating()       __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
 				      : : "n" (CR0_TS) : "ax")
 #define stop_emulating()        __asm("clts")
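Background on the two macros just added (not taken from the patch itself): xsave stores only the state components whose bits are set in both %edx:%eax and XCR0, and both instructions fault if the save area is not 64-byte aligned, which is why npxinit() below raises its dummy area from __aligned(16) to __aligned(64). A sketch of direct use, with a hypothetical scratch buffer:

/* Sketch only -- not in the patch; "avx_scratch" is hypothetical.  The
 * mask matches the one programmed into XCR0 by initcpu.c. */
static union savefpu avx_scratch __aligned(64);

static void
avx_save_restore_example(void)
{
	xsave(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM, 0, &avx_scratch);
	xrstor(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM, 0, &avx_scratch);
}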
......
 void
 npxinit(u_short control)
 {
-	static union savefpu dummy __aligned(16);
+	/*64-Byte alignment required for xsave*/
+	static union savefpu dummy __aligned(64);
 
 	/*
 	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
......
 static void
 fpusave(union savefpu *addr)
 {
+#ifdef CPU_ENABLE_AVX
+	if (cpu_xsave)
+		xsave(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM ,0,addr);
+	else
+#endif
 #ifndef CPU_DISABLE_SSE
 	if (cpu_fxsr)
 		fxsave(addr);
......
 static void
 fpurstor(union savefpu *addr)
 {
+#ifdef CPU_ENABLE_AVX
+	if (cpu_xsave){
+		xrstor(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM ,0,addr);
+	}else
+#endif
 #ifndef CPU_DISABLE_SSE
 	if (cpu_fxsr) {
 		fpu_clean_state();