Submit #2456 » 0001-AVX-Instruction-Set-Support.patch
sys/config/LINT64 | ||
---|---|---|
# CPU_ENABLE_EST enables support for Enhanced SpeedStep technology
|
||
# found in Pentium(tm) M processors.
|
||
#
|
||
# CPU_ENABLE_AVX enables AVX instruction set.
|
||
# This option requires gcc version 4.7 or later.
|
||
#
|
||
#options CPU_DISABLE_SSE
|
||
options CPU_ENABLE_EST
|
||
options CPU_ENABLE_AVX #Requires gcc 4.7 or later.
|
||
#####################################################################
|
||
# COMPATIBILITY OPTIONS
|
sys/config/X86_64_GENERIC | ||
---|---|---|
cpu HAMMER_CPU
|
||
ident X86_64_GENERIC
|
||
maxusers 0
|
||
#options CPU_ENABLE_AVX #Support AVX instructions (requires gcc 4.7 or later)
|
||
makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
|
||
sys/cpu/x86_64/include/cpufunc.h | ||
---|---|---|
}
|
||
/*
 * Issue the XSETBV instruction.
 *
 * The three arguments are bound directly to the registers the instruction
 * consumes: ecx -> %ecx, eax -> %eax, edx -> %edx.  Per the Intel SDM,
 * XSETBV writes the 64-bit value edx:eax into the extended control
 * register selected by ecx.
 *
 * The asm statement declares no outputs and no clobbers; it is marked
 * volatile so the compiler will not discard it.
 */
static __inline void
|
||
xsetbv(u_int ecx, u_int eax, u_int edx)
|
||
{
|
||
__asm __volatile("xsetbv"
|
||
:
|
||
: "a" (eax), "c" (ecx), "d" (edx));
|
||
}
|
||
static __inline void
|
||
load_cr0(u_long data)
|
||
{
|
||
sys/cpu/x86_64/include/npx.h | ||
---|---|---|
#ifndef _CPU_NPX_H_
|
||
#define _CPU_NPX_H_
|
||
#include "opt_cpu.h"
|
||
#ifndef _SYS_TYPES_H_
|
||
#include <sys/types.h>
|
||
#endif
|
||
... | ... | |
u_char sv_pad[220];
|
||
} __attribute__((aligned(16)));
|
||
/*
 * Save area used when CPU_ENABLE_AVX is configured.
 *
 * The contents are treated as an opaque 832-byte buffer and the structure
 * is aligned to 64 bytes.
 * NOTE(review): 832 presumably corresponds to the 512-byte legacy region
 * plus the 64-byte XSAVE header plus 256 bytes of YMM state -- confirm
 * against the Intel SDM before enabling further state components.
 */
#ifdef CPU_ENABLE_AVX
|
||
struct saveymm {
|
||
u_char xsavedata[832];
|
||
} __attribute__((aligned(64)));
|
||
#endif
|
||
/*
 * Union of the FPU save-area layouts.
 *
 * sv_87 and sv_xmm are always present; sv_ymm is only a member when the
 * kernel is built with CPU_ENABLE_AVX.  Because this is a union, its size
 * and alignment are those of its largest / most strictly aligned member.
 */
union savefpu {
|
||
struct save87 sv_87;
|
||
struct savexmm sv_xmm;
|
||
#ifdef CPU_ENABLE_AVX
|
||
struct saveymm sv_ymm;
|
||
#endif
|
||
};
|
||
/*
|
sys/cpu/x86_64/include/specialreg.h | ||
---|---|---|
#define CR0_MP 0x00000002 /* "Math" (fpu) Present */
|
||
#define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */
|
||
#define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */
|
||
#define CR0_PG 0x80000000 /* PaGing enable */
|
||
#define CR0_PG 0x80000000 /* Paging enable */
|
||
/*
|
||
* Bits in 486 special registers:
|
||
*/
|
||
#define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */
|
||
#define CR0_WP 0x00010000 /* Write Protect (honor page protect in
|
||
all modes) */
|
||
#define CR0_WP 0x00010000 /* Write Protect (honor page protect in all modes) */
|
||
#define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */
|
||
#define CR0_NW 0x20000000 /* Not Write-through */
|
||
#define CR0_CD 0x40000000 /* Cache Disable */
|
||
/*
|
||
* Bits in PPro special registers
|
||
* Bits in CR4 special register
|
||
*/
|
||
#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */
|
||
#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */
|
||
#define CR4_TSD 0x00000004 /* Time stamp disable */
|
||
#define CR4_DE 0x00000008 /* Debugging extensions */
|
||
#define CR4_PSE 0x00000010 /* Page size extensions */
|
||
#define CR4_PAE 0x00000020 /* Physical address extension */
|
||
#define CR4_MCE 0x00000040 /* Machine check enable */
|
||
#define CR4_PGE 0x00000080 /* Page global enable */
|
||
#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */
|
||
#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */
|
||
#define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */
|
||
#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */
|
||
#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */
|
||
#define CR4_TSD 0x00000004 /* Time stamp disable */
|
||
#define CR4_DE 0x00000008 /* Debugging extensions */
|
||
#define CR4_PSE 0x00000010 /* Page size extensions */
|
||
#define CR4_PAE 0x00000020 /* Physical address extension */
|
||
#define CR4_MCE 0x00000040 /* Machine check enable */
|
||
#define CR4_PGE 0x00000080 /* Page global enable */
|
||
#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */
|
||
#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */
|
||
#define CR4_XMM 0x00000400 /* Enable SIMD/MMX2 to use except 16 */
|
||
#define CR4_XSAVE 0x00040000 /* Enable XSave (for AVX Instructions)*/
|
||
/*
|
||
* Bits in x86_64 special registers. EFER is 64 bits wide.
|
||
*/
|
||
#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */
|
||
#define EFER_LME 0x000000100 /* Long mode enable (R/W) */
|
||
#define EFER_LMA 0x000000400 /* Long mode active (R) */
|
||
#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */
|
||
#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */
|
||
#define EFER_LME 0x000000100 /* Long mode enable (R/W) */
|
||
#define EFER_LMA 0x000000400 /* Long mode active (R) */
|
||
#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */
|
||
/*
|
||
* CPUID instruction features register
|
||
... | ... | |
#define CPUID2_SSE42 0x00100000
|
||
#define CPUID2_X2APIC 0x00200000
|
||
#define CPUID2_POPCNT 0x00800000
|
||
#define CPUID2_AESNI 0x02000000
|
||
#define CPUID2_RDRAND 0x40000000
|
||
#define CPUID2_AESNI 0x02000000 /* AES Instruction Set */
|
||
#define CPUID2_XSAVE 0x04000000 /* XSave supported by CPU */
|
||
#define CPUID2_OSXSAVE 0x08000000 /* XSave and AVX supported by OS */
|
||
#define CPUID2_AVX 0x10000000 /* AVX instruction set support */
|
||
#define CPUID2_F16C 0x20000000 /* CVT16 instruction set support */
|
||
#define CPUID2_RDRAND 0x40000000 /* RdRand. On chip random numbers */
|
||
#define CPUID2_VMM 0x80000000 /* AMD 25481 2.34 page 11 */
|
||
/*
 * Bits related to the XFEATURE_ENABLED_MASK control register.
 *
 * These values are OR'ed together to form the feature mask passed to
 * xsetbv(), xsave() and xrstor().
 * NOTE(review): the per-bit meanings below follow the constant names;
 * confirm against the Intel SDM description of XCR0.
 */
|
||
#define CPU_XFEATURE_X87 0x00000001 /* bit 0: x87 state */
|
||
#define CPU_XFEATURE_SSE 0x00000002 /* bit 1: SSE state */
|
||
#define CPU_XFEATURE_YMM 0x00000004 /* bit 2: YMM (AVX) state */
|
||
/*
|
||
* Important bits in the AMD extended cpuid flags
|
||
*/
|
sys/platform/pc64/conf/options | ||
---|---|---|
# x86_64 SMP options
|
||
CPU_ENABLE_EST opt_cpu.h
|
||
CPU_ENABLE_AVX opt_cpu.h
|
||
# The cpu type
|
||
#
|
sys/platform/pc64/include/md_var.h | ||
---|---|---|
extern u_int amd_feature2;
|
||
extern u_int cpu_clflush_line_size;
|
||
extern u_int cpu_fxsr;
|
||
extern u_int cpu_xsave;
|
||
extern u_int cpu_high;
|
||
extern u_int cpu_id;
|
||
extern u_int cpu_procinfo;
|
sys/platform/pc64/x86_64/initcpu.c | ||
---|---|---|
char cpu_vendor[20]; /* CPU Origin code */
|
||
u_int cpu_vendor_id; /* CPU vendor ID */
|
||
u_int cpu_fxsr; /* SSE enabled */
|
||
u_int cpu_xsave; /* AVX enabled by OS*/
|
||
u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
|
||
u_int cpu_clflush_line_size = 32; /* Default CLFLUSH line size */
|
||
... | ... | |
{
|
||
uint64_t msr;
|
||
/*Check for FXSR and SSE support and enable if available.*/
|
||
if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
|
||
load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
|
||
cpu_fxsr = hw_instruction_sse = 1;
|
||
}
|
||
#if defined(CPU_ENABLE_AVX)
|
||
/*Check for XSAVE and AVX support and enable if available.*/
|
||
if ((cpu_feature2 & CPUID2_AVX) && (cpu_feature2 & CPUID2_XSAVE)
|
||
&& (cpu_feature & CPUID_SSE)){
|
||
load_cr4(rcr4() | CR4_XSAVE);
|
||
|
||
/* Adjust size of savefpu in npx.h before adding to mask.*/
|
||
xsetbv(0,CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM,0);
|
||
cpu_xsave = 1;
|
||
}
|
||
#endif
|
||
if (cpu_vendor_id == CPU_VENDOR_AMD) {
|
||
switch((cpu_id & 0xFF0000)) {
|
||
case 0x100000:
|
sys/platform/pc64/x86_64/npx.c | ||
---|---|---|
*/
|
||
#include "opt_debug_npx.h"
|
||
#include "opt_cpu.h"
|
||
#include <sys/param.h>
|
||
#include <sys/systm.h>
|
||
... | ... | |
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
|
||
#endif
|
||
/*
 * Wrappers for the XSAVE / XRSTOR instructions, only available when the
 * kernel is built with CPU_ENABLE_AVX.
 *
 * xsave(eax, edx, addr):  emits "xsave" with *addr as the memory output
 *                         operand and eax/edx loaded into %eax/%edx.
 * xrstor(eax, edx, addr): emits "xrstor" with *addr as a memory input
 *                         operand and eax/edx loaded into %eax/%edx.
 *
 * Per the Intel SDM, edx:eax is the mask selecting which state components
 * the instruction saves or restores.
 *
 * NOTE(review): xsave declares *addr as write-only ("=m") with no
 * "memory" clobber; XSAVE also reads the header inside the save area, so
 * confirm whether "+m" or a "memory" clobber is wanted here.
 */
#ifdef CPU_ENABLE_AVX
|
||
#define xsave(eax,edx,addr) __asm __volatile("xsave %0" : "=m" (*(addr)) : "a" (eax),"d" (edx) )
|
||
#define xrstor(eax,edx,addr) __asm __volatile("xrstor %0" : : "m" (*(addr)), "a" (eax), "d" (edx))
|
||
#endif
|
||
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
|
||
: : "n" (CR0_TS) : "ax")
|
||
#define stop_emulating() __asm("clts")
|
||
... | ... | |
void
|
||
npxinit(u_short control)
|
||
{
|
||
static union savefpu dummy __aligned(16);
|
||
/*64-Byte alignment required for xsave*/
|
||
static union savefpu dummy __aligned(64);
|
||
/*
|
||
* fninit has the same h/w bugs as fnsave. Use the detoxified
|
||
... | ... | |
static void
|
||
fpusave(union savefpu *addr)
|
||
{
|
||
#ifdef CPU_ENABLE_AVX
|
||
if (cpu_xsave)
|
||
|
||
xsave(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM ,0,addr);
|
||
else
|
||
#endif
|
||
#ifndef CPU_DISABLE_SSE
|
||
if (cpu_fxsr)
|
||
fxsave(addr);
|
||
... | ... | |
static void
|
||
fpurstor(union savefpu *addr)
|
||
{
|
||
#ifdef CPU_ENABLE_AVX
|
||
if (cpu_xsave){
|
||
xrstor(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM ,0,addr);
|
||
}else
|
||
#endif
|
||
#ifndef CPU_DISABLE_SSE
|
||
if (cpu_fxsr) {
|
||
fpu_clean_state();
|