Project

General

Profile

Submit #2456 » 0001-AVX-Instruction-Set-Support.patch

adamsaka, 11/15/2012 06:18 AM

View differences:

sys/config/LINT64
# CPU_ENABLE_EST enables support for Enhanced SpeedStep technology
# found in Pentium(tm) M processors.
#
# CPU_ENABLE_AVX enables support for the AVX instruction set.
# This option requires GCC version 4.7 or later.
#
#options CPU_DISABLE_SSE
options CPU_ENABLE_EST
options CPU_ENABLE_AVX #Requires gcc 4.7 or later.
#####################################################################
# COMPATIBILITY OPTIONS
sys/config/X86_64_GENERIC
cpu HAMMER_CPU
ident X86_64_GENERIC
maxusers 0
#options CPU_ENABLE_AVX #Support AVX Instructions (GCC4.7 Req)
makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols
sys/cpu/x86_64/include/cpufunc.h
}
/*
 * Execute the XSETBV instruction.
 *
 * The three arguments are loaded into the registers of the same name:
 *   ecx - placed in ECX (architecturally, the extended control register
 *         to write; the caller in initcpu.c passes 0)
 *   eax - placed in EAX (low 32 bits of the value)
 *   edx - placed in EDX (high 32 bits of the value)
 *
 * Nothing is returned.  The instruction template references no numbered
 * operands, so only the register bindings matter.
 */
static __inline void
xsetbv(u_int ecx, u_int eax, u_int edx)
{
	__asm __volatile("xsetbv" : /* no outputs */
	    : "c" (ecx), "a" (eax), "d" (edx));
}
static __inline void
load_cr0(u_long data)
{
sys/cpu/x86_64/include/npx.h
#ifndef _CPU_NPX_H_
#define _CPU_NPX_H_
#include "opt_cpu.h"
#ifndef _SYS_TYPES_H_
#include <sys/types.h>
#endif
......
u_char sv_pad[220];
} __attribute__((aligned(16)));
#ifdef CPU_ENABLE_AVX
/*
 * Save area layout used when the kernel is built with CPU_ENABLE_AVX.
 *
 * It is an opaque 832-byte buffer with 64-byte alignment.  The union that
 * contains it is handed to the xsave/xrstor macros in npx.c, which request
 * the x87, SSE and YMM feature bits.
 *
 * NOTE(review): 832 is presumably 512 (legacy FXSAVE region) + 64 (XSAVE
 * header) + 256 (YMM state) -- confirm against the Intel SDM before
 * enabling any additional feature bit.
 */
struct saveymm {
u_char xsavedata[832];	/* raw XSAVE image */
} __attribute__((aligned(64)));
#endif
/*
 * FPU state save area: one piece of storage viewed through one of several
 * layouts.  npx.c passes a pointer to this union to xsave/xrstor when
 * cpu_xsave is set and to fxsave when cpu_fxsr is set.
 *
 * The sv_ymm member only exists in kernels built with CPU_ENABLE_AVX, so
 * the size and alignment of the union depend on that option.
 */
union savefpu {
struct save87 sv_87;	/* presumably the legacy x87-only layout -- confirm */
struct savexmm sv_xmm;	/* presumably the fxsave/fxrstor layout -- confirm */
#ifdef CPU_ENABLE_AVX
struct saveymm sv_ymm;	/* 832-byte, 64-byte aligned xsave/xrstor area */
#endif
};
/*
sys/cpu/x86_64/include/specialreg.h
/*
 * CR0 control register bit masks.
 *
 * NOTE(review): CR0_PG and CR0_WP each appear twice below with identical
 * values and only comment/layout differences.  This looks like the old and
 * new lines of the patch rendered together -- confirm and keep one of each.
 */
#define CR0_MP 0x00000002 /* "Math" (fpu) Present */
#define CR0_EM 0x00000004 /* EMulate FPU instructions. (trap ESC only) */
#define CR0_TS 0x00000008 /* Task Switched (if MP, trap ESC and WAIT) */
#define CR0_PG 0x80000000 /* PaGing enable */
#define CR0_PG 0x80000000 /* Paging enable */
/*
* Bits in 486 special registers:
*/
#define CR0_NE 0x00000020 /* Numeric Error enable (EX16 vs IRQ13) */
#define CR0_WP 0x00010000 /* Write Protect (honor page protect in
all modes) */
#define CR0_WP 0x00010000 /* Write Protect (honor page protect in all modes) */
#define CR0_AM 0x00040000 /* Alignment Mask (set to enable AC flag) */
#define CR0_NW 0x20000000 /* Not Write-through */
#define CR0_CD 0x40000000 /* Cache Disable */
/*
* Bits in PPro special registers
* Bits in CR4 special register
*
* NOTE(review): the two heading lines above and the repeated CR4_VME ..
* CR4_XMM definitions below carry identical values; they look like the old
* and new lines of the patch rendered together -- confirm.  CR4_XSAVE is
* the only name in this list that appears once; initcpu.c sets it with
* load_cr4() before calling xsetbv().
*/
#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */
#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */
#define CR4_TSD 0x00000004 /* Time stamp disable */
#define CR4_DE 0x00000008 /* Debugging extensions */
#define CR4_PSE 0x00000010 /* Page size extensions */
#define CR4_PAE 0x00000020 /* Physical address extension */
#define CR4_MCE 0x00000040 /* Machine check enable */
#define CR4_PGE 0x00000080 /* Page global enable */
#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */
#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */
#define CR4_XMM 0x00000400 /* enable SIMD/MMX2 to use except 16 */
#define CR4_VME 0x00000001 /* Virtual 8086 mode extensions */
#define CR4_PVI 0x00000002 /* Protected-mode virtual interrupts */
#define CR4_TSD 0x00000004 /* Time stamp disable */
#define CR4_DE 0x00000008 /* Debugging extensions */
#define CR4_PSE 0x00000010 /* Page size extensions */
#define CR4_PAE 0x00000020 /* Physical address extension */
#define CR4_MCE 0x00000040 /* Machine check enable */
#define CR4_PGE 0x00000080 /* Page global enable */
#define CR4_PCE 0x00000100 /* Performance monitoring counter enable */
#define CR4_FXSR 0x00000200 /* Fast FPU save/restore used by OS */
#define CR4_XMM 0x00000400 /* Enable SIMD/MMX2 to use except 16 */
#define CR4_XSAVE 0x00040000 /* Enable XSave (for AVX Instructions)*/
/*
* Bits in x86_64 special registers. EFER is 64 bits wide.
*
* NOTE(review): every EFER_* name below is defined twice with the same
* value; this looks like the old and new lines of the patch rendered
* together -- confirm and keep one of each.
*/
#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */
#define EFER_LME 0x000000100 /* Long mode enable (R/W) */
#define EFER_LMA 0x000000400 /* Long mode active (R) */
#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */
#define EFER_SCE 0x000000001 /* System Call Extensions (R/W) */
#define EFER_LME 0x000000100 /* Long mode enable (R/W) */
#define EFER_LMA 0x000000400 /* Long mode active (R) */
#define EFER_NXE 0x000000800 /* PTE No-Execute bit enable (R/W) */
/*
* CPUID instruction features register
......
/*
 * CPUID2_* feature bit masks.  initcpu.c tests CPUID2_AVX and CPUID2_XSAVE
 * against cpu_feature2 before enabling XSAVE.
 *
 * NOTE(review): CPUID2_AESNI and CPUID2_RDRAND each appear twice with the
 * same value (once without and once with a comment); this looks like the
 * old and new lines of the patch rendered together -- confirm.
 */
#define CPUID2_SSE42 0x00100000
#define CPUID2_X2APIC 0x00200000
#define CPUID2_POPCNT 0x00800000
#define CPUID2_AESNI 0x02000000
#define CPUID2_RDRAND 0x40000000
#define CPUID2_AESNI 0x02000000 /* AES Instruction Set */
#define CPUID2_XSAVE 0x04000000 /* XSave supported by CPU */
#define CPUID2_OSXSAVE 0x08000000 /* XSave and AVX supported by OS */
#define CPUID2_AVX 0x10000000 /* AVX instruction set support */
#define CPUID2_F16C 0x20000000 /* CVT16 instruction set support */
#define CPUID2_RDRAND 0x40000000 /* RdRand. On chip random numbers */
#define CPUID2_VMM 0x80000000 /* AMD 25481 2.34 page 11 */
/*
 * Bits related to the XFEATURE_ENABLED_MASK control register.
 *
 * initcpu.c ORs these together as the EAX argument of xsetbv(), and npx.c
 * passes the same combination as the EAX mask of the xsave/xrstor macros.
 */
#define CPU_XFEATURE_X87 0x00000001	/* x87 state */
#define CPU_XFEATURE_SSE 0x00000002	/* SSE state */
#define CPU_XFEATURE_YMM 0x00000004	/* YMM (AVX) state */
/*
* Important bits in the AMD extended cpuid flags
*/
sys/platform/pc64/conf/options
# x86_64 SMP options
CPU_ENABLE_EST opt_cpu.h
CPU_ENABLE_AVX opt_cpu.h
# The cpu type
#
sys/platform/pc64/include/md_var.h
/* CPU identification/feature globals; cpu_fxsr and cpu_xsave are defined in initcpu.c. */
extern u_int amd_feature2;
extern u_int cpu_clflush_line_size;
extern u_int cpu_fxsr;		/* nonzero once FXSR/SSE has been enabled */
extern u_int cpu_xsave;		/* nonzero once XSAVE/AVX has been enabled */
extern u_int cpu_high;
extern u_int cpu_id;
extern u_int cpu_procinfo;
sys/platform/pc64/x86_64/initcpu.c
char cpu_vendor[20]; /* CPU Origin code */
u_int cpu_vendor_id; /* CPU vendor ID */
u_int cpu_fxsr; /* SSE enabled: set to 1 after CR4_FXSR | CR4_XMM are loaded */
u_int cpu_xsave; /* AVX enabled by OS: set to 1 after CR4_XSAVE is loaded and xsetbv() has run (CPU_ENABLE_AVX kernels only) */
u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
u_int cpu_clflush_line_size = 32; /* Default CLFLUSH line size */
......
{
uint64_t msr;
/*Check for FXSR and SSE support and enable if available.*/
if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
cpu_fxsr = hw_instruction_sse = 1;
}
#if defined(CPU_ENABLE_AVX)
/*Check for XSAVE and AVX support and enable if available.*/
if ((cpu_feature2 & CPUID2_AVX) && (cpu_feature2 & CPUID2_XSAVE)
&& (cpu_feature & CPUID_SSE)){
load_cr4(rcr4() | CR4_XSAVE);
/* NOTE: enlarge union savefpu (npx.h) before adding further feature bits to this mask. */
xsetbv(0,CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM,0);
cpu_xsave = 1;
}
#endif
if (cpu_vendor_id == CPU_VENDOR_AMD) {
switch((cpu_id & 0xFF0000)) {
case 0x100000:
sys/platform/pc64/x86_64/npx.c
*/
#include "opt_debug_npx.h"
#include "opt_cpu.h"
#include <sys/param.h>
#include <sys/systm.h>
......
/* Execute fxrstor with *addr as its (input) memory operand. */
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
/* Execute fxsave with *addr as its (output) memory operand. */
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#endif
#ifdef CPU_ENABLE_AVX
/*
 * xsave(eax, edx, addr)  - execute XSAVE on the save area *addr.
 * xrstor(eax, edx, addr) - execute XRSTOR from the save area *addr.
 *
 *   eax, edx - loaded into EAX and EDX; together they form the
 *              requested-feature bitmap (EDX:EAX).  Callers in this file
 *              pass the CPU_XFEATURE_* bits in eax and 0 in edx.
 *   addr     - pointer to the save area; it must be 64-byte aligned.
 *
 * The xsave memory operand is declared read-write ("+m"), not output-only.
 * XSAVE reads the XSTATE_BV field of the XSAVE header and writes back a
 * modified value, and it leaves the remaining header bytes and any
 * unrequested state components untouched.  An output-only operand would
 * let the compiler treat earlier stores to the area (such as zeroing the
 * header) as dead.
 */
#define xsave(eax,edx,addr)	__asm __volatile("xsave %0"		\
				    : "+m" (*(addr))			\
				    : "a" (eax), "d" (edx))
#define xrstor(eax,edx,addr)	__asm __volatile("xrstor %0"		\
				    : /* no outputs */			\
				    : "m" (*(addr)), "a" (eax), "d" (edx))
#endif
/*
 * start_emulating(): read the machine status word into AX with smsw, OR
 * CR0_TS into its low byte and write it back with lmsw, i.e. set the
 * Task Switched bit.  AX is declared clobbered.
 */
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
/* stop_emulating(): execute clts, which clears the Task Switched bit. */
#define stop_emulating() __asm("clts")
......
void
npxinit(u_short control)
{
static union savefpu dummy __aligned(16);
/*64-Byte alignment required for xsave*/
static union savefpu dummy __aligned(64);
/*
* fninit has the same h/w bugs as fnsave. Use the detoxified
......
static void
fpusave(union savefpu *addr)
{
#ifdef CPU_ENABLE_AVX
if (cpu_xsave)
xsave(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM ,0,addr);
else
#endif
#ifndef CPU_DISABLE_SSE
if (cpu_fxsr)
fxsave(addr);
......
static void
fpurstor(union savefpu *addr)
{
#ifdef CPU_ENABLE_AVX
if (cpu_xsave){
xrstor(CPU_XFEATURE_X87 | CPU_XFEATURE_SSE | CPU_XFEATURE_YMM ,0,addr);
}else
#endif
#ifndef CPU_DISABLE_SSE
if (cpu_fxsr) {
fpu_clean_state();
    (1-1/1)