This patch builds support for the new conversions of physical addresses to and from sockets, pnodes and nodes in UV4. It is designed to be as efficient as possible as lookups are done inside an interrupt context in some cases. It will be further optimized when physical hardware is available to measure execution time. Signed-off-by: Mike Travis Tested-by: Dimitri Sivanich Tested-by: John Estabrook Tested-by: Gary Kroening Tested-by: Nathan Zimmer --- arch/x86/include/asm/uv/uv_hub.h | 131 ++++++++++++++++++++++++++++++++----- arch/x86/kernel/apic/x2apic_uv_x.c | 96 ++++++++++++++++++++++++++- 2 files changed, 209 insertions(+), 18 deletions(-) --- linux.orig/arch/x86/include/asm/uv/uv_hub.h +++ linux/arch/x86/include/asm/uv/uv_hub.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -104,7 +105,6 @@ * processor APICID register. */ - /* * Maximum number of bricks in all partitions and in all coherency domains. * This is the total number of bricks accessible in the numalink fabric. It @@ -139,6 +139,14 @@ struct uv_scir_s { unsigned char enabled; }; +/* GAM (globally addressed memory) range table */ +struct uv_gam_range_s { + u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */ + u16 nasid; /* node's global physical address */ + s8 base; /* entry index of node's base addr */ + u8 reserved; +}; + /* * The following defines attributes of the HUB chip. These attributes are * frequently referenced and are kept in a common per hub struct. @@ -152,8 +160,12 @@ struct uv_hub_info_s { unsigned short *socket_to_node; unsigned short *socket_to_pnode; unsigned short *pnode_to_socket; + struct uv_gam_range_s *gr_table; unsigned short min_socket; unsigned short min_pnode; + unsigned char m_val; + unsigned char n_val; + unsigned char gr_table_len; unsigned char hub_revision; unsigned char apic_pnode_shift; unsigned char gpa_shift; @@ -169,8 +181,6 @@ struct uv_hub_info_s { unsigned short pnode_mask; unsigned short coherency_domain_number; unsigned short numa_blade_id; - unsigned char m_val; - unsigned char n_val; unsigned short nr_possible_cpus; unsigned short nr_online_cpus; short memory_nid; @@ -415,18 +425,74 @@ union uvh_apicid { * between socket virtual and socket physical addresses. */ +/* global bits offset - number of local address bits in gpa for this UV arch */ +static inline unsigned int uv_gpa_shift(void) +{ + return uv_hub_info->gpa_shift; +} +#define _uv_gpa_shift + +/* Find node that has the address range that contains global address */ +static inline struct uv_gam_range_s *uv_gam_range(unsigned long pa) +{ + struct uv_gam_range_s *gr = uv_hub_info->gr_table; + unsigned long pal = (pa & uv_hub_info->gpa_mask) >> UV_GAM_RANGE_SHFT; + int i, num = uv_hub_info->gr_table_len; + + if (gr) { + for (i = 0; i < num; i++, gr++) { + if (pal < gr->limit) + return gr; + } + } + pr_crit("UV: GAM Range for 0x%lx not found at %p!\n", pa, gr); + BUG(); +} + +/* Return base address of node that contains global address */ +static inline unsigned long uv_gam_range_base(unsigned long pa) +{ + struct uv_gam_range_s *gr = uv_gam_range(pa); + int base = gr->base; + + if (base < 0) + return 0UL; + + return uv_hub_info->gr_table[base].limit; +} + +/* socket phys RAM --> UV global NASID (UV4+) */ +static inline unsigned long uv_soc_phys_ram_to_nasid(unsigned long paddr) +{ + return uv_gam_range(paddr)->nasid; +} +#define _uv_soc_phys_ram_to_nasid + +/* socket virtual --> UV global NASID (UV4+) */ +static inline unsigned long uv_gpa_nasid(void *v) +{ + return uv_soc_phys_ram_to_nasid(__pa(v)); +} + /* socket phys RAM --> UV global physical address */ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) { + unsigned int m_val = uv_hub_info->m_val; + if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; paddr |= uv_hub_info->gnode_upper; - paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | - ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); + if (m_val) + paddr = ((paddr << uv_hub_info->m_shift) + >> uv_hub_info->m_shift) | + ((paddr >> uv_hub_info->m_val) + << uv_hub_info->n_lshift); + else + paddr |= uv_soc_phys_ram_to_nasid(paddr) + << uv_hub_info->gpa_shift; return paddr; } - /* socket virtual --> UV global physical address */ static inline unsigned long uv_gpa(void *v) { @@ -446,20 +512,27 @@ static inline unsigned long uv_gpa_to_so unsigned long paddr; unsigned long remap_base = uv_hub_info->lowmem_remap_base; unsigned long remap_top = uv_hub_info->lowmem_remap_top; + unsigned int m_val = uv_hub_info->m_val; + + if (m_val) + gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | + ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); - gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | - ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val); paddr = gpa & uv_hub_info->gpa_mask; if (paddr >= remap_base && paddr < remap_base + remap_top) paddr -= remap_base; return paddr; } - /* gpa -> gnode */ static inline unsigned long uv_gpa_to_gnode(unsigned long gpa) { - return gpa >> uv_hub_info->n_lshift; + unsigned int n_lshift = uv_hub_info->n_lshift; + + if (n_lshift) + return gpa >> n_lshift; + + return uv_gam_range(gpa)->nasid >> 1; } /* gpa -> pnode */ @@ -471,21 +544,45 @@ static inline int uv_gpa_to_pnode(unsign /* gpa -> node offset */ static inline unsigned long uv_gpa_to_offset(unsigned long gpa) { - return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift; + unsigned int m_shift = uv_hub_info->m_shift; + + if (m_shift) + return (gpa << m_shift) >> m_shift; + + return (gpa & uv_hub_info->gpa_mask) - uv_gam_range_base(gpa); } -/* pnode, offset --> socket virtual */ -static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +/* Convert socket to node */ +static inline int _uv_socket_to_node(int socket, unsigned short *s2nid) { - return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); + return s2nid ? s2nid[socket - uv_hub_info->min_socket] : socket; } -/* Convert socket to node */ static inline int uv_socket_to_node(int socket) { - unsigned short *s2nid = uv_hub_info->socket_to_node; + return _uv_socket_to_node(socket, uv_hub_info->socket_to_node); +} - return s2nid ? s2nid[socket - uv_hub_info->min_socket] : socket; +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + unsigned int m_val = uv_hub_info->m_val; + unsigned long base; + unsigned short sockid, node, *p2s; + + if (m_val) + return __va(((unsigned long)pnode << m_val) | offset); + + p2s = uv_hub_info->pnode_to_socket; + sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; + node = uv_socket_to_node(sockid); + + /* limit address of previous socket is our base, except node 0 is 0 */ + if (!node) + return __va((unsigned long)offset); + + base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit); + return __va(base << UV_GAM_RANGE_SHFT | offset); } /* Extract/Convert a PNODE from an APICID (full apicid, not processor subset) */ --- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c +++ linux/arch/x86/kernel/apic/x2apic_uv_x.c @@ -317,6 +317,7 @@ static __initdata struct uv_gam_paramete static __initdata unsigned short *_socket_to_node; static __initdata unsigned short *_socket_to_pnode; static __initdata unsigned short *_pnode_to_socket; +static __initdata struct uv_gam_range_s *_gr_table; #define SOCK_EMPTY ((unsigned short)~0) extern int uv_hub_info_version(void) @@ -325,6 +326,97 @@ extern int uv_hub_info_version(void) } EXPORT_SYMBOL(uv_hub_info_version); +/* Build GAM range lookup table */ +static __init void build_uv_gr_table(void) +{ + struct uv_gam_range_entry *gre = uv_gre_table; + struct uv_gam_range_s *grt; + unsigned long last_limit = 0, ram_limit = 0; + int bytes, i, sid, lsid = -1; + + if (!gre) + return; + + bytes = _gr_table_len * sizeof(struct uv_gam_range_s); + grt = kzalloc(bytes, GFP_KERNEL); + BUG_ON(!grt); + _gr_table = grt; + + for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { + if (gre->type == UV_GAM_RANGE_TYPE_HOLE) { + if (!ram_limit) { /* mark hole between ram/non-ram */ + ram_limit = last_limit; + last_limit = gre->limit; + lsid++; + continue; + } + last_limit = gre->limit; + pr_info("UV: extra hole in GAM RE table @%d\n", + (int)(gre - uv_gre_table)); + continue; + } + if (_max_socket < gre->sockid) { + pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n", + gre->sockid, _max_socket, + (int)(gre - uv_gre_table)); + continue; + } + sid = gre->sockid - _min_socket; + if (lsid < sid) { /* new range */ + grt = &_gr_table[sid]; + grt->base = lsid; + grt->nasid = gre->nasid; + grt->limit = last_limit = gre->limit; + lsid = sid; + continue; + } + if (lsid == sid && !ram_limit) { /* update range */ + if (grt->limit == last_limit) { /* .. if contiguous */ + grt->limit = last_limit = gre->limit; + continue; + } + } + if (!ram_limit) { /* non-contiguous ram range */ + grt++; + grt->base = sid - 1; + grt->nasid = gre->nasid; + grt->limit = last_limit = gre->limit; + continue; + } + grt++; /* non-contiguous/non-ram */ + grt->base = grt - _gr_table; /* base is this entry */ + grt->nasid = gre->nasid; + grt->limit = last_limit = gre->limit; + lsid++; + } + + /* shorten table if possible */ + grt++; + i = grt - _gr_table; + if (i < _gr_table_len) { + void *ret; + + bytes = i * sizeof(struct uv_gam_range_s); + ret = krealloc(_gr_table, bytes, GFP_KERNEL); + if (ret) { + _gr_table = ret; + _gr_table_len = i; + } + } + + /* display resultant gam range table */ + for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) { + int gb = grt->base; + unsigned long start = gb < 0 ? 0 : + (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT; + unsigned long end = + (unsigned long)grt->limit << UV_GAM_RANGE_SHFT; + + pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n", + i, grt->nasid, start, end, gb); + } +} + static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { unsigned long val; @@ -1012,6 +1104,8 @@ void __init uv_init_hub_info(struct uv_h hub_info->pnode_to_socket = _pnode_to_socket; hub_info->socket_to_node = _socket_to_node; hub_info->socket_to_pnode = _socket_to_pnode; + hub_info->gr_table_len = _gr_table_len; + hub_info->gr_table = _gr_table; hub_info->gpa_mask = mn.m_val ? (1UL << (mn.m_val + mn.n_val)) - 1 : (1UL << uv_cpuid.gpa_shift) - 1; @@ -1106,7 +1200,6 @@ static void __init decode_gam_rng_tbl(un if (pnode_max < gre->pnode) pnode_max = gre->pnode; } - _min_socket = sock_min; _max_socket = sock_max; _min_pnode = pnode_min; @@ -1352,6 +1445,7 @@ void __init uv_system_init(void) uv_bios_init(); /* get uv_systab for decoding */ decode_uv_systab(); build_socket_tables(); + build_uv_gr_table(); uv_init_hub_info(&hub_info); uv_possible_blades = num_possible_nodes(); if (!_node_to_pnode) --