ZFS ARC memory-pressure tuning and diagnostics for NetBSD (PR kern/57558).

arc.c:
  - restrict the NetBSD UVM glue macros to kernel builds;
  - turn zfs_arc_free_target from a macro into a variable that arc_init()
    seeds from uvmexp.freetarg;
  - add a vfs.zfs_arc sysctl subtree (meta_limit, meta_min, shrink_shift,
    max, min, compressed, free_target) with validating handlers;
  - make arc_available_memory() request reclaim when less than 10% of
    heap_arena is free;
  - compute arc_c_max as MAX(arc_c_min, arc_c * 4) instead of
    "all but 1GB", before the existing 5 * arc_c floor is applied;
  - add debug printf()s to the eviction and reap paths.
kmem.c: kmem_cache_reap_now() invalidates the pool cache before reclaiming.
misc.c: kmem_reap() prints the computed page count.

NOTE(review): the two "#include" context lines in arc.c had lost their
header names; <uvm/uvm.h> and <sys/callback.h> are assumed here -- confirm
against arc.c r1.22.  Whitespace inside context lines was reconstructed
and may need "patch -l" to apply.

Index: external/cddl/osnet/dist/uts/common/fs/zfs/arc.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/fs/zfs/arc.c,v
retrieving revision 1.22
diff -p -u -r1.22 arc.c
--- external/cddl/osnet/dist/uts/common/fs/zfs/arc.c	3 Aug 2022 01:53:06 -0000	1.22
+++ external/cddl/osnet/dist/uts/common/fs/zfs/arc.c	5 May 2024 06:43:50 -0000
@@ -275,7 +275,7 @@ int arc_procfd;
 #endif
 #endif /* illumos */
 
-#ifdef __NetBSD__
+#if defined(__NetBSD__) && defined(_KERNEL)
 #include <uvm/uvm.h>
 #ifndef btop
 #define	btop(x)		((x) / PAGE_SIZE)
@@ -288,8 +288,9 @@ int arc_procfd;
 #define	freemem		uvm_availmem(false)
 #define	minfree		uvmexp.freemin
 #define	desfree		uvmexp.freetarg
-#define	zfs_arc_free_target desfree
+//#define	zfs_arc_free_target desfree
 #define	lotsfree	(desfree * 2)
+#define	maxfree		uvmexp.npages
 #define	availrmem	desfree
 #define	swapfs_minfree	0
 #define	swapfs_reserve	0
@@ -297,13 +298,25 @@ int arc_procfd;
 #define	curproc		curlwp
 #define	proc_pageout	uvm.pagedaemon_lwp
 
+/*
+ * Free page count below which the ARC triggers reclaim.  Tunable at run
+ * time via sysctl; seeded from uvmexp.freetarg by arc_free_target_init().
+ */
+u_int zfs_arc_free_target;
+static void
+arc_free_target_init(void)
+{
+
+	zfs_arc_free_target = desfree;
+}
+
 static void	*zio_arena;
 
 #include <sys/callback.h>
 /* Structures used for memory and kva space reclaim. */
 static struct callback_entry arc_kva_reclaim_entry;
 
-#endif	/* __NetBSD__ */
+#endif	/* __NetBSD__ && _KERNEL */
 
 static kmutex_t		arc_reclaim_lock;
 static kcondvar_t	arc_reclaim_thread_cv;
@@ -468,6 +481,71 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_meta
     "ARC metadata limit");
 #endif
 
+#if defined(__NetBSD__) && defined(_KERNEL)
+
+static int sysctl_vfs_zfs_arc_meta_limit(SYSCTLFN_PROTO);
+static int sysctl_vfs_zfs_arc_max(SYSCTLFN_PROTO);
+static int sysctl_vfs_zfs_arc_min(SYSCTLFN_PROTO);
+static int sysctl_vfs_zfs_arc_free_target(SYSCTLFN_PROTO);
+
+/*
+ * Create the vfs.zfs_arc sysctl subtree that exposes the ARC tunables.
+ */
+SYSCTL_SETUP(sysctl_vfs_zfs_arc_setup, "sysctl vfs.zfs_arc subtree setup")
+{
+	const struct sysctlnode *rnode = NULL;
+
+	/* vfs.zfs is created in zfs_ioctl.c */
+	sysctl_createv(clog, 0, NULL, &rnode,
+	    CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "zfs_arc",
+	    SYSCTL_DESCR("zfs"),
+	    NULL, 0, NULL, 0,
+	    CTL_VFS, CTL_CREATE, CTL_EOL);
+
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "meta_limit", SYSCTL_DESCR("ARC metadata limit"),
+	    sysctl_vfs_zfs_arc_meta_limit, 0,
+	    &zfs_arc_meta_limit, sizeof(zfs_arc_meta_limit),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "meta_min", SYSCTL_DESCR("ARC metadata minimum"),
+	    NULL, 0, &zfs_arc_meta_min, sizeof(zfs_arc_meta_min),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
+	    "shrink_shift", SYSCTL_DESCR("ARC shrink shift"),
+	    NULL, 0, &zfs_arc_shrink_shift, sizeof(zfs_arc_shrink_shift),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "max", SYSCTL_DESCR("Maximum ARC size"),
+	    sysctl_vfs_zfs_arc_max, 0,
+	    &zfs_arc_max, sizeof(zfs_arc_max),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_QUAD,
+	    "min", SYSCTL_DESCR("Minimum ARC size"),
+	    sysctl_vfs_zfs_arc_min, 0,
+	    &zfs_arc_min, sizeof(zfs_arc_min),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
+	    "compressed", SYSCTL_DESCR("ARC compression"),
+	    NULL, 0, &zfs_compressed_arc_enabled, sizeof(zfs_compressed_arc_enabled),
+	    CTL_CREATE, CTL_EOL);
+	sysctl_createv(clog, 0, &rnode, NULL,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
+	    "free_target", SYSCTL_DESCR("Desired number of free pages below which ARC triggers reclaim"),
+	    sysctl_vfs_zfs_arc_free_target, 0,
+	    &zfs_arc_free_target, sizeof(zfs_arc_free_target),
+	    CTL_CREATE, CTL_EOL);
+}
+
+#endif
+
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
@@ -1169,6 +1247,166 @@ sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_AR
 }
 #endif
 
+#if defined(__NetBSD__) && defined(_KERNEL)
+/*
+ * sysctl handler for vfs.zfs_arc.meta_limit: a new value is accepted only
+ * if it is non-zero and does not exceed arc_c_max; it is then applied to
+ * arc_meta_limit and stored back in the tunable.
+ */
+static int
+sysctl_vfs_zfs_arc_meta_limit(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	uint64_t val;
+	int error;
+
+	val = *(uint64_t *)rnode->sysctl_data;
+
+	node = *rnode;
+	node.sysctl_data = &val;
+
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return (error);
+
+	if (val == 0 || val > arc_c_max)
+		return (EINVAL);
+
+	arc_meta_limit = val;
+
+	*(uint64_t *)rnode->sysctl_data = val;
+	return (0);
+}
+
+/*
+ * sysctl handler for vfs.zfs_arc.max: the new maximum is rejected unless
+ * it lies within [arc_abs_min, kmem_size()], is at least arc_c_min and is
+ * not below a configured zfs_arc_meta_limit.  On success arc_c_max, arc_c
+ * and arc_p are reset and the resulting arc_c is stored in the tunable.
+ */
+static int
+sysctl_vfs_zfs_arc_max(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	uint64_t val;
+	int error;
+
+	val = *(uint64_t *)rnode->sysctl_data;
+
+	node = *rnode;
+	node.sysctl_data = &val;
+
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return (error);
+
+#if 0
+	if (zfs_arc_max == 0) {
+		/* Loader tunable so blindly set */
+		zfs_arc_max = val;
+		return (0);
+	}
+#endif
+
+	if (val < arc_abs_min || val > kmem_size())
+		return (EINVAL);
+	if (val < arc_c_min)
+		return (EINVAL);
+	if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit)
+		return (EINVAL);
+
+	arc_c_max = val;
+
+	arc_c = arc_c_max;
+	arc_p = (arc_c >> 1);
+
+	if (zfs_arc_meta_limit == 0) {
+		/* limit meta-data to 1/4 of the arc capacity */
+		arc_meta_limit = arc_c_max / 4;
+	}
+
+#if 0
+	/* if kmem_flags are set, lets try to use less memory */
+	if (kmem_debugging())
+		arc_c = arc_c / 2;
+#endif
+
+	*(uint64_t *)rnode->sysctl_data = arc_c;
+	return (0);
+}
+
+/*
+ * sysctl handler for vfs.zfs_arc.min: the new minimum must lie within
+ * [arc_abs_min, arc_c_max]; arc_c is raised if it is below the new minimum.
+ */
+static int
+sysctl_vfs_zfs_arc_min(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	uint64_t val;
+	int error;
+
+	val = *(uint64_t *)rnode->sysctl_data;
+
+	node = *rnode;
+	node.sysctl_data = &val;
+
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return (error);
+
+#if 0
+	if (zfs_arc_min == 0) {
+		/* Loader tunable so blindly set */
+		zfs_arc_min = val;
+		return (0);
+	}
+#endif
+
+	if (val < arc_abs_min || val > arc_c_max)
+		return (EINVAL);
+
+	arc_c_min = val;
+
+	if (zfs_arc_meta_min == 0)
+		arc_meta_min = arc_c_min / 2;
+
+	if (arc_c < arc_c_min)
+		arc_c = arc_c_min;
+
+	*(uint64_t *)rnode->sysctl_data = arc_c_min;
+	return (0);
+}
+
+/*
+ * sysctl handler for vfs.zfs_arc.free_target: only values within
+ * [minfree, maxfree] are accepted and stored in the tunable.
+ */
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node;
+	u_int val;
+	int error;
+
+	val = *(u_int *)rnode->sysctl_data;
+
+	node = *rnode;
+	node.sysctl_data = &val;
+
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return (error);
+
+	if (val < minfree)
+		return (EINVAL);
+	if (val > maxfree)
+		return (EINVAL);
+
+	*(u_int *)rnode->sysctl_data = val;
+	return (0);
+}
+#endif
+
 #define	GHOST_STATE(state)	\
 	((state) == arc_mru_ghost || (state) == arc_mfu_ghost ||	\
 	(state) == arc_l2c_only)
@@ -3493,6 +3731,7 @@ arc_evict_state(arc_state_t *state, uint
 	}
 	kmem_free(markers, sizeof (*markers) * num_sublists);
 
+if (total_evicted > 0) printf("arc_evict_state type %d bytes %"PRId64" = %"PRIu64"\n", (int)type, bytes, total_evicted);
 	return (total_evicted);
 }
 
@@ -3903,6 +4142,31 @@ arc_available_memory(void)
 	free_memory_reason_t r = FMR_UNKNOWN;
 
 #ifdef _KERNEL
+#ifdef __NetBSD__
+	vmem_size_t totalpercent;
+	vmem_size_t free;
+
+	/*
+	 * PR kern/57558:
+	 *
+	 * Do not let the pagedaemon get stuck in the uvm_km_va_starved_p()
+	 * state.  It spins in a tight loop while KVA is starved, and ZFS
+	 * was not freeing any pool pages because it only started freeing
+	 * when falling below uvmexp.freetarg.
+	 * Now we start freeing when falling below 10% free KVA or below
+	 * uvmexp.freetarg.
+	 * The 10% magic is shamelessly copied from uvm_km_va_starved_p().
+	 * The interface to the pagedaemon has room for improvement.
+	 */
+
+	totalpercent = vmem_size(heap_arena, VMEM_ALLOC|VMEM_FREE) / 10;
+	free = vmem_size(heap_arena, VMEM_FREE);
+
+	if (free < totalpercent) {
+		needfree = btop(totalpercent - free);
+	}
+#endif
+
 	if (needfree > 0) {
 		n = PAGESIZE * (-needfree);
 		if (n < lowest) {
@@ -4077,16 +4341,22 @@ arc_kmem_reap_now(void)
 	for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
 		if (zio_buf_cache[i] != prev_cache) {
 			prev_cache = zio_buf_cache[i];
+printf("kmem_cache_reap_now(%zu)\n", i);
 			kmem_cache_reap_now(zio_buf_cache[i]);
 		}
 		if (zio_data_buf_cache[i] != prev_data_cache) {
 			prev_data_cache = zio_data_buf_cache[i];
+printf("kmem_cache_reap_now(%zu)\n", i);
 			kmem_cache_reap_now(zio_data_buf_cache[i]);
 		}
 	}
+printf("kmem_cache_reap_now(buf)\n");
 	kmem_cache_reap_now(buf_cache);
+printf("kmem_cache_reap_now(hdr_full)\n");
 	kmem_cache_reap_now(hdr_full_cache);
+printf("kmem_cache_reap_now(hdr_l2only)\n");
 	kmem_cache_reap_now(hdr_l2only_cache);
+printf("kmem_cache_reap_now(range_seg)\n");
 	kmem_cache_reap_now(range_seg_cache);
 
 #ifdef illumos
@@ -4153,6 +4423,10 @@ arc_reclaim_thread(void *dummy __unused)
 		evicted = arc_adjust();
 
 		int64_t free_memory = arc_available_memory();
+if (evicted != 0) {
+	printf("arc_reclaim_thread evicted %"PRIu64"\n", evicted);
+	printf("arc_reclaim_thread free_memory %"PRId64"\n", free_memory);
+}
 		if (free_memory < 0) {
 
 			arc_no_grow = B_TRUE;
@@ -6075,6 +6349,9 @@ arc_init(void)
 	mutex_init(&arc_dnlc_evicts_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&arc_dnlc_evicts_cv, NULL, CV_DEFAULT, NULL);
 #endif
+#if defined(__NetBSD__) && defined(_KERNEL)
+	arc_free_target_init();
+#endif
 
 	/* Convert seconds to clock ticks */
 	arc_min_prefetch_lifespan = 1 * hz;
@@ -6094,11 +6371,17 @@ arc_init(void)
 #endif	/* illumos */
 	/* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */
 	arc_c_min = MAX(arc_c / 4, arc_abs_min);
+#if 0
 	/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
 	if (arc_c * 8 >= 1 << 30)
 		arc_c_max = (arc_c * 8) - (1 << 30);
 	else
 		arc_c_max = arc_c_min;
+#else
+	/* set max to 1/2 of all memory, but at least the minimum */
+	arc_c_max = MAX(arc_c_min, arc_c * 4);
+#endif
+	/* increase max to at least 5 times the default size */
 	arc_c_max = MAX(arc_c * 5, arc_c_max);
 
 	/*
Index: external/cddl/osnet/sys/kern/kmem.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/sys/kern/kmem.c,v
retrieving revision 1.4
diff -p -u -r1.4 kmem.c
--- external/cddl/osnet/sys/kern/kmem.c	9 Sep 2023 00:14:16 -0000	1.4
+++ external/cddl/osnet/sys/kern/kmem.c	5 May 2024 06:43:50 -0000
@@ -123,6 +123,7 @@ void
 kmem_cache_reap_now(kmem_cache_t *km)
 {
 
+	pool_cache_invalidate(km->km_pool);
 	pool_cache_reclaim(km->km_pool);
 }
 
Index: external/cddl/osnet/sys/kern/misc.c
===================================================================
RCS file: /cvsroot/src/external/cddl/osnet/sys/kern/misc.c,v
retrieving revision 1.10
diff -p -u -r1.10 misc.c
--- external/cddl/osnet/sys/kern/misc.c	30 Jul 2022 13:09:19 -0000	1.10
+++ external/cddl/osnet/sys/kern/misc.c	5 May 2024 06:43:50 -0000
@@ -131,6 +131,8 @@ kmem_reap(void)
 	struct pool *pp;
 
 	bufcnt = uvmexp.freetarg - uvm_availmem(false);
+printf("kmem_reap: %d pages\n", bufcnt);
+
 	if (bufcnt < 0)
 		bufcnt = 0;
 