Skip to content

Cuda plugin fails in pre-dump #2654

@jatin-jangir

Description

@jatin-jangir

I tried to use pre-dump with the CUDA plugin, but CRIU is unable to take the dump because the plugin fails.

$ sudo cat dump1.log 
(00.000000) CRIU run id = 576f6d4d-db73-4931-a69f-b424cd2cf997
(00.000078) Version: 4.0 (gitid v4.0-93-g5cea5b6d3)
(00.000088) Running on test1 Linux 5.15.0-136-generic #147~20.04.1-Ubuntu SMP Wed Mar 19 16:13:14 UTC 2025 x86_64
(00.000150) Loaded kdat cache from /run/criu.kdat
(00.000214) Hugetlb size 2 Mb is supported but cannot get dev's number
(00.000242) Hugetlb size 1024 Mb is supported but cannot get dev's number
(00.000917) cpu: x86_family 6 x86_vendor_id GenuineIntel x86_model_id Intel Xeon Processor (Cascadelake)
(00.000975) cpu: fpu: xfeatures_mask 0x2e5 xsave_size 2696 xsave_size_max 2696 xsaves_size 2440
(00.001036) cpu: fpu: x87 floating point registers     xstate_offsets      0 / 0      xstate_sizes    160 / 160   
(00.001047) cpu: fpu: AVX registers                    xstate_offsets    576 / 576    xstate_sizes    256 / 256   
(00.001053) cpu: fpu: AVX-512 opmask                   xstate_offsets   1088 / 832    xstate_sizes     64 / 64    
(00.001059) cpu: fpu: AVX-512 Hi256                    xstate_offsets   1152 / 896    xstate_sizes    512 / 512   
(00.001064) cpu: fpu: AVX-512 ZMM_Hi256                xstate_offsets   1664 / 1408   xstate_sizes   1024 / 1024  
(00.001069) cpu: fpu: Protection Keys User registers   xstate_offsets   2688 / 2432   xstate_sizes      8 / 8     
(00.001100) rlimit: RLIMIT_NOFILE unlimited for self
(00.001122) Enforcing memory tracking for pre-dump.
(00.001134) Enforcing tasks run after pre-dump.
(00.001412) Plugin "cuda_plugin" (version 512 hooks 12)
(00.001447) 	   9 -> 0x7fb4acdbbc60
(00.001452) 	  10 -> 0x7fb4acdbb6ba
(00.001457) 	  11 -> 0x7fb4acdbb41b
(00.001470) irmap: Searching irmap cache in work dir
(00.001502) No irmap-cache image
(00.001514) irmap: Searching irmap cache in parent
(00.001525) No parent images directory provided
(00.001539) irmap: No irmap cache
(00.001544) cpu: fpu:1 fxsr:1 xsave:1 xsaveopt:1 xsavec:1 xgetbv1:1 xsaves:1
(00.001806) run_plugins----
(00.001813) plugin: `cuda_plugin' hook 10 -> 0x7fb4acdbb6ba
(00.001819) Detected cgroup V1 freezer
(00.001980) Seized task 3748, state 1
(00.001997) seccomp: Collected tid_real 3748 mode 0
(00.002042) 	Seizing 3748's 3750 thread
(00.002345) seccomp: Collected tid_real 3750 mode 0
(00.002353) 	Seizing 3748's 3762 thread
(00.002618) seccomp: Collected tid_real 3762 mode 0
(00.002647) Collected (3 attempts, 0 in_progress)
(00.002710) Collected (4 attempts, 0 in_progress)
(00.002764) Collected 3748 in 1 state
(00.002812) Add net ns 1 pid 3773
(00.002841) Add mnt ns 2 pid 3773
(00.003017) 	type sysfs source sysfs mnt_id 25 s_dev 0x17 / @ ./sys flags 0x30000e options 
(00.003041) 	type proc source proc mnt_id 26 s_dev 0x18 / @ ./proc flags 0x30000e options 
(00.003060) 	type devtmpfs source udev mnt_id 27 s_dev 0x5 / @ ./dev flags 0x30000a options size=4016092k,nr_inodes=1004023,mode=755,inode64
(00.003079) 	type devpts source devpts mnt_id 28 s_dev 0x19 / @ ./dev/pts flags 0x30000a options gid=5,mode=620,ptmxmode=000
(00.003103) 	type tmpfs source tmpfs mnt_id 29 s_dev 0x1a / @ ./run flags 0x30000e options size=812800k,mode=755,inode64
(00.003131) 	type ext4 source /dev/vda5 mnt_id 30 s_dev 0xfc00005 / @ ./ flags 0x300000 options errors=remount-ro
(00.003145) 	type securityfs source securityfs mnt_id 31 s_dev 0x6 / @ ./sys/kernel/security flags 0x30000e options 
(00.003158) 	type tmpfs source tmpfs mnt_id 32 s_dev 0x1b / @ ./dev/shm flags 0x1100006 options inode64
(00.003171) 	type tmpfs source tmpfs mnt_id 33 s_dev 0x1c / @ ./run/lock flags 0x30000e options size=5120k,inode64
(00.003186) 	type tmpfs source tmpfs mnt_id 34 s_dev 0x1d / @ ./sys/fs/cgroup flags 0x110000f options mode=755,inode64
(00.003278) 	type cgroup2 source cgroup2 mnt_id 35 s_dev 0x1e / @ ./sys/fs/cgroup/unified flags 0x30000e options nsdelegate
(00.003295) 	type cgroup source cgroup mnt_id 36 s_dev 0x1f / @ ./sys/fs/cgroup/systemd flags 0x30000e options xattr,name=systemd
(00.003307) 	type pstore source pstore mnt_id 37 s_dev 0x20 / @ ./sys/fs/pstore flags 0x30000e options 
(00.003319) 	type bpf source bpf mnt_id 38 s_dev 0x21 / @ ./sys/fs/bpf flags 0x30000e options mode=700
(00.003359) 	type cgroup source cgroup mnt_id 39 s_dev 0x22 / @ ./sys/fs/cgroup/net_cls,net_prio flags 0x30000e options net_cls,net_prio
(00.003373) 	type cgroup source cgroup mnt_id 40 s_dev 0x23 / @ ./sys/fs/cgroup/blkio flags 0x30000e options blkio
(00.003387) 	type cgroup source cgroup mnt_id 41 s_dev 0x24 / @ ./sys/fs/cgroup/cpu,cpuacct flags 0x30000e options cpu,cpuacct
(00.003399) 	type cgroup source cgroup mnt_id 42 s_dev 0x25 / @ ./sys/fs/cgroup/memory flags 0x30000e options memory
(00.003412) 	type cgroup source cgroup mnt_id 43 s_dev 0x26 / @ ./sys/fs/cgroup/devices flags 0x30000e options devices
(00.003429) 	type cgroup source cgroup mnt_id 44 s_dev 0x27 / @ ./sys/fs/cgroup/pids flags 0x30000e options pids
(00.003544) 	type cgroup source cgroup mnt_id 45 s_dev 0x28 / @ ./sys/fs/cgroup/freezer flags 0x30000e options freezer
(00.003562) 	type cgroup source cgroup mnt_id 46 s_dev 0x29 / @ ./sys/fs/cgroup/cpuset flags 0x30000e options cpuset
(00.003575) 	type cgroup source cgroup mnt_id 47 s_dev 0x2a / @ ./sys/fs/cgroup/hugetlb flags 0x30000e options hugetlb
(00.003588) 	type cgroup source cgroup mnt_id 48 s_dev 0x2b / @ ./sys/fs/cgroup/perf_event flags 0x30000e options perf_event
(00.003600) 	type cgroup source cgroup mnt_id 49 s_dev 0x2c / @ ./sys/fs/cgroup/rdma flags 0x30000e options rdma
(00.003612) 	type cgroup source cgroup mnt_id 50 s_dev 0x2d / @ ./sys/fs/cgroup/misc flags 0x30000e options misc
(00.003628) 	type autofs source systemd-1 mnt_id 51 s_dev 0x2e / @ ./proc/sys/fs/binfmt_misc flags 0x300000 options fd=28,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=17705
(00.003659) 	type debugfs source debugfs mnt_id 52 s_dev 0x7 / @ ./sys/kernel/debug flags 0x30000e options 
(00.003672) 	type hugetlbfs source hugetlbfs mnt_id 53 s_dev 0x2f / @ ./dev/hugepages flags 0x300000 options pagesize=2M
(00.003684) 	type mqueue source mqueue mnt_id 54 s_dev 0x14 / @ ./dev/mqueue flags 0x30000e options 
(00.003791) 	type tracefs source tracefs mnt_id 55 s_dev 0xc / @ ./sys/kernel/tracing flags 0x30000e options 
(00.003797) 	skipping fs mounted at /sys/kernel/tracing
(00.003812) 	type fusectl source fusectl mnt_id 56 s_dev 0x30 / @ ./sys/fs/fuse/connections flags 0x30000e options 
(00.003824) 	type configfs source configfs mnt_id 57 s_dev 0x15 / @ ./sys/kernel/config flags 0x30000e options 
(00.003855) 	type squashfs source /dev/loop0 mnt_id 127 s_dev 0x700000 / @ ./snap/bare/5 flags 0x300005 options errors=continue
(00.003869) 	type squashfs source /dev/loop2 mnt_id 130 s_dev 0x700002 / @ ./snap/core22/1908 flags 0x300005 options errors=continue
(00.003881) 	type squashfs source /dev/loop3 mnt_id 133 s_dev 0x700003 / @ ./snap/core18/2855 flags 0x300005 options errors=continue
(00.003943) 	type squashfs source /dev/loop1 mnt_id 136 s_dev 0x700001 / @ ./snap/core20/2496 flags 0x300005 options errors=continue
(00.003956) 	type squashfs source /dev/loop6 mnt_id 139 s_dev 0x700006 / @ ./snap/snap-store/467 flags 0x300005 options errors=continue
(00.003974) 	type squashfs source /dev/loop4 mnt_id 142 s_dev 0x700004 / @ ./snap/core18/2409 flags 0x300005 options errors=continue
(00.003986) 	type squashfs source /dev/loop10 mnt_id 145 s_dev 0x70000a / @ ./snap/gnome-3-34-1804/77 flags 0x300005 options errors=continue
(00.004059) 	type squashfs source /dev/loop14 mnt_id 148 s_dev 0x70000e / @ ./snap/snapd/23771 flags 0x300005 options errors=continue
(00.004066) 	type squashfs source /dev/loop12 mnt_id 151 s_dev 0x70000c / @ ./snap/gnome-3-38-2004/106 flags 0x300005 options errors=continue
(00.004073) 	type squashfs source /dev/loop5 mnt_id 154 s_dev 0x700005 / @ ./snap/gnome-3-34-1804/93 flags 0x300005 options errors=continue
(00.004083) 	type squashfs source /dev/loop15 mnt_id 157 s_dev 0x70000f / @ ./snap/tree/54 flags 0x300005 options errors=continue
(00.004089) 	type squashfs source /dev/loop13 mnt_id 163 s_dev 0x70000d / @ ./snap/snap-store/558 flags 0x300005 options errors=continue
(00.004095) 	type squashfs source /dev/loop7 mnt_id 160 s_dev 0x700007 / @ ./snap/gnome-3-38-2004/143 flags 0x300005 options errors=continue
(00.004108) 	type squashfs source /dev/loop9 mnt_id 161 s_dev 0x700009 / @ ./snap/gtk-common-themes/1535 flags 0x300005 options errors=continue
(00.004114) 	type squashfs source /dev/loop11 mnt_id 162 s_dev 0x70000b / @ ./snap/core20/2501 flags 0x300005 options errors=continue
(00.004120) 	type squashfs source /dev/loop8 mnt_id 172 s_dev 0x700008 / @ ./snap/gtk-common-themes/1534 flags 0x300005 options errors=continue
(00.004126) 	type squashfs source /dev/loop16 mnt_id 175 s_dev 0x700010 / @ ./snap/curl/2295 flags 0x300005 options errors=continue
(00.004172) 	type squashfs source /dev/loop17 mnt_id 178 s_dev 0x700011 / @ ./snap/core24/739 flags 0x300005 options errors=continue
(00.004179) 	type binfmt_misc source binfmt_misc mnt_id 286 s_dev 0x31 / @ ./proc/sys/fs/binfmt_misc flags 0x30000e options 
(00.004190) 	type vfat source /dev/vda1 mnt_id 181 s_dev 0xfc00001 / @ ./boot/efi flags 0x300000 options fmask=0077,dmask=0077,codepage=437,iocharset=iso8859-1,shortname=mixed,errors=remount-ro
(00.004199) 	type tmpfs source tmpfs mnt_id 1272 s_dev 0x36 / @ ./run/user/1001 flags 0x300006 options size=812796k,mode=700,uid=1001,gid=1001,inode64
(00.004208) mnt: Building mountpoints tree
(00.004215) mnt: 	Building plain mount tree
(00.004217) mnt: 		Working on 1272->29
(00.004220) mnt: 		Working on 181->30
(00.004222) mnt: 		Working on 286->51
(00.004225) mnt: 		Working on 178->30
(00.004227) mnt: 		Working on 175->30
(00.004229) mnt: 		Working on 172->30
(00.004232) mnt: 		Working on 162->30
(00.004234) mnt: 		Working on 161->30
(00.004236) mnt: 		Working on 160->30
(00.004238) mnt: 		Working on 163->30
(00.004240) mnt: 		Working on 157->30
(00.004243) mnt: 		Working on 154->30
(00.004245) mnt: 		Working on 151->30
(00.004247) mnt: 		Working on 148->30
(00.004249) mnt: 		Working on 145->30
(00.004252) mnt: 		Working on 142->30
(00.004254) mnt: 		Working on 139->30
(00.004256) mnt: 		Working on 136->30
(00.004258) mnt: 		Working on 133->30
(00.004260) mnt: 		Working on 130->30
(00.004263) mnt: 		Working on 127->30
(00.004265) mnt: 		Working on 57->25
(00.004267) mnt: 		Working on 56->25
(00.004270) mnt: 		Working on 54->27
(00.004272) mnt: 		Working on 53->27
(00.004274) mnt: 		Working on 52->25
(00.004277) mnt: 		Working on 51->26
(00.004279) mnt: 		Working on 50->34
(00.004281) mnt: 		Working on 49->34
(00.004283) mnt: 		Working on 48->34
(00.004286) mnt: 		Working on 47->34
(00.004288) mnt: 		Working on 46->34
(00.004290) mnt: 		Working on 45->34
(00.004292) mnt: 		Working on 44->34
(00.004294) mnt: 		Working on 43->34
(00.004296) mnt: 		Working on 42->34
(00.004299) mnt: 		Working on 41->34
(00.004301) mnt: 		Working on 40->34
(00.004303) mnt: 		Working on 39->34
(00.004305) mnt: 		Working on 38->25
(00.004308) mnt: 		Working on 37->25
(00.004310) mnt: 		Working on 36->34
(00.004312) mnt: 		Working on 35->34
(00.004314) mnt: 		Working on 34->25
(00.004316) mnt: 		Working on 33->29
(00.004319) mnt: 		Working on 32->27
(00.004321) mnt: 		Working on 31->25
(00.004323) mnt: 		Working on 30->1
(00.004326) mnt: 		Working on 29->30
(00.004328) mnt: 		Working on 28->27
(00.004330) mnt: 		Working on 27->30
(00.004332) mnt: 		Working on 26->30
(00.004335) mnt: 		Working on 25->30
(00.004337) mnt: 	Resorting children of 30 in mount order
(00.004348) mnt: 	Resorting children of 178 in mount order
(00.004350) mnt: 	Resorting children of 175 in mount order
(00.004352) mnt: 	Resorting children of 172 in mount order
(00.004354) mnt: 	Resorting children of 162 in mount order
(00.004356) mnt: 	Resorting children of 161 in mount order
(00.004358) mnt: 	Resorting children of 160 in mount order
(00.004360) mnt: 	Resorting children of 163 in mount order
(00.004362) mnt: 	Resorting children of 157 in mount order
(00.004364) mnt: 	Resorting children of 154 in mount order
(00.004366) mnt: 	Resorting children of 151 in mount order
(00.004368) mnt: 	Resorting children of 148 in mount order
(00.004370) mnt: 	Resorting children of 145 in mount order
(00.004372) mnt: 	Resorting children of 142 in mount order
(00.004374) mnt: 	Resorting children of 139 in mount order
(00.004380) mnt: 	Resorting children of 136 in mount order
(00.004382) mnt: 	Resorting children of 133 in mount order
(00.004384) mnt: 	Resorting children of 130 in mount order
(00.004386) mnt: 	Resorting children of 127 in mount order
(00.004388) mnt: 	Resorting children of 181 in mount order
(00.004390) mnt: 	Resorting children of 29 in mount order
(00.004392) mnt: 	Resorting children of 1272 in mount order
(00.004394) mnt: 	Resorting children of 33 in mount order
(00.004397) mnt: 	Resorting children of 27 in mount order
(00.004399) mnt: 	Resorting children of 54 in mount order
(00.004401) mnt: 	Resorting children of 53 in mount order
(00.004403) mnt: 	Resorting children of 32 in mount order
(00.004405) mnt: 	Resorting children of 28 in mount order
(00.004407) mnt: 	Resorting children of 26 in mount order
(00.004409) mnt: 	Resorting children of 51 in mount order
(00.004411) mnt: 	Resorting children of 286 in mount order
(00.004413) mnt: 	Resorting children of 25 in mount order
(00.004417) mnt: 	Resorting children of 56 in mount order
(00.004419) mnt: 	Resorting children of 57 in mount order
(00.004421) mnt: 	Resorting children of 52 in mount order
(00.004423) mnt: 	Resorting children of 38 in mount order
(00.004425) mnt: 	Resorting children of 37 in mount order
(00.004427) mnt: 	Resorting children of 34 in mount order
(00.004433) mnt: 	Resorting children of 50 in mount order
(00.004435) mnt: 	Resorting children of 49 in mount order
(00.004437) mnt: 	Resorting children of 48 in mount order
(00.004439) mnt: 	Resorting children of 47 in mount order
(00.004441) mnt: 	Resorting children of 46 in mount order
(00.004443) mnt: 	Resorting children of 45 in mount order
(00.004446) mnt: 	Resorting children of 44 in mount order
(00.004447) mnt: 	Resorting children of 43 in mount order
(00.004449) mnt: 	Resorting children of 42 in mount order
(00.004451) mnt: 	Resorting children of 41 in mount order
(00.004453) mnt: 	Resorting children of 40 in mount order
(00.004456) mnt: 	Resorting children of 39 in mount order
(00.004458) mnt: 	Resorting children of 36 in mount order
(00.004460) mnt: 	Resorting children of 35 in mount order
(00.004462) mnt: 	Resorting children of 31 in mount order
(00.004464) mnt: Done:
(00.004466) mnt: [./](30->1)
(00.004469) mnt:  [./snap/core24/739](178->30)
(00.004471) mnt:  <--
(00.004474) mnt:  [./snap/curl/2295](175->30)
(00.004476) mnt:  <--
(00.004478) mnt:  [./snap/gtk-common-themes/1534](172->30)
(00.004480) mnt:  <--
(00.004482) mnt:  [./snap/core20/2501](162->30)
(00.004485) mnt:  <--
(00.004487) mnt:  [./snap/gtk-common-themes/1535](161->30)
(00.004489) mnt:  <--
(00.004491) mnt:  [./snap/gnome-3-38-2004/143](160->30)
(00.004493) mnt:  <--
(00.004495) mnt:  [./snap/snap-store/558](163->30)
(00.004498) mnt:  <--
(00.004500) mnt:  [./snap/tree/54](157->30)
(00.004502) mnt:  <--
(00.004504) mnt:  [./snap/gnome-3-34-1804/93](154->30)
(00.004506) mnt:  <--
(00.004508) mnt:  [./snap/gnome-3-38-2004/106](151->30)
(00.004510) mnt:  <--
(00.004513) mnt:  [./snap/snapd/23771](148->30)
(00.004515) mnt:  <--
(00.004517) mnt:  [./snap/gnome-3-34-1804/77](145->30)
(00.004519) mnt:  <--
(00.004521) mnt:  [./snap/core18/2409](142->30)
(00.004523) mnt:  <--
(00.004525) mnt:  [./snap/snap-store/467](139->30)
(00.004528) mnt:  <--
(00.004530) mnt:  [./snap/core20/2496](136->30)
(00.004532) mnt:  <--
(00.004534) mnt:  [./snap/core18/2855](133->30)
(00.004536) mnt:  <--
(00.004538) mnt:  [./snap/core22/1908](130->30)
(00.004540) mnt:  <--
(00.004542) mnt:  [./snap/bare/5](127->30)
(00.004545) mnt:  <--
(00.004547) mnt:  [./boot/efi](181->30)
(00.004549) mnt:  <--
(00.004551) mnt:  [./run](29->30)
(00.004553) mnt:   [./run/user/1001](1272->29)
(00.004556) mnt:   <--
(00.004558) mnt:   [./run/lock](33->29)
(00.004560) mnt:   <--
(00.004562) mnt:  <--
(00.004564) mnt:  [./dev](27->30)
(00.004567) mnt:   [./dev/mqueue](54->27)
(00.004569) mnt:   <--
(00.004571) mnt:   [./dev/hugepages](53->27)
(00.004573) mnt:   <--
(00.004575) mnt:   [./dev/shm](32->27)
(00.004577) mnt:   <--
(00.004588) mnt:   [./dev/pts](28->27)
(00.004590) mnt:   <--
(00.004592) mnt:  <--
(00.004595) mnt:  [./proc](26->30)
(00.004597) mnt:   [./proc/sys/fs/binfmt_misc](51->26)
(00.004599) mnt:    [./proc/sys/fs/binfmt_misc](286->51)
(00.004601) mnt:    <--
(00.004604) mnt:   <--
(00.004606) mnt:  <--
(00.004608) mnt:  [./sys](25->30)
(00.004610) mnt:   [./sys/fs/fuse/connections](56->25)
(00.004613) mnt:   <--
(00.004615) mnt:   [./sys/kernel/config](57->25)
(00.004617) mnt:   <--
(00.004619) mnt:   [./sys/kernel/debug](52->25)
(00.004621) mnt:   <--
(00.004623) mnt:   [./sys/fs/bpf](38->25)
(00.004626) mnt:   <--
(00.004628) mnt:   [./sys/fs/pstore](37->25)
(00.004630) mnt:   <--
(00.004632) mnt:   [./sys/fs/cgroup](34->25)
(00.004634) mnt:    [./sys/fs/cgroup/misc](50->34)
(00.004636) mnt:    <--
(00.004639) mnt:    [./sys/fs/cgroup/rdma](49->34)
(00.004641) mnt:    <--
(00.004643) mnt:    [./sys/fs/cgroup/perf_event](48->34)
(00.004645) mnt:    <--
(00.004647) mnt:    [./sys/fs/cgroup/hugetlb](47->34)
(00.004649) mnt:    <--
(00.004651) mnt:    [./sys/fs/cgroup/cpuset](46->34)
(00.004654) mnt:    <--
(00.004656) mnt:    [./sys/fs/cgroup/freezer](45->34)
(00.004658) mnt:    <--
(00.004660) mnt:    [./sys/fs/cgroup/pids](44->34)
(00.004662) mnt:    <--
(00.004664) mnt:    [./sys/fs/cgroup/devices](43->34)
(00.004667) mnt:    <--
(00.004669) mnt:    [./sys/fs/cgroup/memory](42->34)
(00.004671) mnt:    <--
(00.004673) mnt:    [./sys/fs/cgroup/cpu,cpuacct](41->34)
(00.004675) mnt:    <--
(00.004677) mnt:    [./sys/fs/cgroup/blkio](40->34)
(00.004680) mnt:    <--
(00.004682) mnt:    [./sys/fs/cgroup/net_cls,net_prio](39->34)
(00.004684) mnt:    <--
(00.004686) mnt:    [./sys/fs/cgroup/systemd](36->34)
(00.004688) mnt:    <--
(00.004690) mnt:    [./sys/fs/cgroup/unified](35->34)
(00.004693) mnt:    <--
(00.004695) mnt:   <--
(00.004697) mnt:   [./sys/kernel/security](31->25)
(00.004699) mnt:   <--
(00.004701) mnt:  <--
(00.004703) mnt: <--
(00.004728) net: Collecting netns 1/3773
(00.004853) rmrf: removing .criu.temp-aa-policy.D17Cax
(00.004916) No parent images directory provided
(00.004921) ========================================
(00.004940) Pre-dumping task (pid: 3748 comm: v3)
(00.004968) ========================================
(00.004978) 
(00.004980) Collecting mappings (pid: 3748)
(00.004982) ----------------------------------------
(00.005664) Handling VMA with the following smaps entry: 200000000-200200000 ---p 00000000 00:00 0 
(00.005701) Handling VMA with the following smaps entry: 200200000-200400000 rw-s 00000000 00:05 522                              /dev/nvidia0
(00.005731) run_plugins----
(00.005736) Error (criu/proc_parse.c:116): handle_device_vma plugin failed: No such file or directory
(00.005739) Error (criu/proc_parse.c:632): Can't handle non-regular mapping on 3748's map 200200000
(00.005749) Error (criu/cr-dump.c:1486): Collect mappings (pid: 3748) failed with -1
(00.005771) Unfreezing tasks into 1
(00.005779) 	Unseizing 3748 into 1
(00.005819) Writing image inventory (version 1)
(00.005894) Error (criu/cr-dump.c:1905): Pre-dumping FAILED.

test.cu

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define PORT 10000

__device__ int counter = 100;
// Kernel: bump the device-resident `counter` by one.
// main() launches it as <<<1,1>>>, i.e. one thread per received datagram.
__global__ void increment()
{
    counter += 1;
}

// UDP front-end for the device counter.
//
// Binds a datagram socket to 127.0.0.1:PORT, then loops forever: wait for any
// datagram, launch increment() once, copy `counter` back to the host and reply
// to the sender with its decimal value followed by a newline.
//
// Returns 1 if the socket cannot be created or bound, or if the CUDA copy
// fails; the serving loop itself never exits normally.
int main(void)
{
    // Touch the CUDA runtime before any network traffic arrives
    // (presumably to force context creation up front — confirm).
    cudaFree(0);

    int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock < 0) {
        perror("socket");
        return 1;
    }

    sockaddr_in addr = {AF_INET, htons(PORT), inet_addr("127.0.0.1")};
    if (bind(sock, (sockaddr *)&addr, sizeof addr) < 0) {
        perror("bind");
        return 1;
    }

    while (true) {
        char buffer[16] = {0};
        sockaddr_in peer = {0};
        socklen_t inetSize = sizeof peer;
        int hCounter = 0;

        // Without a valid datagram there is no peer to answer; skip the round
        // instead of bumping the counter and sending to a zeroed address.
        if (recvfrom(sock, buffer, sizeof buffer, 0, (sockaddr *)&peer, &inetSize) < 0) {
            perror("recvfrom");
            continue;
        }

        increment<<<1,1>>>();

        // Check the copy's status so a broken CUDA state is reported on
        // stderr rather than answered as a counter value of 0.
        cudaError_t err = cudaMemcpyFromSymbol(&hCounter, counter, sizeof counter);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudaMemcpyFromSymbol: %s\n", cudaGetErrorString(err));
            return 1;
        }

        // Bounded formatting; "%d\n" of any int fits in the 16-byte buffer.
        size_t bytes = snprintf(buffer, sizeof buffer, "%d\n", hCounter);
        if (sendto(sock, buffer, bytes, 0, (sockaddr *)&peer, inetSize) < 0)
            perror("sendto");
    }
    return 0;
}

Compile the CUDA code:
nvcc -o test test.cu

example.sh

#!/bin/bash
#
# Reproducer for checkpointing a CUDA application with CRIU's CUDA plugin.
#
# Flow: start ./test in the background, poke it over UDP, take two
# pre-dumps (dump1, dump2) and a final dump (final), then restore from
# `final` and poke the restored process again.
#
# Requirements: ./test in the current directory, plus criu, nc and
# nvidia-smi on PATH.
#
# Path note: every --log-file / --prev-images-dir argument below is written
# with a leading "../" (presumably because criu resolves them relative to
# the images directory — confirm against the criu man page).

# Directory passed to every criu invocation via -L (plugin search path).
readonly PLUGIN_DIR="/home/criu/plugins/cuda"

# run the counter application
./test &

# get the PID of counter
PID=$!
echo "$PID"
# wait for counter to bind to the UDP socket
sleep 10
echo "-------------------basic run----------------------"
# send a packet
echo hello | nc -u localhost 10000 -W 1
sleep 10

# confirm that counter is using the GPU
nvidia-smi --query --display=PIDS

# Start from a clean slate; -f keeps a first run (nothing to delete) quiet.
rm -rf dump1
rm -rf dump2
rm -rf final
rm -f dump1.log
rm -f dump2.log
rm -f final.log

# create the directory which will hold the first pre-dump image
mkdir -p dump1
# first pre-dump of counter
criu pre-dump --shell-job -L "$PLUGIN_DIR" --auto-dedup --images-dir dump1 --tree "$PID" -vvvv --log-file ../dump1.log

echo "--------------1--------------------"
du -sh dump1
du -sh dump1/*
echo "--------------1----------------------------------------------------------------------------------"

echo hello | nc -u localhost 10000 -W 1
sleep 10

# create the directory which will hold the second pre-dump image
mkdir -p dump2
# second pre-dump of counter, incremental on top of dump1
criu pre-dump --shell-job -L "$PLUGIN_DIR" --auto-dedup --images-dir dump2 --tree "$PID" -vvvv --log-file ../dump2.log --prev-images-dir ../dump1
echo "--------------2--------------------"
du -sh dump1
echo "--------------2--------------------"
du -sh dump2
echo "--------------2--------------------"
du -sh dump2/*
echo "--------------2-----------------------------------------------------------------------------------"

# create the directory which will hold the final checkpoint image
mkdir -p final

# final checkpoint of counter, incremental on top of dump2
criu dump --shell-job -L "$PLUGIN_DIR" --images-dir final --auto-dedup --tree "$PID" -vvvv --log-file ../final.log --prev-images-dir ../dump2

#nvidia-smi --query --display=PIDS

echo "--------------final--------------------"
du -sh dump1
echo "--------------final--------------------"
du -sh dump2
echo "--------------final--------------------"
du -sh final
echo "--------------final--------------------"
du -sh final/*
echo "--------------final------------------------------------------------------"

sleep 10

rm -f restore.log
echo "restore counter----------------"
criu restore --shell-job --restore-detached -L "$PLUGIN_DIR" --images-dir final -vvvv --log-file ../restore.log

sleep 10

nvidia-smi --query --display=PIDS

echo "send another packet------------"
echo hello | nc -u localhost 10000 -W 1

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions