Ruby Performance Secrets and How to Uncover Them http://www.slideshare.net/adymo/adymo-rubyconf-performance
Ruby Performance Secrets and How to Uncover Them
http://www.slideshare.net/adymo/adymo-rubyconf-performance
Who am I?
Alexander Dymo
C/C++ since 2000
Ruby/Rails since 2006
Started to optimize back in 2007
Never stopped since then
Rails Performance: What You Need to Know
https://www.airpair.com/ruby-on-rails/performance
Make Your Ruby/Rails App Fast: Performance And Memory Profiling Using ruby-prof and Kcachegrind
http://www.acunote.com/blog/2008/02/make-your-ruby-rails-applications-fast-performance-and-memory-profiling.html
Ruby Performance Tuning
http://theprosegarden.com/contents-of-recent-issues/#10-14
Ruby Performance
The first comprehensive book on Ruby Performance
I'm 50% done. Beta soon.
ruby-performance-book.com
What do we talk about today?
Performance tips
Performance best practices
How to understand what's wrong
How to find your own performance tips/best practices
Let's profile memory allocations
Need patched ruby
rvm reinstall 1.9.3 --patch railsexpress
rvm reinstall 2.0.0 --patch railsexpress
rvm reinstall 2.1.4 --patch railsexpress
Let's profile memory allocations
Need visualization tool
Mac:
brew install qcachegrind
Linux:
<your package manager> install kcachegrind
Windows:
http://sourceforge.net/projects/qcachegrindwin/
Let's profile memory allocations
ruby-prof -p call_tree --mode=allocations before.rb > callgrind.out.before
ruby-prof -p call_tree --mode=allocations after.rb > callgrind.out.after
kcachegrind callgrind.out.before
kcachegrind callgrind.out.after
static VALUE enum_inject(int argc, VALUE *argv, VALUE obj){ NODE *memo; VALUE init, op; rb_block_call_func *iter = inject_i; … memo = NEW_MEMO(init, argc, op); rb_block_call(obj, id_each, 0, 0, iter, (VALUE)memo); return memo->u1.value;}
> gdb `rbenv which ruby`
GNU gdb (GDB) SUSE (7.5.1-2.5.1)
Reading symbols from /home/gremlin/.rbenv/versions/2.1.4/bin/ruby...done.
(gdb)
(gdb) l enum_inject
632 * longest #=> "sheep"
633 *
634 */
635 static VALUE
636 enum_inject(int argc, VALUE *argv, VALUE obj)
637 {
638 NODE *memo;
639 VALUE init, op;
640 rb_block_call_func *iter = inject_i;
641 ID id;
(gdb)
636 enum_inject(int argc, VALUE *argv, VALUE obj)
637 {
638 NODE *memo;
639 VALUE init, op;
640 rb_block_call_func *iter = inject_i;
641 ID id;
(gdb) b 638
Breakpoint 1 at 0x1cbc0a: file enum.c, line 638.
(gdb)
(gdb) r -e '[1,2,3].inject {}'
Starting program: /home/gremlin/.rbenv/versions/2.1.4/bin/ruby -e '[1,2,3].inject {}'
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7ffff7ff2700 (LWP 3893)]
Breakpoint 1, enum_inject (argc=0, argv=<optimized out>, obj=93825001586240) at enum.c:640
640 rb_block_call_func *iter = inject_i;
(gdb)
640 rb_block_call_func *iter = inject_i;
(gdb) n
665 memo = NEW_MEMO(init, argc, op);
(gdb) n
666 rb_block_call(obj, id_each, 0, 0, iter, (VALUE)memo);
(gdb)
640 rb_block_call_func *iter = inject_i;(gdb) n665 memo = NEW_MEMO(init, argc, op);(gdb) n666 rb_block_call(obj, id_each, 0, 0, iter,(VALUE)memo);(gdb) srb_block_call (obj=93825001586240, mid=1456, argc=0,argv=0x0, bl_proc=0x555555722460 <inject_i>, data2=93825001586200) at vm_eval.c:11421142 {(gdb)
640 rb_block_call_func *iter = inject_i;(gdb) n665 memo = NEW_MEMO(init, argc, op);(gdb) n666 rb_block_call(obj, id_each, 0, 0, iter,(VALUE)memo);(gdb) srb_block_call (obj=93825001586240, mid=1456, argc=0,argv=0x0, bl_proc=0x555555722460 <inject_i>, data2=93825001586200) at vm_eval.c:11421142 {(gdb) s1145 arg.obj = obj;(gdb)
640 rb_block_call_func *iter = inject_i;(gdb) n665 memo = NEW_MEMO(init, argc, op);(gdb) n666 rb_block_call(obj, id_each, 0, 0, iter,(VALUE)memo);(gdb) srb_block_call (obj=93825001586240, mid=1456, argc=0,argv=0x0, bl_proc=0x555555722460 <inject_i>, data2=93825001586200) at vm_eval.c:11421142 {(gdb) s1145 arg.obj = obj;(gdb) s1146 arg.mid = mid;(gdb)
640 rb_block_call_func *iter = inject_i;(gdb) n665 memo = NEW_MEMO(init, argc, op);(gdb) n666 rb_block_call(obj, id_each, 0, 0, iter,(VALUE)memo);(gdb) srb_block_call (obj=93825001586240, mid=1456, argc=0,argv=0x0, bl_proc=0x555555722460 <inject_i>, data2=93825001586200) at vm_eval.c:11421142 {(gdb) s1145 arg.obj = obj;(gdb) s1146 arg.mid = mid;(gdb) s1147 arg.argc = argc;(gdb)
(gdb) s1147 arg.argc = argc;(gdb) s1148 arg.argv = argv;(gdb) s1149 return rb_iterate(iterate_method, (VALUE)&arg,bl_proc, data2);(gdb)
(gdb) s1147 arg.argc = argc;(gdb) s1148 arg.argv = argv;(gdb) s1149 return rb_iterate(iterate_method, (VALUE)&arg,bl_proc, data2);(gdb) srb_iterate (it_proc=it_proc@entry=0x5555556c0790<iterate_method>, data1=data1@entry=140737488340304, bl_proc=0x555555722460 <inject_i>, data2=93825001586200)at vm_eval.c:10541054 {(gdb)
(gdb) s1147 arg.argc = argc;(gdb) s1148 arg.argv = argv;(gdb) s1149 return rb_iterate(iterate_method, (VALUE)&arg,bl_proc, data2);(gdb) srb_iterate (it_proc=it_proc@entry=0x5555556c0790<iterate_method>, data1=data1@entry=140737488340304, bl_proc=0x555555722460 <inject_i>, data2=93825001586200)at vm_eval.c:10541054 {(gdb) s1057 NODE *node = NEW_IFUNC(bl_proc, data2);(gdb)
(gdb) s1147 arg.argc = argc;(gdb) s1148 arg.argv = argv;(gdb) s1149 return rb_iterate(iterate_method, (VALUE)&arg,bl_proc, data2);(gdb) srb_iterate (it_proc=it_proc@entry=0x5555556c0790<iterate_method>, data1=data1@entry=140737488340304, bl_proc=0x555555722460 <inject_i>, data2=93825001586200)at vm_eval.c:10541054 {(gdb) s1057 NODE *node = NEW_IFUNC(bl_proc, data2);(gdb)
static VALUE enum_inject(int argc, VALUE *argv, VALUE obj){ NODE *memo; VALUE init, op; rb_block_call_func *iter = inject_i; … memo = NEW_MEMO(init, argc, op); rb_block_call(obj, id_each, 0, 0, iter, (VALUE)memo); return memo->u1.value;}
VALUE rb_block_call(…){ … return rb_iterate(iterate_method, (VALUE)&arg, bl_proc, data2);}
VALUE rb_iterate(…){ int state; volatile VALUE retval = Qnil; NODE *node = NEW_IFUNC(bl_proc, data2); …}
Lessons learned:
1. use profiler to understand why your code is slow
2. use C debugger to understand Ruby behavior
What's the difference?
str = 'a'*1024*1024*10
str = str.gsub('a', 'b')
str = 'a'*1024*1024*10
str.gsub!('a', 'b')
str = 'a'*1024*1024*10
str = str.gsub('a', 'b')
str = 'a'*1024*1024*10
str.gsub!('a', 'b')
replaces 'a' with 'b'
creates a new object
reuses "str" name
replaces 'a' with 'b'
changes the original
Let's profile memory usage
ruby-prof -p call_tree --mode=memory after.rb > callgrind.out.after
kcachegrind callgrind.out.after
Not all bang! functions are the same
str = 'a'*1024*1024*10
str.downcase!
ruby-prof -p call_tree --mode=memory downcase.rb > callgrind.out.downcase
kcachegrind callgrind.out.downcase
Conclusions
1. Don't guess. Profile.
2. Guess. Profile.
3. Profile not only CPU, but Memory.
4. Look at the source, use GDB if not enlightened.
5. Challenge all tips/tricks. Understand instead.