Top Banner
MeCC: Memory Comparison-based Clone Detector Heejung Kim 1 , Yungbum Jung 1 , Sunghun Kim 2 , and Kwangkeun Yi 1 1 Seoul National University 2 The Hong Kong University of Science and Technology 1 http://ropas.snu.ac.kr/mecc/
52

MeCC: Memory Comparison based Clone Detector

May 23, 2015

Download

Documents

Sung Kim

ICSE 2011 technical session presentation by Yungbum Jung from SNU.
Welcome message from author
This document is posted to help you gain knowledge. Please leave a comment to let me know what you think about it! Share it to your friends and learn new things together.
Transcript
Page 1: MeCC: Memory Comparison based Clone Detector

MeCC: Memory Comparison-based Clone Detector

Heejung Kim1, Yungbum Jung1, Sunghun Kim2, and Kwangkeun Yi1

1 Seoul National University

2 The Hong Kong University of Science and Technology

1

http://ropas.snu.ac.kr/mecc/

Page 2: MeCC: Memory Comparison based Clone Detector

Code Clones

• similar code fragments (syntactically or semantically)

2

static PyObject *float_add(PyObject *v, PyObject *w){ double a,b; CONVERT_TO_DOUBLE(v,a); CONVERT_TO_DOUBLE(w,b); PyFPE_START_PROTECT(“add”,return 0) a = a + b; PyFPE_END_PROTECT(a) return PyFloat_FromDouble(a);}

static PyObject *float_mul(PyObject *v, PyObject *w){ double a,b; CONVERT_TO_DOUBLE(v,a); CONVERT_TO_DOUBLE(w,b); PyFPE_START_PROTECT(“multiply”,return 0) a = a * b; PyFPE_END_PROTECT(a) return PyFloat_FromDouble(a);}

Page 3: MeCC: Memory Comparison based Clone Detector

Applications of Code Clones

3

• software refactoring

• detecting potential bugs

• understanding software evolution

• detecting software plagiarism (malicious duplication)

Page 4: MeCC: Memory Comparison based Clone Detector

Clone Detectors

• CCFinder [TSE’02]

textual tokens

• DECKARD [ICSE’07] AST characteristic vectors

• PDG-based [ICSE‘08, SAS’01] program dependence graph

4

Effective for syntactic code clones

limited for semantic code clones

Page 5: MeCC: Memory Comparison based Clone Detector

Three code clones missed by syntax-based

clone detection

5

Page 6: MeCC: Memory Comparison based Clone Detector

PyObject *PyBool_FromLong (long ok) { PyObject *result; if (ok) result = Py_True; else result = Py_False; Py_INCREF(result); return result;}

6

#1 Control Replacement

static PyObject *get_pybool (int istrue) { PyObject *result = istrue? Py_True: Py_False;

Py_INCREF(result); return result;}

syntactically different but semantically identical

Page 7: MeCC: Memory Comparison based Clone Detector

void appendPQExpBufferChar (PQExpBuffer str, char ch) { /* Make more room if needed */ if (!enlargePQExpBuffer(str, 1)) return; /* OK, append the data */ str->data[str->len] = ch; str->len++; str->data[str->len] = '\0';}

7

#2 Capturing Procedural Effects

understanding memory behavior of procedures

void appendBinaryPQExpBuffer (PQExpBuffer str, const char* data, size_t datalen) { /* Make more room if needed */ if (!enlargePQExpBuffer(str, datalen)) return; /* OK, append the data */ memcpy(str->data + str->len, data, datalen); str->len+= datalen; str->data[str->len] = '\0';}

Page 8: MeCC: Memory Comparison based Clone Detector

... *set_access_name(cmd_parms *cmd, void *dummy, const char *arg){ void *sconf = cmd->server->module_config; core_server_config *conf =

ap_get_module_config(sconf, &core_module); const char *err = ap_check_cmd_context(sconf,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); if (err != NULL) {

return err;}conf->access_name = apr_pstrdup(cmd->pool,arg);return NULL;

}

... *set_protocol(cmd_parms *cmd, void *dummy, const char *arg){ const char *err = ap_check_cmd_context(cmd,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); core_server_config *conf =

ap_get_module_config(cmd->server->module_config, &core_module); char *proto;

if (err != NULL) { return err;}proto = apr_pstrdup(cmd->pool,arg);ap_str_tolower(proto);conf->protocol = proto;return NULL;

} 8

#3 More Complex Clone

Page 9: MeCC: Memory Comparison based Clone Detector

... *set_protocol(cmd_parms *cmd, void *dummy, const char *arg){ const char *err = ap_check_cmd_context(cmd,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); core_server_config *conf =

ap_get_module_config(cmd->server->module_config, &core_module); char *proto;

if (err != NULL) { return err;}proto = apr_pstrdup(cmd->pool,arg);ap_str_tolower(proto);conf->protocol = proto;return NULL;

}

... *set_access_name(cmd_parms *cmd, void *dummy, const char *arg){ void *sconf = cmd->server->module_config; core_server_config *conf =

ap_get_module_config(sconf, &core_module); const char *err = ap_check_cmd_context(sconf,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); if (err != NULL) {

return err;}conf->access_name = apr_pstrdup(cmd->pool,arg);return NULL;

}

statement reordering

9

Page 10: MeCC: Memory Comparison based Clone Detector

... *set_access_name(cmd_parms *cmd, void *dummy, const char *arg){ void *sconf = cmd->server->module_config; core_server_config *conf =

ap_get_module_config(sconf, &core_module); const char *err = ap_check_cmd_context(sconf,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); if (err != NULL) {

return err;}conf->access_name = apr_pstrdup(cmd->pool,arg);return NULL;

}

... *set_protocol(cmd_parms *cmd, void *dummy, const char *arg){ const char *err = ap_check_cmd_context(cmd,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); core_server_config *conf =

ap_get_module_config(cmd->server->module_config, &core_module); char *proto;

if (err != NULL) { return err;}proto = apr_pstrdup(cmd->pool,arg);ap_str_tolower(proto);conf->protocol = proto;return NULL;

}

intermediate variables

statement reordering

10

Page 11: MeCC: Memory Comparison based Clone Detector

... *set_access_name(cmd_parms *cmd, void *dummy, const char *arg){ void *sconf = cmd->server->module_config; core_server_config *conf =

ap_get_module_config(sconf, &core_module); const char *err = ap_check_cmd_context(sconf,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); if (err != NULL) {

return err;}conf->access_name = apr_pstrdup(cmd->pool,arg);return NULL;

}

intermediate variables

statement splitting

statement reordering

11

... *set_protocol(cmd_parms *cmd, void *dummy, const char *arg){ const char *err = ap_check_cmd_context(cmd,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); core_server_config *conf =

ap_get_module_config(cmd->server->module_config, &core_module); char *proto;

if (err != NULL) { return err;}proto = apr_pstrdup(cmd->pool,arg);ap_str_tolower(proto);conf->protocol = proto;return NULL;

}

Page 12: MeCC: Memory Comparison based Clone Detector

... *set_access_name(cmd_parms *cmd, void *dummy, const char *arg){ void *sconf = cmd->server->module_config; core_server_config *conf =

ap_get_module_config(sconf, &core_module); const char *err = ap_check_cmd_context(sconf,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); if (err != NULL) {

return err;}conf->access_name = apr_pstrdup(cmd->pool,arg);return NULL;

}

... *set_protocol(cmd_parms *cmd, void *dummy, const char *arg){ const char *err = ap_check_cmd_context(cmd,NOT_IN_DIR_LOC_FILE | NOT_IN_LIMIT); core_server_config *conf =

ap_get_module_config(cmd->server->module_config, &core_module); char *proto;

if (err != NULL) { return err;}proto = apr_pstrdup(cmd->pool,arg);ap_str_tolower(proto);conf->protocol = proto;return NULL;

} 12

intermediate variables

statement splitting

statement reordering

Page 13: MeCC: Memory Comparison based Clone Detector

13

These Semantic Clones are Identified by MeCC

Page 14: MeCC: Memory Comparison based Clone Detector

MeCC: Our Approach

• Static analyzer estimates the semantics of programs

• Abstract memories are results of analysis

• Comparing abstract memories is a measure

14

Page 15: MeCC: Memory Comparison based Clone Detector

Clone Detection Process

program

procedures

15

P1

P3 P4

P2

�P

Page 16: MeCC: Memory Comparison based Clone Detector

program

procedures abstract memories P1

P3 P4

P2

�P

Static Analyzer

F(�P ) = �M

Clone Detection Process

Page 17: MeCC: Memory Comparison based Clone Detector

program

procedures

Comparing Memories

abstract memories

similarities

$S(M, M')$

17

P1

P3 P4

P2

�P

Static Analyzer

F(�P ) = �M

Clone Detection Process

Page 18: MeCC: Memory Comparison based Clone Detector

program

procedures

ComparingMemories

Grouping

abstractmemories

similarities

S(M,M�)

18

P1

P3 P4

P2

P1

P3

P2

P4

Code Clones

�P

Static Analyzer

F(�P ) = �M

Clone Detection Process

Page 19: MeCC: Memory Comparison based Clone Detector

program

procedures

ComparingMemories

Grouping

abstractmemories

similarities

S(M,M�)

19

Static Analyzer

P1

P3 P4

P2

P1

P3

P2

P4

Code Clones

�P

F(�P ) = �M

Clone Detection Process

Page 20: MeCC: Memory Comparison based Clone Detector

Estimating Semantics by Abstract Memories

int make (list *a, int count){ int r = count + 1; if (a!=0){ a->next = malloc(...); a->next->val = count; } else { return r - 1; } return r;}

• Estimating an abstract memory at the procedure’s exit point

• Abstract memory is a map from abstract addresses to abstract values

20

Address Values

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

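$$\begin{array}{lcl}
a &\mapsto& \{(\mathit{true}, \alpha)\} \\
\mathit{count} &\mapsto& \{(\mathit{true}, \beta)\} \\
r &\mapsto& \{(\mathit{true}, \beta + 1)\} \\
\alpha.\mathit{next} &\mapsto& \{(\alpha \neq 0, \ell)\} \\
\ell.\mathit{val} &\mapsto& \{(\alpha \neq 0, \beta)\} \\
\mathit{RETV} &\mapsto& \{(\alpha = 0, \beta + 1 - 1),\ (\alpha \neq 0, \beta + 1)\}
\end{array}$$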

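$$\begin{array}{lcl}
a &\mapsto& \{(\mathit{true}, \alpha)\} \\
b &\mapsto& \{(\mathit{true}, \beta)\} \\
\alpha.n &\mapsto& \{(\alpha \neq 0, \ell)\} \\
\ell.v &\mapsto& \{(\alpha \neq 0, \beta)\} \\
\mathit{RETV} &\mapsto& \{(\alpha = 0, \beta),\ (\alpha \neq 0, \beta + 2)\}
\end{array}$$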

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

Page 21: MeCC: Memory Comparison based Clone Detector

Estimating Semantics by Abstract Memories

int make (list *a, int count){ int r = count + 1; if (a!=0){ a->next = malloc(...); a->next->val = count; } else { return r - 1; } return r;}

• Estimating an abstract memory at the procedure’s exit point

• Abstract memory is a map from abstract addresses to abstract values

21

Address Values

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

Page 22: MeCC: Memory Comparison based Clone Detector

int make (list *a, int count){ int r = count + 1; if (a!=0){ a->next = malloc(...); a->next->val = count; } else { return r - 1; } return r;}

Address Values

22

Estimating Semantics by Abstract Memories

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

• Use symbols for unknown input values

• All abstract values are guarded by execution path conditions

Page 23: MeCC: Memory Comparison based Clone Detector

int make (list *a, int count){ int r = count + 1; if (a!=0){ a->next = malloc(...); a->next->val = count; } else { return r - 1; } return r;}

23

Estimating Semantics by Abstract Memories

• Use symbols for unknown input values

• All abstract values are guarded by execution path conditions

Address Values

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

Page 24: MeCC: Memory Comparison based Clone Detector

copy and modify

24

Estimating Semantics by Abstract Memories

int make (list *a, int count){ int r = count + 1; if (a!=0){ a->next = malloc(...); a->next->val = count; } else { return r - 1; } return r;}

Address Values

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

int make2 (list2 *a, int b){ if (a==0) return b; a->n = malloc(...); a->n->v = b; return b + 2;}

Page 25: MeCC: Memory Comparison based Clone Detector

25

Estimating Semantics by Abstract Memories

int make2 (list2 *a, int b){ if (a==0) return b; a->n = malloc(...); a->n->v = b; return b + 2;}

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

int make (list *a, int count){ int r = count + 1; if (a!=0){ a->next = malloc(...); a->next->val = count; } else { return r - 1; } return r;}

Address Values

copy and modify

Address Values

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

Page 26: MeCC: Memory Comparison based Clone Detector

program

procedures

ComparingMemories

Grouping

abstractmemories

similarities

S(M,M�)

26

P1

P3 P4

P2

P1

P3

P2

P4

Code Clones

�P

Static Analyzer

F(�P ) = �M

Clone Detection Process

Page 27: MeCC: Memory Comparison based Clone Detector

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

�.val �.v

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

�.val �.v α.n

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α �= 0,β + 1− 1), (α = 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α �= 0,β), (α = 0,β + 1)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

1

field addresses

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

�.val �.v α.n

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

27

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α �= 0,β + 1− 1), (α = 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α �= 0,β), (α = 0,β + 1)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

1

local variables

Comparing Abstract Memories

1. Classifying addresses into similar classes

parameters

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α �= 0,β + 1− 1), (α = 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α �= 0,β), (α = 0,β + 1)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

1

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α �= 0,β + 1− 1), (α = 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α �= 0,β), (α = 0,β + 1)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

1

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α �= 0,β + 1− 1), (α = 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α �= 0,β), (α = 0,β + 1)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

1

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α �= 0,β + 1− 1), (α = 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α �= 0,β), (α = 0,β + 1)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

1

return address

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

Page 28: MeCC: Memory Comparison based Clone Detector

score 0.5

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

2

{(α = 0,β + 1− 1), (α �= 0,β + 1)}

a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

2

score 1.0a {(true,α)}count {(true,β)}r {(true,β + 1)}α.next {(α �= 0, �)}α.val {(α �= 0,β)}RETV {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a {(true,α)}b {(true,β)}α.n {(α �= 0, �)}α.v {(α �= 0,β)}RETV {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

Γ,∆ � E : τ ∆(y) = {x �→ τ loc}Γ,∆ � {x := E} : y

Γ,∆ � {} : x

Γ[t → t�/f ][t loc/x],∆ � E1 : t� Γ[t → t�/f ],∆ � E2 : t��

Γ,∆ � let procedure f(t x) : t� = E1 in E2 : t��

∆ � T1 : ∆1 ∆1 � T2 : ∆2

∆ � T1 T2 : ∆2

x1 �= x2

∆ � type x = {t1 x1, ..., tk xk} : ∆[{x1 �→ t1 loc, ..., xk �→ tk loc}/x]

2

Comparing Abstract Memories

2. Compare guarded values in the same similar classes (score 0.0 to 1.0)

28

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

Page 29: MeCC: Memory Comparison based Clone Detector

Comparing Abstract Memories

3. Find the best combination that maximizes the total score

{(true,α)}

$$\frac{4 \times 1.0 + 1 \times 0.0 + 4 \times 1.0 + 2 \times 0.5}{6 + 5} = 0.82$$

$$\frac{\text{maximum score}}{|M_1| + |M_2|}$$

| F(c)− F(c�) |

i := 0;while i < 10

b := random_bool();if b then i := i + 1;

end

k = 0 j = 1k = 1 j = 2 + 1k = 2 j = 2 + 2 + 1... ...

{n ≥ 0} x = bar(n) {x = 1 + 2n}{I ∧B} S {I}

{I} while B S {I ∧ ¬B}

{P ∧B} S1 {Q} {P ∧ ¬B} S2 {Q}{P} if B then S1 else S2 {Q}

{y + 7 > 42} x := y + 7 {x > 42}

{y = x+ 10 ∧ x = 1} x := y + 7 {y = x� + 10 ∧ x� = 1 ∧ x := x� + 10 + 7}

{y = y + 7− 7 ∧ y + 7 = 18} x := y + 7 {y = x− 7 ∧ x = 18}

{P} x := E {P [x�/x] ∧ (x = E[x�/x])}

{y + 7 > 42} x = y + 7 {x > 42}

{x ≥ 0 ∧ y ≥ 0} z := x+ y {z ≥ 0}

{x ≥ 0 ∧ y ≥ 0} z := x+ y {z ≥ 0 ∧ x ≥ 0 ∧ y ≥ 0}

{x = 5 ∧ y = 2} z := x+ y {x = 5 ∧ z = 7}

{n ≥ 0 ∧ n2 > 28} m := n+ 1;m := m ∗m {¬(m = 36)}

{∀i.a[i] = 10 ∧ k ≥ 0} a[k] = 0 {∀i.a[i] = 10 ∧ k ≥ 0}

F(�c) = �M

S(M,M�)

1

{(true,α)}

$$\frac{4 \times 1.0 + 1 \times 0.0 + 4 \times 1.0 + 2 \times 0.5}{6 + 5} = 0.82$$

$$S(M_1, M_2) = \frac{\text{maximum score}}{|M_1| + |M_2|}$$

| F(c)− F(c�) |

i := 0;while i < 10

b := random_bool();if b then i := i + 1;

end

k = 0 j = 1k = 1 j = 2 + 1k = 2 j = 2 + 2 + 1... ...

{n ≥ 0} x = bar(n) {x = 1 + 2n}{I ∧B} S {I}

{I} while B S {I ∧ ¬B}

{P ∧B} S1 {Q} {P ∧ ¬B} S2 {Q}{P} if B then S1 else S2 {Q}

{y + 7 > 42} x := y + 7 {x > 42}

{y = x+ 10 ∧ x = 1} x := y + 7 {y = x� + 10 ∧ x� = 1 ∧ x := x� + 10 + 7}

{y = y + 7− 7 ∧ y + 7 = 18} x := y + 7 {y = x− 7 ∧ x = 18}

{P} x := E {P [x�/x] ∧ (x = E[x�/x])}

{y + 7 > 42} x = y + 7 {x > 42}

{x ≥ 0 ∧ y ≥ 0} z := x+ y {z ≥ 0}

{x ≥ 0 ∧ y ≥ 0} z := x+ y {z ≥ 0 ∧ x ≥ 0 ∧ y ≥ 0}

{x = 5 ∧ y = 2} z := x+ y {x = 5 ∧ z = 7}

{n ≥ 0 ∧ n2 > 28} m := n+ 1;m := m ∗m {¬(m = 36)}

{∀i.a[i] = 10 ∧ k ≥ 0} a[k] = 0 {∀i.a[i] = 10 ∧ k ≥ 0}

F(�c) = �M

S(M,M�)

1

29

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

∆ � T ∗ : ∆�

Γ,∆ � E : τ

∅ � T ∗ : ∆ ∅,∆ � E : τ∅,∅ � T ∗ E : τ

2

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a �→ {(true,α)}count �→ {(true,β)}r �→ {(true,β + 1)}α.next �→ {(α �= 0, �)}�.val �→ {(α �= 0,β)}RETV �→ {(α = 0,β + 1− 1), (α �= 0,β + 1)}

a �→ {(true,α)}b �→ {(true,β)}α.n �→ {(α �= 0, �)}�.v �→ {(α �= 0,β)}RETV �→ {(α = 0,β), (α �= 0,β + 2)}

{}, {} � P ⇓ v,M

{}, {} � P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

Δ ⊢ T* : Δ′

Γ, Δ ⊢ E : τ

$$\frac{\emptyset \vdash T^{*} : \Delta \qquad \emptyset, \Delta \vdash E : \tau}{\emptyset, \emptyset \vdash T^{*}\; E : \tau}$$

2

≥ 0.8

Page 30: MeCC: Memory Comparison based Clone Detector

Experimental Results

30

Page 31: MeCC: Memory Comparison based Clone Detector

Subject Projects

31

Projects KLOC Procedures Application

Python 435 7,657 interpreter

Apache 343 9,483 web server

PostgreSQL 937 10,469 database

Page 32: MeCC: Memory Comparison based Clone Detector

Detected Clones

Total 623 code clones

6%

39% 53%

2%

Type-1 Type-2 Type-3 Type-4

C. K. Roy and J. R. Cordy. A survey on software clone detection research. SCHOOL OF COMPUTING TR 2007-541, QUEENʼS UNIVERSITY, 115, 2007.

Page 33: MeCC: Memory Comparison based Clone Detector

Semantic Clones

45%

Total 623 code clones

6%

39% 53%

2%

Type-1 Type-2 Type-3 Type-4

Page 34: MeCC: Memory Comparison based Clone Detector

Comparison

CCfinder

PDG-based

DECKARD

MeCC

0 75 150 225 300

DECKARD: characteristic vectors

PDG-based: program dependency graphs

34

CCfinder

PDG-based

DECKARD

MeCC

0 10 20 30 40

Type-4

CCfinder: textual tokens

Type-3

Page 35: MeCC: Memory Comparison based Clone Detector

Applications of Code Clones

35

• software refactoring

• detecting potential bugs

• understanding software evolution

• detecting software plagiarism (malicious duplication)

Page 36: MeCC: Memory Comparison based Clone Detector

Finding Potential Bugs

• A large portion of semantic clones are due to inconsistent changes

• Inconsistent changes may lead to potential bugs (inconsistent clones)

36

Two semantic clones with potential bugs

Page 37: MeCC: Memory Comparison based Clone Detector

#1 Missed Null Check

37

const char *GetVariable (VariableSpace space, const char *name){ struct_variable *current; if (!space) return NULL; for (current=space->next;current;current=current->next) { if (strcmp(current->name,name) == 0) { return current->value; } } return NULL;}

const char *PQparameterStatus (const PGconn *conn, const char *paramName){ const pgParameterStatus *pstatus; if (!conn || !paramName) return NULL; for (pstatus=conn->pstatus; pstatus!=NULL; pstatus = pstatus->next) { if (strcmp(pstatus->name,paramName)== 0) return pstatus->value; } return NULL;}

Only `if (!space)` is checked: the parameter `name` should also be checked!

Page 38: MeCC: Memory Comparison based Clone Detector

38Python project revision #20157

A resource leak without endpwent() procedure call

#2 A Resource Leak Bug

PyObject *pwd_getpwall (PyObject *self){ PyObject *d; struct passwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setpwent(); while ((p = getpwent()) != NULL) { PyObject *v = mkpwent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); return NULL; } Py_DECREF(v); } endpwent(); return d;}

open user database

close user database

Page 39: MeCC: Memory Comparison based Clone Detector

PyObject *pwd_getpwall (PyObject *self){ PyObject *d; struct passwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setpwent(); while ((p = getpwent()) != NULL) { PyObject *v = mkpwent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); return NULL; } Py_DECREF(v); } endpwent(); return d;}

39

A Bug-free Procedure

Python project revision #38359

PyObject *spwd_getspall (PyObject *self, PyObject *args){ PyObject *d; struct spwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setspent(); while ((p = getspent()) != NULL) { PyObject *v = mkspent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); endspent(); return NULL; } Py_DECREF(v); } endspent(); return d;}

Page 40: MeCC: Memory Comparison based Clone Detector

40

PyObject *pwd_getpwall (PyObject *self){ PyObject *d; struct passwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setpwent(); while ((p = getpwent()) != NULL) { PyObject *v = mkpwent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); return NULL; } Py_DECREF(v); } endpwent(); return d;}

The Bug is Fixed Later

Python project revision #73017

Bug fixed by adding: endpwent();

PyObject *spwd_getspall (PyObject *self, PyObject *args){ PyObject *d; struct spwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setspent(); while ((p = getspent()) != NULL) { PyObject *v = mkspent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); endspent(); return NULL; } Py_DECREF(v); } endspent(); return d;}

Page 41: MeCC: Memory Comparison based Clone Detector

41

revision #20157

revision #38359

revision #73017

Procedure A was created with a resource leak

Procedure B (a code clone of A) is introduced

without resource leaks

The resource leak bug in procedure A is fixed

The resource leak could have been fixed earlier if MeCC had been applied

4 years

Page 42: MeCC: Memory Comparison based Clone Detector

42

MeCC successfully identifies these procedures

const char *GetVariable (VariableSpace space, const char *name){ struct_variable *current; if (!space) return NULL; for (current=space->next;current;current=current->next) { if (strcmp(current->name,name) == 0) { return current->value; } } return NULL;}

const char *PQparameterStatus (const PGconn *conn, const char *paramName){ const pgParameterStatus *pstatus; if (!conn || !paramName) return NULL; for (pstatus=conn->pstatus; pstatus!=NULL; pstatus = pstatus->next) { if (strcmp(pstatus->name,paramName)== 0) return pstatus->value; } return NULL;}

PyObject *pwd_getpwall (PyObject *self){ PyObject *d; struct passwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setpwent(); while ((p = getpwent()) != NULL) { PyObject *v = mkpwent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); return NULL; } Py_DECREF(v); } endpwent(); return d;}

PyObject *spwd_getspall (PyObject *self, PyObject *args){ PyObject *d; struct spwd *p; if ((d = PyList_New(0)) == NULL) return NULL; setspent(); while ((p = getspent()) != NULL) { PyObject *v = mkspent(p); if (v==NULL || PyList_Append(d,v)!=0) { Py_XDECREF(v); Py_DECREF(d); endspent(); return NULL; } Py_DECREF(v); } endspent(); return d;}

Page 43: MeCC: Memory Comparison based Clone Detector

Potential Bugs and Code Smells

43

#Semantic Clones

Potential Bugs (%)

Code Smells (%)

Python 95 26 (27.4%) 23 (24.2%)

Apache 81 8 ( 9.9%) 27 (33.3%)

PostgreSQL 102 21 (20.6%) 20 (19.6%)

Total 278 55 (19.8%) 70 (25.2%)

detected by MeCC

Page 44: MeCC: Memory Comparison based Clone Detector

Study Limitation

• Projects are open source and may not be representative

• All clones are manually inspected

• Default options are used for other tools (CCfinder, Deckard, PDG-based)

44

Page 45: MeCC: Memory Comparison based Clone Detector

Conclusion

• MeCC: Memory Comparison-based Clone Detector

• a new clone detector using semantics-based static analysis

• tolerant to syntactic variations

• can be used to find potential bugs

45

Page 46: MeCC: Memory Comparison based Clone Detector

Thank You!

46

http://ropas.snu.ac.kr/mecc/

Page 47: MeCC: Memory Comparison based Clone Detector

Backup Slides

47

Page 48: MeCC: Memory Comparison based Clone Detector

Time Spent Projects KLOC FP Total Time

Python 435 39 264 1h

Apache 343 24 191 5h

PostgreSQL 937 47 278 7h

• False positive ratio is less than 15%

• Slower than other tools (deep semantic analysis)

Ubuntu 64-bit machine with a 2.4 GHz Intel Core 2 Quad CPU and 8 GB RAM.

48

Page 49: MeCC: Memory Comparison based Clone Detector

Structure Initialization

49

Page 50: MeCC: Memory Comparison based Clone Detector

Structure Initialization

50

Page 51: MeCC: Memory Comparison based Clone Detector

Judgement of Clones

• Two parameters

• In our experiment, similarity threshold 0.8 is used

• Penalty function for small size of code clones

S(M1,M2)

log MinEntry

log(| M1 | + | M2 |)

2(2 · 1.0 + 2 · 1.0 + 1 · 0.5)/(6 + 5) = 0.82

a ↦ {(true, α)}   count ↦ {(true, β)}   r ↦ {(true, β + 1)}   α.next ↦ {(α ≠ 0, ℓ)}   α.val ↦ {(α ≠ 0, β)}   RETV ↦ {(α ≠ 0, β + 1 − 1), (α = 0, β + 1)}

a ↦ {(true, α)}   b ↦ {(true, β)}   α.n ↦ {(α ≠ 0, ℓ)}   α.v ↦ {(α ≠ 0, β)}   RETV ↦ {(α ≠ 0, β), (α = 0, β + 2)}

{}, {} ⊢ P ⇓ v, M

{}, {} ⊢ P : τ

type list = {int x, list next}

let list node = {x:=1, next:={}}in

node.next.x

let x := {a:=1, b:=2} in E

type list = {int x, list next}type tsil = {int x, tsil prev}

let ... {x:=1, next:={}}... {x:=1, prev:={}}

...

Δ ⊢ T* : Δ′

Γ, Δ ⊢ E : τ

$$\frac{\emptyset \vdash T^{*} : \Delta \qquad \emptyset, \Delta \vdash E : \tau}{\emptyset, \emptyset \vdash T^{*}\; E : \tau}$$

1

51

Page 52: MeCC: Memory Comparison based Clone Detector

Static Analyzer

• Flow-sensitive

• Context-sensitive by procedural summaries

• Path-sensitive

• Abstract interpretation

52

http://spa-arrow.com