Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KBDEV-1236 #25

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions pori_python/graphkb/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@
looks_like_rid,
stringifyVariant,
)
from .vocab import get_equivalent_terms, get_term_tree, get_terms_set
from .vocab import (
get_equivalent_terms,
get_term_by_name,
get_term_tree,
get_terms_set,
)

FEATURES_CACHE: Set[str] = set()

Expand Down Expand Up @@ -275,7 +280,54 @@ def positions_overlap(
return start is None or pos == start


def equivalent_types(
    conn: GraphKBConnection,
    type1: str,
    type2: str,
    strict: bool = False,
) -> bool:
    """
    Compare 2 variant types to determine if they should match

    Args:
        conn: the graphkb connection object
        type1: type from the observed variant we want to match to the DB
        type2: type from the DB variant
        strict: Whether or not only the specific-to-generic matches are considered.
            By default (False), not only specific types can match more generic ones,
            but generic types can also match more specific ones.

    Returns:
        bool: True if the types can be matched
    """
    # Convert rid to displayName if needed
    if looks_like_rid(type1):
        type1 = conn.get_records_by_id([type1])[0]['displayName']
    if looks_like_rid(type2):
        type2 = conn.get_records_by_id([type2])[0]['displayName']

    # Get type terms from observed variant
    if strict:
        # Strict mode: only the observed term itself is a candidate; it is not
        # expanded through get_terms_set.
        terms1 = set()
        try:
            terms1.add(get_term_by_name(conn, type1)['@rid'])
        except Exception:
            # Best effort: if the term lookup fails, no candidate is collected
            # and the comparison below returns False. Exception (rather than a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            pass
    else:
        terms1 = get_terms_set(conn, [type1])

    # Get type terms from DB variant
    terms2 = get_terms_set(conn, [type2])

    # The types match when both sides share at least one term
    return bool(terms2.intersection(terms1))


def compare_positional_variants(
conn: GraphKBConnection,
variant: Union[PositionalVariant, ParsedVariant],
reference_variant: Union[PositionalVariant, ParsedVariant],
generic: bool = True,
Expand Down Expand Up @@ -378,6 +430,11 @@ def compare_positional_variants(
elif len(variant["refSeq"]) != len(reference_variant["refSeq"]): # type: ignore
return False

# Equivalent types
if variant.get('type') and reference_variant.get('type'):
if not equivalent_types(conn, variant["type"], reference_variant["type"]):
return False

return True


Expand Down Expand Up @@ -598,10 +655,14 @@ def match_positional_variant(
):
# TODO: Check if variant and reference_variant should be interchanged
if compare_positional_variants(
variant=parsed, reference_variant=cast(PositionalVariant, row), generic=True
conn,
variant=parsed,
reference_variant=cast(PositionalVariant, row),
generic=True,
):
filtered_similarAndGeneric.append(row)
if compare_positional_variants(
conn,
variant=parsed,
reference_variant=cast(PositionalVariant, row),
generic=False, # Similar variants only
Expand Down
39 changes: 27 additions & 12 deletions tests/test_graphkb/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,80 +269,95 @@ def test_known_increased_expression(self, conn):
class TestComparePositionalVariants:
def test_nonspecific_altseq(self):
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}}
conn, {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}}
)
# null matches anything
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}, "untemplatedSeq": "T"}, {"break1Start": {"pos": 1}}
conn, {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}, {"break1Start": {"pos": 1}}
)
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}
conn, {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "untemplatedSeq": "T"}
)

@pytest.mark.parametrize("seq1", ["T", "X", "?"])
@pytest.mark.parametrize("seq2", ["T", "X", "?"])
def test_ambiguous_altseq(self, seq1, seq2):
# ambiguous AA matches anything the same length
assert match.compare_positional_variants(
conn,
{"break1Start": {"pos": 1}, "untemplatedSeq": seq1},
{"break1Start": {"pos": 1}, "untemplatedSeq": seq2},
)

def test_altseq_length_mismatch(self):
assert not match.compare_positional_variants(
conn,
{"break1Start": {"pos": 1}, "untemplatedSeq": "??"},
{"break1Start": {"pos": 1}, "untemplatedSeq": "T"},
)
assert not match.compare_positional_variants(
conn,
{"break1Start": {"pos": 1}, "untemplatedSeq": "?"},
{"break1Start": {"pos": 1}, "untemplatedSeq": "TT"},
)

def test_nonspecific_refseq(self):
# null matches anything
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}, "refSeq": "T"}, {"break1Start": {"pos": 1}}
conn, {"break1Start": {"pos": 1}, "refSeq": "T"}, {"break1Start": {"pos": 1}}
)
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "refSeq": "T"}
conn, {"break1Start": {"pos": 1}}, {"break1Start": {"pos": 1}, "refSeq": "T"}
)

@pytest.mark.parametrize("seq1", ["T", "X", "?"])
@pytest.mark.parametrize("seq2", ["T", "X", "?"])
def test_ambiguous_refseq(self, seq1, seq2):
# ambiguous AA matches anything the same length
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}, "refSeq": seq1}, {"break1Start": {"pos": 1}, "refSeq": seq2}
conn,
{"break1Start": {"pos": 1}, "refSeq": seq1},
{"break1Start": {"pos": 1}, "refSeq": seq2},
)

def test_refseq_length_mismatch(self):
assert not match.compare_positional_variants(
{"break1Start": {"pos": 1}, "refSeq": "??"}, {"break1Start": {"pos": 1}, "refSeq": "T"}
conn,
{"break1Start": {"pos": 1}, "refSeq": "??"},
{"break1Start": {"pos": 1}, "refSeq": "T"},
)
assert not match.compare_positional_variants(
{"break1Start": {"pos": 1}, "refSeq": "?"}, {"break1Start": {"pos": 1}, "refSeq": "TT"}
conn,
{"break1Start": {"pos": 1}, "refSeq": "?"},
{"break1Start": {"pos": 1}, "refSeq": "TT"},
)

def test_diff_altseq(self):
assert not match.compare_positional_variants(
conn,
{"break1Start": {"pos": 1}, "untemplatedSeq": "M"},
{"break1Start": {"pos": 1}, "untemplatedSeq": "R"},
)

def test_same_altseq_matches(self):
assert match.compare_positional_variants(
conn,
{"break1Start": {"pos": 1}, "untemplatedSeq": "R"},
{"break1Start": {"pos": 1}, "untemplatedSeq": "R"},
)

def test_diff_refseq(self):
assert not match.compare_positional_variants(
{"break1Start": {"pos": 1}, "refSeq": "M"}, {"break1Start": {"pos": 1}, "refSeq": "R"}
conn,
{"break1Start": {"pos": 1}, "refSeq": "M"},
{"break1Start": {"pos": 1}, "refSeq": "R"},
)

def test_same_refseq_matches(self):
assert match.compare_positional_variants(
{"break1Start": {"pos": 1}, "refSeq": "R"}, {"break1Start": {"pos": 1}, "refSeq": "R"}
conn,
{"break1Start": {"pos": 1}, "refSeq": "R"},
{"break1Start": {"pos": 1}, "refSeq": "R"},
)

def test_range_vs_sub(self):
Expand All @@ -364,8 +379,8 @@ def test_range_vs_sub(self):
"refSeq": "G",
"untemplatedSeq": "VV",
}
assert not match.compare_positional_variants(sub, range_variant)
assert not match.compare_positional_variants(range_variant, sub)
assert not match.compare_positional_variants(conn, sub, range_variant)
assert not match.compare_positional_variants(conn, range_variant, sub)


class TestMatchPositionalVariant:
Expand Down
Loading