|
1 | 1 | tumour_pattern = ( |
2 | 2 | r"(?P<tumour_prefix>[cpyramP]{1,2}\s?)?" # Optional tumour prefix |
3 | 3 | r"T\s?" # 'T' followed by optional space |
4 | | - r"(?P<tumour>([0-4]|is|[Xx]))" # Tumour size (required if 'T' is present) |
| 4 | + r"(?P<tumour>([0-4]|is|[Xx]|[Oo]))" # Tumour size (required if 'T' is present) |
5 | 5 | r"(?:\s?(?P<tumour_specification>[abcdx]|mi))?" # Optional tumour specification |
6 | 6 | r"(?:\s?\((?P<tumour_suffix>[^()]{1,10})\))?" # Optional tumour suffix |
7 | 7 | ) |
8 | 8 |
|
9 | 9 | node_pattern = ( |
10 | 10 | r"(?P<node_prefix>[cpyraP]{1,2}\s?)?" # Optional node prefix |
11 | 11 | r"N\s?" # 'N' followed by optional space |
12 | | - r"(?P<node>[Xx01234\+])" # Node size/status (required if 'N' is present) |
| 12 | + r"(?P<node>[Xx01234\+]|[Oo])" # Node size/status (required if 'N' is present) |
13 | 13 | r"(?:\s?(?P<node_specification>" |
14 | 14 | r"[abcdx]|mi|sn|i[-,+]|mol[-,+]|\(mi\)|\(sn\)|" |
15 | 15 | r"\(i[-,+]\)|\(mol[-,+]\)|\(\d+\s*/\s*\d+\)))?" # Optional specification |
|
19 | 19 | metastasis_pattern = ( |
20 | 20 | r"(?P<metastasis_prefix>[cpyraP]{1,2}\s?)?" # Optional metastasis prefix |
21 | 21 | r"M\s?" # 'M' followed by optional space |
22 | | - r"(?P<metastasis>[Xx0123\+])" # Metastasis status (required if 'M' is present) |
| 22 | + r"(?P<metastasis>[Xx0123\+]|[Oo])" # Metastasis status (required if 'M' is present) |
23 | 23 | r"(?:\s?(?P<metastasis_specification>" |
24 | 24 | r"[abcd]|i\+|mol\+|cy\+|\(i\+\)|\(mol\+\)|" |
25 | 25 | r"\(cy\+\)|PUL|OSS|HEP|BRA|LYM|OTH|MAR|PLE|PER|ADR|SKI))?" # Optional specification |
|
31 | 31 |
|
32 | 32 | resection_pattern = ( |
33 | 33 | r"R\s?" |
34 | | - r"(?P<resection>[Xx012])?" # Optional resection completeness |
| 34 | + r"(?P<resection>[Xx012]|[Oo])?" # Optional resection completeness |
35 | 35 | r"(?:\s?(?P<resection_specification>is|cy\+|\(is\)|\(cy\+\)))?" # Optional specification |
36 | 36 | r"(?:\s?(?P<resection_loc>(\((?P<r_loc>[a-z]+)\)[,;\s]*)*))?" # Optional localization with space |
37 | 37 | ) |
|
46 | 46 |
|
47 | 47 | # We need to exclude patterns like 'T1', 'T2' if they are not followed by node or |
48 | 48 | # metastasis sections. |
| 49 | + |
49 | 50 | exclude_pattern = ( |
50 | 51 | r"(?!T\s*[0-4]\s*[.,\/](?!\s*" |
51 | 52 | + node_pattern |
|
57 | 58 | + "))" |
58 | 59 | ) |
59 | 60 |
|
| 61 | +exclude_pattern = ( |
| 62 | + r"(?!" |
| 63 | + r"(?:[cpyramP]{0,2}\s*)?" # Optional prefix like p, yp, PT |
| 64 | + r"T\s*" |
| 65 | + r"(?:[0-4]|is|[xXoO])" # T stage (includes is, x, o) |
| 66 | + r"(?:[abcdx]|mi)?" # Optional specification |
| 67 | + r"(?:\s*\([^()]{1,10}\))?" # Optional suffix |
| 68 | + r"(?:\s*[\s,\/\.\(\)]|$)" # <-- KEY ADDITION: allow end-of-string ($) |
| 69 | + r"(?!\s*" |
| 70 | + + node_pattern + "?" + TNM_space + "?" + metastasis_pattern + "?" |
| 71 | + + ")" |
| 72 | + + ")" |
| 73 | +) |
| 74 | + |
60 | 75 | tnm_pattern_new = ( |
61 | 76 | r"(?:\b|^)" |
62 | 77 | + exclude_pattern |
|
90 | 105 | + version_pattern |
91 | 106 | + ")?" |
92 | 107 | + r")" |
93 | | - + r"(?:\b|$|\n)" |
| 108 | + + r"(?=[\s\(\)\.,;:/]|$)" |
| 109 | + #+ r"(?:\b|$|\n)" |
94 | 110 | ) |
0 commit comments