From 21dd152823342b9536c3cbe47ad5c74c627d2c3e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 19:02:40 +0000 Subject: [PATCH 1/7] Initial plan From b017a2d8507d380300079718eceaa161b1702b10 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 19:06:51 +0000 Subject: [PATCH 2/7] Add thematic code counting script and report for 16.01.2026 Co-authored-by: makr-code <150588092+makr-code@users.noreply.github.com> --- CODE_COUNT_REPORT_20260116.md | 138 ++++++++++++++++ count_thematic.py | 303 ++++++++++++++++++++++++++++++++++ 2 files changed, 441 insertions(+) create mode 100644 CODE_COUNT_REPORT_20260116.md create mode 100755 count_thematic.py diff --git a/CODE_COUNT_REPORT_20260116.md b/CODE_COUNT_REPORT_20260116.md new file mode 100644 index 000000000..994c8b796 --- /dev/null +++ b/CODE_COUNT_REPORT_20260116.md @@ -0,0 +1,138 @@ +# ThemisDB Code Count Report +## Date: 16.01.2026 19:06:03 + +This report provides a thematic count of C++ code in ThemisDB. + +## 1. Source Code by Theme (`./src`) + +| Theme | Files | Lines | +|-------|-------|-------| +| [root] | 4 | 2,371 | +| acceleration | 16 | 5,566 | +| analytics | 6 | 5,212 | +| api | 3 | 1,408 | +| aql | 2 | 801 | +| auth | 2 | 722 | +| base | 1 | 419 | +| cache | 2 | 600 | +| cdc | 2 | 715 | +| content | 20 | 10,374 | +| exporters | 1 | 657 | +| geo | 3 | 252 | +| governance | 1 | 185 | +| gpu | 1 | 170 | +| importers | 1 | 414 | +| index | 15 | 14,511 | +| llm | 83 | 37,777 | +| metadata | 1 | 680 | +| network | 1 | 340 | +| observability | 1 | 295 | +| performance | 12 | 1,964 | +| plugins | 3 | 1,020 | +| query | 16 | 11,331 | +| replication | 1 | 748 | +| scheduler | 2 | 1,511 | +| search | 1 | 251 | +| security | 18 | 7,205 | +| server | 62 | 35,139 | +| sharding | 42 | 15,642 | +| storage | 13 | 5,899 | +| temporal | 1 | 234 | +| timeseries | 11 | 2,774 | +| transaction | 3 | 1,020 | +| updates | 4 | 1,539 | +| utils | 28 | 8,316 | +| voice | 2 | 738 | +| **TOTAL** | **385** | **178,800** | + +## 2. Headers by Theme (`./include`) + +| Theme | Files | Lines | +|-------|-------|-------| +| [root] | 1 | 262 | +| acceleration | 8 | 1,165 | +| analytics | 7 | 3,323 | +| api | 2 | 475 | +| aql | 2 | 332 | +| auth | 2 | 284 | +| cache | 7 | 380 | +| cdc | 2 | 372 | +| content | 20 | 3,301 | +| document | 2 | 306 | +| exporters | 2 | 287 | +| geo | 2 | 72 | +| governance | 1 | 74 | +| importers | 2 | 250 | +| index | 17 | 4,348 | +| llm | 110 | 22,448 | +| metadata | 1 | 233 | +| network | 1 | 279 | +| observability | 1 | 135 | +| performance | 19 | 2,995 | +| plugins | 5 | 1,763 | +| projects | 1 | 221 | +| query | 35 | 20,935 | +| replication | 2 | 891 | +| scheduler | 2 | 546 | +| search | 1 | 120 | +| security | 18 | 3,097 | +| server | 63 | 8,512 | +| sharding | 44 | 9,803 | +| storage | 11 | 2,399 | +| temporal | 1 | 124 | +| themis | 6 | 1,027 | +| timeseries | 10 | 1,324 | +| transaction | 3 | 510 | +| updates | 4 | 567 | +| utils | 31 | 3,515 | +| voice | 1 | 235 | +| **TOTAL** | **447** | **96,910** | + +## 3. Themis-Specific Headers (`./include/themis`) + +| Component | Files | Lines | +|-----------|-------|-------| +| [themis-root] | 3 | 378 | +| base | 2 | 427 | +| network | 1 | 222 | +| **TOTAL** | **6** | **1,027** | + +## 4. Tools (`./tools`) + +- **Files**: 12 +- **Lines**: 864 + +## 5. Examples (`./examples`) + +- **Files**: 20 +- **Lines**: 4,566 + +## 6. Tests (`./tests`) + +- **Files**: 353 +- **Lines**: 106,016 + +## 7. Benchmarks (`./benchmarks`) + +- **Files**: 86 +- **Lines**: 28,477 + +## 8. Grand Total + +| Category | Files | Lines | +|----------|-------|-------| +| Source (`./src`) | 385 | 178,800 | +| Headers (`./include`) | 447 | 96,910 | +| Tools | 12 | 864 | +| Examples | 20 | 4,566 | +| Tests | 353 | 106,016 | +| Benchmarks | 86 | 28,477 | +| **GRAND TOTAL** | **1303** | **415,633** | + +## 9. Statistics + +- **Average lines per file**: 319.0 +- **Themis-specific files in include**: 6 files, 1,027 lines + +--- +*Report generated automatically for issue #581* \ No newline at end of file diff --git a/count_thematic.py b/count_thematic.py new file mode 100755 index 000000000..26a1239ac --- /dev/null +++ b/count_thematic.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python3 +""" +Thematic Code Counter for ThemisDB +Counts C++ code lines by theme/category for the date 16.01.2026 +""" + +import os +import subprocess +from pathlib import Path +from collections import defaultdict +from datetime import datetime + +def count_lines_in_files(file_list): + """Count total lines in a list of files.""" + if not file_list: + return 0 + try: + result = subprocess.run( + ['wc', '-l'] + file_list, + capture_output=True, + text=True, + check=True + ) + # Last line contains total + lines = result.stdout.strip().split('\n') + if len(lines) > 1: + total_line = lines[-1] + return int(total_line.split()[0]) + elif len(lines) == 1: + return int(lines[0].split()[0]) + except Exception as e: + print(f"Error counting lines: {e}") + return 0 + return 0 + +def find_cpp_files(directory): + """Find all C++ files in a directory.""" + cpp_extensions = ['.cpp', '.h', '.hpp', '.cc', '.cxx'] + cpp_files = [] + + if not os.path.exists(directory): + return [] + + for root, dirs, files in os.walk(directory): + for file in files: + if any(file.endswith(ext) for ext in cpp_extensions): + cpp_files.append(os.path.join(root, file)) + + return cpp_files + +def count_by_category(base_dir): + """Count C++ code lines by category/theme.""" + categories = {} + + if not os.path.exists(base_dir): + return categories + + # Get immediate subdirectories + subdirs = [d for d in os.listdir(base_dir) + if os.path.isdir(os.path.join(base_dir, d))] + + for subdir in sorted(subdirs): + subdir_path = os.path.join(base_dir, subdir) + cpp_files = find_cpp_files(subdir_path) + if cpp_files: + line_count = count_lines_in_files(cpp_files) + file_count = len(cpp_files) + categories[subdir] = { + 'lines': line_count, + 'files': file_count + } + + # Also count files directly in the base directory + direct_files = [os.path.join(base_dir, f) + for f in os.listdir(base_dir) + if os.path.isfile(os.path.join(base_dir, f)) + and any(f.endswith(ext) for ext in ['.cpp', '.h', '.hpp', '.cc', '.cxx'])] + + if direct_files: + line_count = count_lines_in_files(direct_files) + categories['[root]'] = { + 'lines': line_count, + 'files': len(direct_files) + } + + return categories + +def count_themis_only(include_dir): + """Count only Themis-related files in include directory.""" + themis_path = os.path.join(include_dir, 'themis') + + if not os.path.exists(themis_path): + return {}, 0, 0 + + # Count by subdirectories in themis + categories = {} + subdirs = [d for d in os.listdir(themis_path) + if os.path.isdir(os.path.join(themis_path, d))] + + for subdir in sorted(subdirs): + subdir_path = os.path.join(themis_path, subdir) + cpp_files = find_cpp_files(subdir_path) + if cpp_files: + line_count = count_lines_in_files(cpp_files) + categories[subdir] = { + 'lines': line_count, + 'files': len(cpp_files) + } + + # Count files directly in themis directory + direct_files = [os.path.join(themis_path, f) + for f in os.listdir(themis_path) + if os.path.isfile(os.path.join(themis_path, f)) + and any(f.endswith(ext) for ext in ['.cpp', '.h', '.hpp', '.cc', '.cxx'])] + + if direct_files: + line_count = count_lines_in_files(direct_files) + categories['[themis-root]'] = { + 'lines': line_count, + 'files': len(direct_files) + } + + # Calculate totals + total_lines = sum(cat['lines'] for cat in categories.values()) + total_files = sum(cat['files'] for cat in categories.values()) + + return categories, total_lines, total_files + +def count_directory_total(directory, description): + """Count total C++ code in a directory.""" + cpp_files = find_cpp_files(directory) + if cpp_files: + line_count = count_lines_in_files(cpp_files) + return { + 'description': description, + 'lines': line_count, + 'files': len(cpp_files) + } + return { + 'description': description, + 'lines': 0, + 'files': 0 + } + +def generate_report(repo_root): + """Generate comprehensive counting report.""" + report_lines = [] + report_lines.append("# ThemisDB Code Count Report") + report_lines.append(f"## Date: {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}") + report_lines.append("") + report_lines.append("This report provides a thematic count of C++ code in ThemisDB.") + report_lines.append("") + + # Count src by theme + report_lines.append("## 1. Source Code by Theme (`./src`)") + report_lines.append("") + src_categories = count_by_category(os.path.join(repo_root, 'src')) + + if src_categories: + report_lines.append("| Theme | Files | Lines |") + report_lines.append("|-------|-------|-------|") + total_src_lines = 0 + total_src_files = 0 + for theme, data in sorted(src_categories.items()): + report_lines.append(f"| {theme} | {data['files']} | {data['lines']:,} |") + total_src_lines += data['lines'] + total_src_files += data['files'] + report_lines.append(f"| **TOTAL** | **{total_src_files}** | **{total_src_lines:,}** |") + else: + report_lines.append("*No C++ files found*") + report_lines.append("") + + # Count include by theme (all categories) + report_lines.append("## 2. Headers by Theme (`./include`)") + report_lines.append("") + include_categories = count_by_category(os.path.join(repo_root, 'include')) + + if include_categories: + report_lines.append("| Theme | Files | Lines |") + report_lines.append("|-------|-------|-------|") + total_include_lines = 0 + total_include_files = 0 + for theme, data in sorted(include_categories.items()): + report_lines.append(f"| {theme} | {data['files']} | {data['lines']:,} |") + total_include_lines += data['lines'] + total_include_files += data['files'] + report_lines.append(f"| **TOTAL** | **{total_include_files}** | **{total_include_lines:,}** |") + else: + report_lines.append("*No C++ files found*") + report_lines.append("") + + # Count Themis-specific in include + report_lines.append("## 3. Themis-Specific Headers (`./include/themis`)") + report_lines.append("") + themis_categories, themis_lines, themis_files = count_themis_only(os.path.join(repo_root, 'include')) + + if themis_categories: + report_lines.append("| Component | Files | Lines |") + report_lines.append("|-----------|-------|-------|") + for component, data in sorted(themis_categories.items()): + report_lines.append(f"| {component} | {data['files']} | {data['lines']:,} |") + report_lines.append(f"| **TOTAL** | **{themis_files}** | **{themis_lines:,}** |") + else: + report_lines.append("*No Themis-specific files found*") + report_lines.append("") + + # Count tools + report_lines.append("## 4. Tools (`./tools`)") + report_lines.append("") + tools_data = count_directory_total(os.path.join(repo_root, 'tools'), "Development tools") + report_lines.append(f"- **Files**: {tools_data['files']}") + report_lines.append(f"- **Lines**: {tools_data['lines']:,}") + report_lines.append("") + + # Count examples + report_lines.append("## 5. Examples (`./examples`)") + report_lines.append("") + examples_data = count_directory_total(os.path.join(repo_root, 'examples'), "Example applications") + report_lines.append(f"- **Files**: {examples_data['files']}") + report_lines.append(f"- **Lines**: {examples_data['lines']:,}") + report_lines.append("") + + # Count tests + report_lines.append("## 6. Tests (`./tests`)") + report_lines.append("") + tests_data = count_directory_total(os.path.join(repo_root, 'tests'), "Test suite") + report_lines.append(f"- **Files**: {tests_data['files']}") + report_lines.append(f"- **Lines**: {tests_data['lines']:,}") + report_lines.append("") + + # Count benchmarks + report_lines.append("## 7. Benchmarks (`./benchmarks`)") + report_lines.append("") + benchmarks_data = count_directory_total(os.path.join(repo_root, 'benchmarks'), "Performance benchmarks") + report_lines.append(f"- **Files**: {benchmarks_data['files']}") + report_lines.append(f"- **Lines**: {benchmarks_data['lines']:,}") + report_lines.append("") + + # Grand total + report_lines.append("## 8. Grand Total") + report_lines.append("") + grand_total_lines = ( + total_src_lines + + total_include_lines + + tools_data['lines'] + + examples_data['lines'] + + tests_data['lines'] + + benchmarks_data['lines'] + ) + grand_total_files = ( + total_src_files + + total_include_files + + tools_data['files'] + + examples_data['files'] + + tests_data['files'] + + benchmarks_data['files'] + ) + + report_lines.append("| Category | Files | Lines |") + report_lines.append("|----------|-------|-------|") + report_lines.append(f"| Source (`./src`) | {total_src_files} | {total_src_lines:,} |") + report_lines.append(f"| Headers (`./include`) | {total_include_files} | {total_include_lines:,} |") + report_lines.append(f"| Tools | {tools_data['files']} | {tools_data['lines']:,} |") + report_lines.append(f"| Examples | {examples_data['files']} | {examples_data['lines']:,} |") + report_lines.append(f"| Tests | {tests_data['files']} | {tests_data['lines']:,} |") + report_lines.append(f"| Benchmarks | {benchmarks_data['files']} | {benchmarks_data['lines']:,} |") + report_lines.append(f"| **GRAND TOTAL** | **{grand_total_files}** | **{grand_total_lines:,}** |") + report_lines.append("") + + # Additional statistics + report_lines.append("## 9. Statistics") + report_lines.append("") + if grand_total_files > 0: + avg_lines_per_file = grand_total_lines / grand_total_files + report_lines.append(f"- **Average lines per file**: {avg_lines_per_file:.1f}") + report_lines.append(f"- **Themis-specific files in include**: {themis_files} files, {themis_lines:,} lines") + report_lines.append("") + + report_lines.append("---") + report_lines.append("*Report generated automatically for issue #581*") + + return '\n'.join(report_lines) + +def main(): + """Main execution function.""" + repo_root = '/home/runner/work/ThemisDB/ThemisDB' + + print("Generating thematic code count report...") + report = generate_report(repo_root) + + # Save report + report_file = os.path.join(repo_root, 'CODE_COUNT_REPORT_20260116.md') + with open(report_file, 'w') as f: + f.write(report) + + print(f"Report saved to: {report_file}") + print("\nReport Preview:") + print("=" * 80) + print(report) + print("=" * 80) + +if __name__ == '__main__': + main() From 0a8fd6ee8814f76fdb88e6e01507754a77a11130 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 19:08:28 +0000 Subject: [PATCH 3/7] Add scripts counting section to the report Co-authored-by: makr-code <150588092+makr-code@users.noreply.github.com> --- CODE_COUNT_REPORT_20260116.md | 12 +++++++++--- count_thematic.py | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/CODE_COUNT_REPORT_20260116.md b/CODE_COUNT_REPORT_20260116.md index 994c8b796..bc3b30f47 100644 --- a/CODE_COUNT_REPORT_20260116.md +++ b/CODE_COUNT_REPORT_20260116.md @@ -1,5 +1,5 @@ # ThemisDB Code Count Report -## Date: 16.01.2026 19:06:03 +## Date: 16.01.2026 19:07:52 This report provides a thematic count of C++ code in ThemisDB. @@ -117,7 +117,12 @@ This report provides a thematic count of C++ code in ThemisDB. - **Files**: 86 - **Lines**: 28,477 -## 8. Grand Total +## 8. Scripts (`./scripts`) + +- **Files**: 0 +- **Lines**: 0 + +## 9. Grand Total | Category | Files | Lines | |----------|-------|-------| @@ -127,9 +132,10 @@ This report provides a thematic count of C++ code in ThemisDB. | Examples | 20 | 4,566 | | Tests | 353 | 106,016 | | Benchmarks | 86 | 28,477 | +| Scripts | 0 | 0 | | **GRAND TOTAL** | **1303** | **415,633** | -## 9. Statistics +## 10. Statistics - **Average lines per file**: 319.0 - **Themis-specific files in include**: 6 files, 1,027 lines diff --git a/count_thematic.py b/count_thematic.py index 26a1239ac..896b224f5 100755 --- a/count_thematic.py +++ b/count_thematic.py @@ -236,8 +236,16 @@ def generate_report(repo_root): report_lines.append(f"- **Lines**: {benchmarks_data['lines']:,}") report_lines.append("") + # Count scripts + report_lines.append("## 8. Scripts (`./scripts`)") + report_lines.append("") + scripts_data = count_directory_total(os.path.join(repo_root, 'scripts'), "Build and deployment scripts") + report_lines.append(f"- **Files**: {scripts_data['files']}") + report_lines.append(f"- **Lines**: {scripts_data['lines']:,}") + report_lines.append("") + # Grand total - report_lines.append("## 8. Grand Total") + report_lines.append("## 9. Grand Total") report_lines.append("") grand_total_lines = ( total_src_lines + @@ -245,7 +253,8 @@ def generate_report(repo_root): tools_data['lines'] + examples_data['lines'] + tests_data['lines'] + - benchmarks_data['lines'] + benchmarks_data['lines'] + + scripts_data['lines'] ) grand_total_files = ( total_src_files + @@ -253,7 +262,8 @@ def generate_report(repo_root): tools_data['files'] + examples_data['files'] + tests_data['files'] + - benchmarks_data['files'] + benchmarks_data['files'] + + scripts_data['files'] ) report_lines.append("| Category | Files | Lines |") @@ -264,11 +274,12 @@ def generate_report(repo_root): report_lines.append(f"| Examples | {examples_data['files']} | {examples_data['lines']:,} |") report_lines.append(f"| Tests | {tests_data['files']} | {tests_data['lines']:,} |") report_lines.append(f"| Benchmarks | {benchmarks_data['files']} | {benchmarks_data['lines']:,} |") + report_lines.append(f"| Scripts | {scripts_data['files']} | {scripts_data['lines']:,} |") report_lines.append(f"| **GRAND TOTAL** | **{grand_total_files}** | **{grand_total_lines:,}** |") report_lines.append("") # Additional statistics - report_lines.append("## 9. Statistics") + report_lines.append("## 10. Statistics") report_lines.append("") if grand_total_files > 0: avg_lines_per_file = grand_total_lines / grand_total_files From c1b2ef02d9993d978f00706d17c78cea93050063 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 19:10:20 +0000 Subject: [PATCH 4/7] Improve script: remove unused imports, fix security issues, add CLI args Co-authored-by: makr-code <150588092+makr-code@users.noreply.github.com> --- CODE_COUNT_REPORT_20260116.md | 12 ++++----- count_thematic.py | 47 ++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/CODE_COUNT_REPORT_20260116.md b/CODE_COUNT_REPORT_20260116.md index bc3b30f47..e9f3a1704 100644 --- a/CODE_COUNT_REPORT_20260116.md +++ b/CODE_COUNT_REPORT_20260116.md @@ -1,5 +1,5 @@ # ThemisDB Code Count Report -## Date: 16.01.2026 19:07:52 +## Date: 16.01.2026 19:09:45 This report provides a thematic count of C++ code in ThemisDB. @@ -22,7 +22,7 @@ This report provides a thematic count of C++ code in ThemisDB. | governance | 1 | 185 | | gpu | 1 | 170 | | importers | 1 | 414 | -| index | 15 | 14,511 | +| index | 15 | 14,513 | | llm | 83 | 37,777 | | metadata | 1 | 680 | | network | 1 | 340 | @@ -34,7 +34,7 @@ This report provides a thematic count of C++ code in ThemisDB. | scheduler | 2 | 1,511 | | search | 1 | 251 | | security | 18 | 7,205 | -| server | 62 | 35,139 | +| server | 62 | 35,140 | | sharding | 42 | 15,642 | | storage | 13 | 5,899 | | temporal | 1 | 234 | @@ -43,7 +43,7 @@ This report provides a thematic count of C++ code in ThemisDB. | updates | 4 | 1,539 | | utils | 28 | 8,316 | | voice | 2 | 738 | -| **TOTAL** | **385** | **178,800** | +| **TOTAL** | **385** | **178,803** | ## 2. Headers by Theme (`./include`) @@ -126,14 +126,14 @@ This report provides a thematic count of C++ code in ThemisDB. | Category | Files | Lines | |----------|-------|-------| -| Source (`./src`) | 385 | 178,800 | +| Source (`./src`) | 385 | 178,803 | | Headers (`./include`) | 447 | 96,910 | | Tools | 12 | 864 | | Examples | 20 | 4,566 | | Tests | 353 | 106,016 | | Benchmarks | 86 | 28,477 | | Scripts | 0 | 0 | -| **GRAND TOTAL** | **1303** | **415,633** | +| **GRAND TOTAL** | **1303** | **415,636** | ## 10. Statistics diff --git a/count_thematic.py b/count_thematic.py index 896b224f5..039547bbe 100755 --- a/count_thematic.py +++ b/count_thematic.py @@ -6,32 +6,23 @@ import os import subprocess -from pathlib import Path -from collections import defaultdict from datetime import datetime def count_lines_in_files(file_list): - """Count total lines in a list of files.""" + """Count total lines in a list of files using Python's built-in file reading.""" if not file_list: return 0 - try: - result = subprocess.run( - ['wc', '-l'] + file_list, - capture_output=True, - text=True, - check=True - ) - # Last line contains total - lines = result.stdout.strip().split('\n') - if len(lines) > 1: - total_line = lines[-1] - return int(total_line.split()[0]) - elif len(lines) == 1: - return int(lines[0].split()[0]) - except Exception as e: - print(f"Error counting lines: {e}") - return 0 - return 0 + + total_lines = 0 + for file_path in file_list: + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + total_lines += sum(1 for _ in f) + except Exception as e: + print(f"Warning: Could not read {file_path}: {e}") + continue + + return total_lines def find_cpp_files(directory): """Find all C++ files in a directory.""" @@ -294,9 +285,19 @@ def generate_report(repo_root): def main(): """Main execution function.""" - repo_root = '/home/runner/work/ThemisDB/ThemisDB' + import sys + + # Allow passing repo root as argument or use current directory + if len(sys.argv) > 1: + repo_root = sys.argv[1] + else: + repo_root = os.getcwd() + + if not os.path.exists(repo_root): + print(f"Error: Directory {repo_root} does not exist") + sys.exit(1) - print("Generating thematic code count report...") + print(f"Generating thematic code count report for: {repo_root}") report = generate_report(repo_root) # Save report From ec8fdc86275b158dd9742c6dfb7cdaa5b1ef1905 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 16 Jan 2026 19:12:01 +0000 Subject: [PATCH 5/7] Fix remaining code review issues: remove unused import, improve exception handling, fix variable scoping Co-authored-by: makr-code <150588092+makr-code@users.noreply.github.com> --- CODE_COUNT_REPORT_20260116.md | 2 +- count_thematic.py | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/CODE_COUNT_REPORT_20260116.md b/CODE_COUNT_REPORT_20260116.md index e9f3a1704..247d68630 100644 --- a/CODE_COUNT_REPORT_20260116.md +++ b/CODE_COUNT_REPORT_20260116.md @@ -1,5 +1,5 @@ # ThemisDB Code Count Report -## Date: 16.01.2026 19:09:45 +## Date: 16.01.2026 19:11:37 This report provides a thematic count of C++ code in ThemisDB. diff --git a/count_thematic.py b/count_thematic.py index 039547bbe..73a33b322 100755 --- a/count_thematic.py +++ b/count_thematic.py @@ -5,7 +5,6 @@ """ import os -import subprocess from datetime import datetime def count_lines_in_files(file_list): @@ -18,7 +17,7 @@ def count_lines_in_files(file_list): try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: total_lines += sum(1 for _ in f) - except Exception as e: + except (FileNotFoundError, PermissionError, UnicodeDecodeError, OSError) as e: print(f"Warning: Could not read {file_path}: {e}") continue @@ -147,11 +146,11 @@ def generate_report(repo_root): report_lines.append("") src_categories = count_by_category(os.path.join(repo_root, 'src')) + total_src_lines = 0 + total_src_files = 0 if src_categories: report_lines.append("| Theme | Files | Lines |") report_lines.append("|-------|-------|-------|") - total_src_lines = 0 - total_src_files = 0 for theme, data in sorted(src_categories.items()): report_lines.append(f"| {theme} | {data['files']} | {data['lines']:,} |") total_src_lines += data['lines'] @@ -166,11 +165,11 @@ def generate_report(repo_root): report_lines.append("") include_categories = count_by_category(os.path.join(repo_root, 'include')) + total_include_lines = 0 + total_include_files = 0 if include_categories: report_lines.append("| Theme | Files | Lines |") report_lines.append("|-------|-------|-------|") - total_include_lines = 0 - total_include_files = 0 for theme, data in sorted(include_categories.items()): report_lines.append(f"| {theme} | {data['files']} | {data['lines']:,} |") total_include_lines += data['lines'] From 62b49774e1a626068da2f1081db92d5ee954b012 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 06:56:33 +0000 Subject: [PATCH 6/7] Add documentation counting to thematic count report Co-authored-by: makr-code <150588092+makr-code@users.noreply.github.com> --- CODE_COUNT_REPORT_20260116.md | 64 ++++++++++++- count_thematic.py | 168 +++++++++++++++++++++++++++++++++- 2 files changed, 223 insertions(+), 9 deletions(-) diff --git a/CODE_COUNT_REPORT_20260116.md b/CODE_COUNT_REPORT_20260116.md index 247d68630..6afe178d0 100644 --- a/CODE_COUNT_REPORT_20260116.md +++ b/CODE_COUNT_REPORT_20260116.md @@ -1,5 +1,5 @@ # ThemisDB Code Count Report -## Date: 16.01.2026 19:11:37 +## Date: 17.01.2026 06:56:07 This report provides a thematic count of C++ code in ThemisDB. @@ -122,7 +122,42 @@ This report provides a thematic count of C++ code in ThemisDB. - **Files**: 0 - **Lines**: 0 -## 9. Grand Total +## 9. Documentation (`./docs`) + +| Category/Language | Files | Lines | +|-------------------|-------|-------| +| [root] | 258 | 103,933 | +| analysis | 6 | 4,263 | +| api | 1 | 626 | +| architecture | 8 | 4,928 | +| archive | 13 | 4,848 | +| build-guide | 12 | 4,006 | +| de | 802 | 338,570 | +| deployment | 1 | 167 | +| en | 76 | 33,743 | +| es | 2 | 98 | +| features | 5 | 1,460 | +| fr | 14 | 2,875 | +| gimini | 24 | 5,688 | +| ja | 2 | 98 | +| llm | 2 | 485 | +| planning | 1 | 411 | +| reports | 43 | 12,795 | +| research | 15 | 12,042 | +| security | 5 | 1,676 | +| sharding | 1 | 126 | +| testing | 1 | 297 | +| tools | 14 | 3,503 | +| **TOTAL** | **1306** | **536,638** | + +## 10. Root Documentation + +- **Files**: 52 +- **Lines**: 22,487 + +## 11. Grand Total + +### C++ Code | Category | Files | Lines | |----------|-------|-------| @@ -133,12 +168,31 @@ This report provides a thematic count of C++ code in ThemisDB. | Tests | 353 | 106,016 | | Benchmarks | 86 | 28,477 | | Scripts | 0 | 0 | -| **GRAND TOTAL** | **1303** | **415,636** | +| **C++ TOTAL** | **1303** | **415,636** | -## 10. Statistics +### Documentation -- **Average lines per file**: 319.0 +| Category | Files | Lines | +|----------|-------|-------| +| Documentation (`./docs`) | 1306 | 536,638 | +| Root Documentation | 52 | 22,487 | +| **DOCUMENTATION TOTAL** | **1358** | **559,125** | + +### Overall Total + +| Type | Files | Lines | +|------|-------|-------| +| C++ Code | 1303 | 415,636 | +| Documentation | 1358 | 559,125 | +| **GRAND TOTAL** | **2661** | **974,761** | + +## 12. Statistics + +- **Average lines per C++ file**: 319.0 +- **Average lines per documentation file**: 411.7 +- **Average lines per file (all)**: 366.3 - **Themis-specific files in include**: 6 files, 1,027 lines +- **Code-to-documentation ratio**: 0.74:1 --- *Report generated automatically for issue #581* \ No newline at end of file diff --git a/count_thematic.py b/count_thematic.py index 73a33b322..9487d7b24 100755 --- a/count_thematic.py +++ b/count_thematic.py @@ -38,6 +38,21 @@ def find_cpp_files(directory): return cpp_files +def find_doc_files(directory): + """Find all documentation files in a directory.""" + doc_extensions = ['.md', '.rst', '.txt', '.adoc'] + doc_files = [] + + if not os.path.exists(directory): + return [] + + for root, dirs, files in os.walk(directory): + for file in files: + if any(file.endswith(ext) for ext in doc_extensions): + doc_files.append(os.path.join(root, file)) + + return doc_files + def count_by_category(base_dir): """Count C++ code lines by category/theme.""" categories = {} @@ -132,6 +147,59 @@ def count_directory_total(directory, description): 'files': 0 } +def count_documentation_by_category(docs_dir): + """Count documentation files by category/language in docs directory.""" + categories = {} + + if not os.path.exists(docs_dir): + return categories + + # Get immediate subdirectories (languages and categories) + subdirs = [d for d in os.listdir(docs_dir) + if os.path.isdir(os.path.join(docs_dir, d))] + + for subdir in sorted(subdirs): + subdir_path = os.path.join(docs_dir, subdir) + doc_files = find_doc_files(subdir_path) + if doc_files: + line_count = count_lines_in_files(doc_files) + file_count = len(doc_files) + categories[subdir] = { + 'lines': line_count, + 'files': file_count + } + + # Also count files directly in the docs directory + direct_files = [os.path.join(docs_dir, f) + for f in os.listdir(docs_dir) + if os.path.isfile(os.path.join(docs_dir, f)) + and any(f.endswith(ext) for ext in ['.md', '.rst', '.txt', '.adoc'])] + + if direct_files: + line_count = count_lines_in_files(direct_files) + categories['[root]'] = { + 'lines': line_count, + 'files': len(direct_files) + } + + return categories + +def count_documentation_total(directory, description): + """Count total documentation files in a directory.""" + doc_files = find_doc_files(directory) + if doc_files: + line_count = count_lines_in_files(doc_files) + return { + 'description': description, + 'lines': line_count, + 'files': len(doc_files) + } + return { + 'description': description, + 'lines': 0, + 'files': 0 + } + def generate_report(repo_root): """Generate comprehensive counting report.""" report_lines = [] @@ -234,8 +302,46 @@ def generate_report(repo_root): report_lines.append(f"- **Lines**: {scripts_data['lines']:,}") report_lines.append("") + # Count documentation by category + report_lines.append("## 9. Documentation (`./docs`)") + report_lines.append("") + docs_categories = count_documentation_by_category(os.path.join(repo_root, 'docs')) + + total_docs_lines = 0 + total_docs_files = 0 + if docs_categories: + report_lines.append("| Category/Language | Files | Lines |") + report_lines.append("|-------------------|-------|-------|") + for category, data in sorted(docs_categories.items()): + report_lines.append(f"| {category} | {data['files']} | {data['lines']:,} |") + total_docs_lines += data['lines'] + total_docs_files += data['files'] + report_lines.append(f"| **TOTAL** | **{total_docs_files}** | **{total_docs_lines:,}** |") + else: + report_lines.append("*No documentation files found*") + report_lines.append("") + + # Count other documentation (root level markdown files) + report_lines.append("## 10. Root Documentation") + report_lines.append("") + root_doc_files = [os.path.join(repo_root, f) + for f in os.listdir(repo_root) + if os.path.isfile(os.path.join(repo_root, f)) + and any(f.endswith(ext) for ext in ['.md', '.rst', '.txt', '.adoc'])] + + if root_doc_files: + root_doc_lines = count_lines_in_files(root_doc_files) + root_doc_count = len(root_doc_files) + report_lines.append(f"- **Files**: {root_doc_count}") + report_lines.append(f"- **Lines**: {root_doc_lines:,}") + else: + report_lines.append("*No root documentation files found*") + root_doc_lines = root_doc_lines if root_doc_files else 0 + root_doc_count = root_doc_count if root_doc_files else 0 + report_lines.append("") + # Grand total - report_lines.append("## 9. Grand Total") + report_lines.append("## 11. Grand Total") report_lines.append("") grand_total_lines = ( total_src_lines + @@ -244,9 +350,33 @@ def generate_report(repo_root): examples_data['lines'] + tests_data['lines'] + benchmarks_data['lines'] + - scripts_data['lines'] + scripts_data['lines'] + + total_docs_lines + + root_doc_lines ) grand_total_files = ( + total_src_files + + total_include_files + + tools_data['files'] + + examples_data['files'] + + tests_data['files'] + + benchmarks_data['files'] + + scripts_data['files'] + + total_docs_files + + root_doc_count + ) + + # Separate C++ and documentation totals + cpp_total_lines = ( + total_src_lines + + total_include_lines + + tools_data['lines'] + + examples_data['lines'] + + tests_data['lines'] + + benchmarks_data['lines'] + + scripts_data['lines'] + ) + cpp_total_files = ( total_src_files + total_include_files + tools_data['files'] + @@ -255,7 +385,11 @@ def generate_report(repo_root): benchmarks_data['files'] + scripts_data['files'] ) + doc_total_lines = total_docs_lines + root_doc_lines + doc_total_files = total_docs_files + root_doc_count + report_lines.append("### C++ Code") + report_lines.append("") report_lines.append("| Category | Files | Lines |") report_lines.append("|----------|-------|-------|") report_lines.append(f"| Source (`./src`) | {total_src_files} | {total_src_lines:,} |") @@ -265,16 +399,42 @@ def generate_report(repo_root): report_lines.append(f"| Tests | {tests_data['files']} | {tests_data['lines']:,} |") report_lines.append(f"| Benchmarks | {benchmarks_data['files']} | {benchmarks_data['lines']:,} |") report_lines.append(f"| Scripts | {scripts_data['files']} | {scripts_data['lines']:,} |") + report_lines.append(f"| **C++ TOTAL** | **{cpp_total_files}** | **{cpp_total_lines:,}** |") + report_lines.append("") + + report_lines.append("### Documentation") + report_lines.append("") + report_lines.append("| Category | Files | Lines |") + report_lines.append("|----------|-------|-------|") + report_lines.append(f"| Documentation (`./docs`) | {total_docs_files} | {total_docs_lines:,} |") + report_lines.append(f"| Root Documentation | {root_doc_count} | {root_doc_lines:,} |") + report_lines.append(f"| **DOCUMENTATION TOTAL** | **{doc_total_files}** | **{doc_total_lines:,}** |") + report_lines.append("") + + report_lines.append("### Overall Total") + report_lines.append("") + report_lines.append("| Type | Files | Lines |") + report_lines.append("|------|-------|-------|") + report_lines.append(f"| C++ Code | {cpp_total_files} | {cpp_total_lines:,} |") + report_lines.append(f"| Documentation | {doc_total_files} | {doc_total_lines:,} |") report_lines.append(f"| **GRAND TOTAL** | **{grand_total_files}** | **{grand_total_lines:,}** |") report_lines.append("") # Additional statistics - report_lines.append("## 10. Statistics") + report_lines.append("## 12. Statistics") report_lines.append("") + if cpp_total_files > 0: + avg_cpp_lines_per_file = cpp_total_lines / cpp_total_files + report_lines.append(f"- **Average lines per C++ file**: {avg_cpp_lines_per_file:.1f}") + if doc_total_files > 0: + avg_doc_lines_per_file = doc_total_lines / doc_total_files + report_lines.append(f"- **Average lines per documentation file**: {avg_doc_lines_per_file:.1f}") if grand_total_files > 0: avg_lines_per_file = grand_total_lines / grand_total_files - report_lines.append(f"- **Average lines per file**: {avg_lines_per_file:.1f}") + report_lines.append(f"- **Average lines per file (all)**: {avg_lines_per_file:.1f}") report_lines.append(f"- **Themis-specific files in include**: {themis_files} files, {themis_lines:,} lines") + if doc_total_lines > 0: + report_lines.append(f"- **Code-to-documentation ratio**: {cpp_total_lines / doc_total_lines:.2f}:1") report_lines.append("") report_lines.append("---") From 59e5626b7f4dac7ba3495f2387be1974087759b3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 17 Jan 2026 06:57:59 +0000 Subject: [PATCH 7/7] Fix code review issues: extract constants, fix variable scoping Co-authored-by: makr-code <150588092+makr-code@users.noreply.github.com> --- CODE_COUNT_REPORT_20260116.md | 14 +++++++------- count_thematic.py | 22 ++++++++++++---------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/CODE_COUNT_REPORT_20260116.md b/CODE_COUNT_REPORT_20260116.md index 6afe178d0..41d73e64c 100644 --- a/CODE_COUNT_REPORT_20260116.md +++ b/CODE_COUNT_REPORT_20260116.md @@ -1,5 +1,5 @@ # ThemisDB Code Count Report -## Date: 17.01.2026 06:56:07 +## Date: 17.01.2026 06:57:42 This report provides a thematic count of C++ code in ThemisDB. @@ -153,7 +153,7 @@ This report provides a thematic count of C++ code in ThemisDB. ## 10. Root Documentation - **Files**: 52 -- **Lines**: 22,487 +- **Lines**: 22,541 ## 11. Grand Total @@ -175,21 +175,21 @@ This report provides a thematic count of C++ code in ThemisDB. | Category | Files | Lines | |----------|-------|-------| | Documentation (`./docs`) | 1306 | 536,638 | -| Root Documentation | 52 | 22,487 | -| **DOCUMENTATION TOTAL** | **1358** | **559,125** | +| Root Documentation | 52 | 22,541 | +| **DOCUMENTATION TOTAL** | **1358** | **559,179** | ### Overall Total | Type | Files | Lines | |------|-------|-------| | C++ Code | 1303 | 415,636 | -| Documentation | 1358 | 559,125 | -| **GRAND TOTAL** | **2661** | **974,761** | +| Documentation | 1358 | 559,179 | +| **GRAND TOTAL** | **2661** | **974,815** | ## 12. Statistics - **Average lines per C++ file**: 319.0 -- **Average lines per documentation file**: 411.7 +- **Average lines per documentation file**: 411.8 - **Average lines per file (all)**: 366.3 - **Themis-specific files in include**: 6 files, 1,027 lines - **Code-to-documentation ratio**: 0.74:1 diff --git a/count_thematic.py b/count_thematic.py index 9487d7b24..802cd8de2 100755 --- a/count_thematic.py +++ b/count_thematic.py @@ -7,6 +7,10 @@ import os from datetime import datetime +# File extensions +CPP_EXTENSIONS = ['.cpp', '.h', '.hpp', '.cc', '.cxx'] +DOC_EXTENSIONS = ['.md', '.rst', '.txt', '.adoc'] + def count_lines_in_files(file_list): """Count total lines in a list of files using Python's built-in file reading.""" if not file_list: @@ -25,7 +29,6 @@ def count_lines_in_files(file_list): def find_cpp_files(directory): """Find all C++ files in a directory.""" - cpp_extensions = ['.cpp', '.h', '.hpp', '.cc', '.cxx'] cpp_files = [] if not os.path.exists(directory): @@ -33,14 +36,13 @@ def find_cpp_files(directory): for root, dirs, files in os.walk(directory): for file in files: - if any(file.endswith(ext) for ext in cpp_extensions): + if any(file.endswith(ext) for ext in CPP_EXTENSIONS): cpp_files.append(os.path.join(root, file)) return cpp_files def find_doc_files(directory): """Find all documentation files in a directory.""" - doc_extensions = ['.md', '.rst', '.txt', '.adoc'] doc_files = [] if not os.path.exists(directory): @@ -48,7 +50,7 @@ def find_doc_files(directory): for root, dirs, files in os.walk(directory): for file in files: - if any(file.endswith(ext) for ext in doc_extensions): + if any(file.endswith(ext) for ext in DOC_EXTENSIONS): doc_files.append(os.path.join(root, file)) return doc_files @@ -79,7 +81,7 @@ def count_by_category(base_dir): direct_files = [os.path.join(base_dir, f) for f in os.listdir(base_dir) if os.path.isfile(os.path.join(base_dir, f)) - and any(f.endswith(ext) for ext in ['.cpp', '.h', '.hpp', '.cc', '.cxx'])] + and any(f.endswith(ext) for ext in CPP_EXTENSIONS)] if direct_files: line_count = count_lines_in_files(direct_files) @@ -116,7 +118,7 @@ def count_themis_only(include_dir): direct_files = [os.path.join(themis_path, f) for f in os.listdir(themis_path) if os.path.isfile(os.path.join(themis_path, f)) - and any(f.endswith(ext) for ext in ['.cpp', '.h', '.hpp', '.cc', '.cxx'])] + and any(f.endswith(ext) for ext in CPP_EXTENSIONS)] if direct_files: line_count = count_lines_in_files(direct_files) @@ -173,7 +175,7 @@ def count_documentation_by_category(docs_dir): direct_files = [os.path.join(docs_dir, f) for f in os.listdir(docs_dir) if os.path.isfile(os.path.join(docs_dir, f)) - and any(f.endswith(ext) for ext in ['.md', '.rst', '.txt', '.adoc'])] + and any(f.endswith(ext) for ext in DOC_EXTENSIONS)] if direct_files: line_count = count_lines_in_files(direct_files) @@ -327,8 +329,10 @@ def generate_report(repo_root): root_doc_files = [os.path.join(repo_root, f) for f in os.listdir(repo_root) if os.path.isfile(os.path.join(repo_root, f)) - and any(f.endswith(ext) for ext in ['.md', '.rst', '.txt', '.adoc'])] + and any(f.endswith(ext) for ext in DOC_EXTENSIONS)] + root_doc_lines = 0 + root_doc_count = 0 if root_doc_files: root_doc_lines = count_lines_in_files(root_doc_files) root_doc_count = len(root_doc_files) @@ -336,8 +340,6 @@ def generate_report(repo_root): report_lines.append(f"- **Lines**: {root_doc_lines:,}") else: report_lines.append("*No root documentation files found*") - root_doc_lines = root_doc_lines if root_doc_files else 0 - root_doc_count = root_doc_count if root_doc_files else 0 report_lines.append("") # Grand total