From 77546e7fc83dcf84c9730f2ec980adf68cedc0ae Mon Sep 17 00:00:00 2001 From: DaKheera47 Date: Thu, 15 Jan 2026 23:13:49 +0000 Subject: [PATCH] linecount utility --- scripts/linecount.py | 237 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 scripts/linecount.py diff --git a/scripts/linecount.py b/scripts/linecount.py new file mode 100644 index 0000000..9545d66 --- /dev/null +++ b/scripts/linecount.py @@ -0,0 +1,237 @@ +import argparse +import os +from collections import defaultdict +from pathlib import Path + + +SKIP_DIRS_DEFAULT = { + ".git", + ".venv", + "venv", + "node_modules", + "dist", + "build", + "out", + ".pytest_cache", + "__pycache__", + ".mypy_cache", + ".ruff_cache", + ".next", + ".turbo", + ".idea", + ".vscode", + "data", +} + +SKIP_EXTS_DEFAULT = { + ".png", + ".jpg", + ".jpeg", + ".gif", + ".bmp", + ".ico", + ".pdf", + ".zip", + ".gz", + ".tar", + ".tgz", + ".7z", + ".exe", + ".dll", + ".so", + ".dylib", + ".bin", +} + +SPEC_BY_EXT = { + ".py": {"line": ["#"], "block": [("'''", "'''"), ('"""', '"""')]}, + ".js": {"line": ["//"], "block": [("/*", "*/")]}, + ".jsx": {"line": ["//"], "block": [("/*", "*/")]}, + ".ts": {"line": ["//"], "block": [("/*", "*/")]}, + ".tsx": {"line": ["//"], "block": [("/*", "*/")]}, + ".cjs": {"line": ["//"], "block": [("/*", "*/")]}, + ".css": {"line": [], "block": [("/*", "*/")]}, + ".html": {"line": [], "block": [("")]}, + ".htm": {"line": [], "block": [("")]}, + ".md": {"line": [], "block": [("")]}, + ".yml": {"line": ["#"], "block": []}, + ".yaml": {"line": ["#"], "block": []}, + ".env": {"line": ["#"], "block": []}, + ".example": {"line": ["#"], "block": []}, + ".sh": {"line": ["#"], "block": []}, + ".bash": {"line": ["#"], "block": []}, + ".zsh": {"line": ["#"], "block": []}, + ".ps1": {"line": ["#"], "block": [("<#", "#>")]}, + ".json": {"line": [], "block": []}, + ".txt": {"line": [], "block": []}, +} + +SPEC_BY_NAME = { + "dockerfile": {"line": ["#"], "block": []}, + "makefile": {"line": ["#"], "block": []}, + ".gitignore": {"line": ["#"], "block": []}, + ".dockerignore": {"line": ["#"], "block": []}, + ".env": {"line": ["#"], "block": []}, + ".env.example": {"line": ["#"], "block": []}, +} + + +def is_binary(path: str) -> bool: + try: + with open(path, "rb") as f: + return b"\x00" in f.read(2048) + except Exception: + return True + + +def get_spec(path: Path): + name = path.name.lower() + if name in SPEC_BY_NAME: + return SPEC_BY_NAME[name] + ext = path.suffix.lower() + return SPEC_BY_EXT.get(ext, {"line": [], "block": []}) + + +def count_file(path: Path): + spec = get_spec(path) + line_comments = spec["line"] + block_comments = spec["block"] + + code = comment = blank = 0 + in_block_end = None + + with open(path, "r", encoding="utf-8", errors="replace") as f: + for line in f: + stripped = line.strip() + if not stripped: + blank += 1 + continue + + if in_block_end: + end = in_block_end + if end in stripped: + _, after_end = stripped.split(end, 1) + if after_end.strip(): + code += 1 + else: + comment += 1 + in_block_end = None + else: + comment += 1 + continue + + if line_comments and any(stripped.startswith(prefix) for prefix in line_comments): + comment += 1 + continue + + started = False + for start, end in block_comments: + idx = stripped.find(start) + if idx != -1: + before = stripped[:idx].strip() + after = stripped[idx + len(start) :] + if before: + code += 1 + else: + if end in after: + after_end = after.split(end, 1)[1].strip() + if after_end: + code += 1 + else: + comment += 1 + else: + comment += 1 + in_block_end = end + started = True + break + if started: + continue + + code += 1 + + return code, comment, blank + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Count code/comment/blank lines by extension." + ) + parser.add_argument( + "root", + nargs="?", + default=".", + help="Root directory to scan (default: current directory).", + ) + parser.add_argument( + "--include-dir", + action="append", + default=[], + help="Directory name to include even if normally skipped (repeatable).", + ) + parser.add_argument( + "--exclude-dir", + action="append", + default=[], + help="Directory name to exclude (repeatable).", + ) + return parser.parse_args() + + +def main(): + args = parse_args() + root = Path(args.root).resolve() + + skip_dirs = set(SKIP_DIRS_DEFAULT) + skip_dirs.difference_update(args.include_dir) + skip_dirs.update(args.exclude_dir) + + ext_stats = defaultdict(lambda: {"files": 0, "code": 0, "comment": 0, "blank": 0}) + + for dirpath, dirnames, filenames in os.walk(root): + # prune walk by directory name + dirnames[:] = [d for d in dirnames if d not in skip_dirs] + + for name in filenames: + path = Path(dirpath) / name + + ext = path.suffix.lower() + if ext in SKIP_EXTS_DEFAULT: + continue + if is_binary(str(path)): + continue + + code, comment, blank = count_file(path) + + key = ext if ext else "" + ext_stats[key]["files"] += 1 + ext_stats[key]["code"] += code + ext_stats[key]["comment"] += comment + ext_stats[key]["blank"] += blank + + all_files = sum(v["files"] for v in ext_stats.values()) + all_code = sum(v["code"] for v in ext_stats.values()) + all_comment = sum(v["comment"] for v in ext_stats.values()) + all_blank = sum(v["blank"] for v in ext_stats.values()) + all_lines = all_code + all_comment + all_blank + + print(f"Total files: {all_files}") + print(f"Total lines: {all_lines}") + print(f"Code lines: {all_code}") + print(f"Comment lines: {all_comment}") + print(f"Blank lines: {all_blank}") + print("") + print("Lines by extension (code/comment/blank/total, files):") + + def sort_key(item): + stats = item[1] + return stats["code"] + stats["comment"] + stats["blank"] + + for ext, stats in sorted(ext_stats.items(), key=sort_key, reverse=True): + total = stats["code"] + stats["comment"] + stats["blank"] + print( + f"{ext}\t{stats['code']}\t{stats['comment']}\t{stats['blank']}\t{total}\tfiles:{stats['files']}" + ) + + +if __name__ == "__main__": + main()