|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Script to automatically generate the API reference table of contents for AutoGen. |
| 4 | +
|
| 5 | +This script scans all packages and their modules to generate the toctree entries |
| 6 | +for the API documentation index.md file. |
| 7 | +""" |
| 8 | + |
| 9 | +import os |
| 10 | +from pathlib import Path |
| 11 | +from typing import List, Dict, Set |
| 12 | +import re |
| 13 | + |
| 14 | + |
# Top-level packages whose modules are published in the API reference.
DOCUMENTED_PACKAGES = ["autogen_core", "autogen_agentchat", "autogen_ext"]

# Toctree caption used for each documented package. Insertion order is the
# order in which sections are emitted into the generated index.
PACKAGE_SECTIONS = dict(
    autogen_agentchat="AutoGen AgentChat",
    autogen_core="AutoGen Core",
    autogen_ext="AutoGen Extensions",
)

# Submodules whose public names are re-exported by a parent module. Each
# entry pairs the submodule with the parent that re-exports it.
_TASK_CENTRIC_MEMORY = "autogen_ext.experimental.task_centric_memory"
_TASK_CENTRIC_MEMORY_UTILS = _TASK_CENTRIC_MEMORY + ".utils"
_REEXPORTED_SUBMODULES = [
    # task_centric_memory re-exports from memory_controller and utils
    (_TASK_CENTRIC_MEMORY + ".memory_controller", _TASK_CENTRIC_MEMORY),
    # utils package re-exports from utils.apprentice and other utils submodules
    (_TASK_CENTRIC_MEMORY_UTILS + ".apprentice", _TASK_CENTRIC_MEMORY_UTILS),
    (_TASK_CENTRIC_MEMORY_UTILS + ".chat_completion_client_recorder", _TASK_CENTRIC_MEMORY_UTILS),
    (_TASK_CENTRIC_MEMORY_UTILS + ".grader", _TASK_CENTRIC_MEMORY_UTILS),
    (_TASK_CENTRIC_MEMORY_UTILS + ".page_logger", _TASK_CENTRIC_MEMORY_UTILS),
    (_TASK_CENTRIC_MEMORY_UTILS + ".teachability", _TASK_CENTRIC_MEMORY_UTILS),
]

# Exclusion patterns as (anchored regex for the submodule, parent module name).
EXCLUSION_PATTERNS = [
    ("^" + re.escape(submodule) + "$", parent)
    for submodule, parent in _REEXPORTED_SUBMODULES
]
| 41 | + |
| 42 | + |
def is_private_module(module_parts: List[str]) -> bool:
    """Report whether a module path contains a private component.

    A component is private when it begins with an underscore. The literal
    name ``__init__`` is the single exception and never counts as private.

    Args:
        module_parts: The module path split into its individual components.

    Returns:
        True if at least one component is private, otherwise False.
    """
    for component in module_parts:
        if component == '__init__':
            continue
        if component.startswith('_'):
            return True
    return False
| 46 | + |
| 47 | + |
def find_python_packages() -> List[Path]:
    """Locate the ``src`` directory of every documented package.

    The ``packages`` directory is looked up under the fifth ancestor of this
    file. A child directory qualifies when its name, with hyphens replaced by
    underscores, is listed in ``DOCUMENTED_PACKAGES`` and it contains a
    ``src`` entry.

    Returns:
        The ``src`` paths of the qualifying packages, in directory-listing
        order.
    """
    # Climb five levels: the script's directory plus four more ancestors.
    workspace_root = Path(__file__)
    for _ in range(5):
        workspace_root = workspace_root.parent

    source_roots = []
    for candidate in (workspace_root / "packages").iterdir():
        if not candidate.is_dir():
            continue
        # Distribution directories use hyphens; import names use underscores.
        if candidate.name.replace("-", "_") not in DOCUMENTED_PACKAGES:
            continue
        src_dir = candidate / "src"
        if src_dir.exists():
            source_roots.append(src_dir)

    return source_roots
| 63 | + |
| 64 | + |
def _register_module(modules: Dict[str, Set[str]], module_parts: List[str]) -> None:
    """Add the module named by ``module_parts`` to ``modules`` if it is public and documented."""
    if not module_parts or is_private_module(module_parts):
        return

    top_level_package = module_parts[0]
    if top_level_package in DOCUMENTED_PACKAGES:
        modules.setdefault(top_level_package, set()).add('.'.join(module_parts))


def get_module_hierarchy(package_root: Path) -> Dict[str, Set[str]]:
    """Collect the public modules found under a package source root.

    Two kinds of entries are recorded: ``.py`` files (other than those whose
    name starts with an underscore or a dot) and directories that contain an
    ``__init__.py``. Names are dotted paths relative to ``package_root``, and
    only those whose first component is in ``DOCUMENTED_PACKAGES`` are kept.

    Args:
        package_root: Directory that holds the top-level package directories.

    Returns:
        A mapping from top-level package name to the set of dotted module
        names discovered for it. Packages with no modules are absent.
    """
    modules: Dict[str, Set[str]] = {}

    for root, dirs, files in os.walk(package_root):
        # Prune in place so os.walk never descends into private, cache or
        # hidden directories: everything beneath them would be rejected as
        # private anyway, so walking them is wasted work.
        dirs[:] = [d for d in dirs if not d.startswith(('_', '.'))]

        root_path = Path(root)
        parent_parts = list(root_path.relative_to(package_root).parts)

        # Plain modules: public .py files. The underscore test also covers
        # __init__.py, which is represented by its directory instead.
        for file_name in files:
            if file_name.endswith('.py') and not file_name.startswith(('_', '.')):
                _register_module(modules, parent_parts + [Path(file_name).stem])

        # Packages: surviving (public) directories that carry an __init__.py.
        for dir_name in dirs:
            if (root_path / dir_name / '__init__.py').exists():
                _register_module(modules, parent_parts + [dir_name])

    return modules
| 123 | + |
| 124 | + |
def should_exclude_submodule(module_name: str, all_modules: Set[str]) -> bool:
    """Decide whether a submodule is left out to avoid duplicate documentation.

    A module is excluded when it matches one of the ``EXCLUSION_PATTERNS``
    regexes and the parent module paired with that pattern is itself present
    in ``all_modules``.

    Args:
        module_name: Dotted name of the module under consideration.
        all_modules: Every module name that was discovered.

    Returns:
        True if the module should be skipped, otherwise False.
    """
    return any(
        parent_module in all_modules and re.match(pattern, module_name)
        for pattern, parent_module in EXCLUSION_PATTERNS
    )
| 132 | + |
| 133 | + |
def clean_rst_files(reference_dir: Path) -> None:
    """Delete the ``.rst`` files directly inside ``<reference_dir>/python``.

    Nothing happens, and nothing is printed, when that directory is missing.
    Files with other extensions are left alone.

    Args:
        reference_dir: The documentation ``reference`` directory.
    """
    target_dir = reference_dir / "python"
    if not target_dir.exists():
        return

    print("🧹 Cleaning existing .rst files...")
    # Materialise the listing first so files are not removed mid-iteration.
    stale_files = list(target_dir.glob("*.rst"))
    removed = 0
    for stale in stale_files:
        stale.unlink()
        removed += 1
    print(f" Removed {removed} existing .rst files")
| 143 | + |
| 144 | + |
def generate_rst_files(package_roots: List[Path], reference_dir: Path) -> Set[str]:
    """Write one ``automodule`` stub per discovered module.

    The ``<reference_dir>/python`` directory is created if necessary and its
    existing ``.rst`` files are removed before any new file is written.
    Modules for which ``should_exclude_submodule`` returns True are skipped
    with a log line.

    Args:
        package_roots: Source roots to scan with ``get_module_hierarchy``.
        reference_dir: The documentation ``reference`` directory.

    Returns:
        The dotted names of the modules for which a file was written.
    """
    python_ref_dir = reference_dir / "python"
    python_ref_dir.mkdir(exist_ok=True, parents=True)

    # Clean existing RST files first
    clean_rst_files(reference_dir)

    # Walk every root exactly once; the complete name set is needed up front
    # because exclusion depends on whether the parent module was discovered.
    all_module_names: Set[str] = set()
    for package_root in package_roots:
        for module_set in get_module_hierarchy(package_root).values():
            all_module_names.update(module_set)

    generated_files: Set[str] = set()

    # Sorted so the log output is reproducible from run to run.
    for module_name in sorted(all_module_names):
        # Skip modules that would cause duplicate documentation
        if should_exclude_submodule(module_name, all_module_names):
            print(f" Skipping {module_name} (re-exported by parent)")
            continue

        # The file name keeps the dots; the title escapes underscores for RST.
        title = module_name.replace('_', r'\_')
        rst_content = "\n".join([
            title,
            '=' * len(title),  # underline matches the escaped title length
            "",
            f".. automodule:: {module_name}",
            "   :members:",
            "   :undoc-members:",
            "   :show-inheritance:",
            "   :member-order: bysource",
            "",
        ])

        # Explicit encoding keeps the output independent of the locale.
        rst_path = python_ref_dir / (module_name + '.rst')
        with open(rst_path, 'w', encoding='utf-8') as f:
            f.write(rst_content)

        generated_files.add(module_name)

    return generated_files
| 199 | + |
| 200 | + |
def generate_toctree_from_rst_files(reference_dir: Path) -> Dict[str, List[str]]:
    """Build the toctree entries from the ``.rst`` files on disk.

    Each file in ``<reference_dir>/python`` is assigned to a section by the
    first component of its dotted module name, which must equal a key of
    ``PACKAGE_SECTIONS`` exactly. Files belonging to no known package are
    ignored.

    Args:
        reference_dir: The documentation ``reference`` directory.

    Returns:
        A mapping from section caption to ``python/<module>`` entries, ordered
        by nesting depth and then alphabetically. Every section is present,
        with an empty list when it has no modules or the directory is missing.
    """
    modules_by_section: Dict[str, List[str]] = {
        section: [] for section in PACKAGE_SECTIONS.values()
    }

    python_ref_dir = reference_dir / "python"
    if not python_ref_dir.exists():
        return modules_by_section

    for rst_file in python_ref_dir.glob("*.rst"):
        module_name = rst_file.stem  # filename without .rst extension

        # Compare the whole top-level component rather than a string prefix,
        # so e.g. "autogen_core_extra" is not mistaken for "autogen_core".
        top_level_package = module_name.split('.', 1)[0]
        section_name = PACKAGE_SECTIONS.get(top_level_package)
        if section_name is not None:
            modules_by_section[section_name].append(module_name)

    # Fewer dots sort first, which places parents ahead of their children.
    return {
        section_name: [
            f"python/{m}"
            for m in sorted(modules, key=lambda name: (name.count('.'), name))
        ]
        for section_name, modules in modules_by_section.items()
    }
| 233 | + |
| 234 | + |
def generate_index_content(toctree_sections: Dict[str, List[str]]) -> str:
    """Render the full ``index.md`` text for the API reference.

    The output starts with a MyST front-matter block and an ``# API
    Reference`` heading, followed by one fenced ``toctree`` directive per
    non-empty section, in the iteration order of ``toctree_sections``.

    Args:
        toctree_sections: Mapping from section caption to toctree entries.

    Returns:
        The complete Markdown document as a single string.
    """
    # The front matter is YAML: the nesting and the extra indentation of the
    # block-scalar body are significant, so every line is spelled out.
    parts: List[str] = [
        "---\n",
        "myst:\n",
        "  html_meta:\n",
        '    "description lang=en": |\n',
        "      AutoGen is a community-driven project. Learn how to get involved,"
        " contribute, and connect with the community.\n",
        "---\n",
        "\n",
        "# API Reference\n",
        "\n",
    ]

    for section_name, modules in toctree_sections.items():
        if not modules:  # Only add section if it has modules
            continue
        parts.append(f"```{{toctree}}\n:caption: {section_name}\n:maxdepth: 2\n\n")
        parts.extend(f"{module}\n" for module in modules)
        parts.append("```\n\n")

    return "".join(parts)
| 261 | + |
| 262 | + |
def main() -> None:
    """Regenerate the module ``.rst`` stubs and ``reference/index.md``.

    All output goes under the ``reference`` directory that sits next to this
    script. Progress and a per-section summary are printed to stdout.
    """
    script_dir = Path(__file__).parent
    reference_dir = script_dir / "reference"
    index_file = reference_dir / "index.md"

    print("🔍 Scanning Python packages...")
    package_roots = find_python_packages()

    # Only report the roots here; generate_rst_files performs the actual
    # scan, so walking the tree at this point would be duplicate work.
    for package_root in package_roots:
        print(f" 📦 Scanning {package_root}")

    print("🏗️ Generating .rst files for all discovered modules...")
    generated_files = generate_rst_files(package_roots, reference_dir)
    print(f" Generated {len(generated_files)} .rst files")

    print("📝 Generating toctree entries from .rst files...")
    toctree_sections = generate_toctree_from_rst_files(reference_dir)

    for section, modules in toctree_sections.items():
        print(f" {section}: {len(modules)} modules")

    print("✍️ Writing index.md...")
    content = generate_index_content(toctree_sections)

    # Explicit encoding keeps the output independent of the locale.
    with open(index_file, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"✅ Generated API documentation index at {index_file}")
    print("\n📖 Summary:")
    total_modules = sum(len(modules) for modules in toctree_sections.values())
    print(f" Total modules documented: {total_modules}")

    for section, modules in toctree_sections.items():
        if modules:
            print(f" {section}: {len(modules)} modules")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments