Skip to content

Commit 89841b6

Browse files
authored
Add script to automatically generate API documentation and remove hard-coded RST files; fix API docs (#6755)
1 parent 13f9a73 commit 89841b6

File tree

72 files changed

+652
-777
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+652
-777
lines changed

python/packages/autogen-core/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,6 @@ docs/**/jupyter_execute
171171

172172
# Temporary files
173173
tmp_code_*.py
174+
175+
# Reference files
176+
docs/src/reference

python/packages/autogen-core/docs/src/conf.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pathlib import Path
99
import sys
1010
import os
11+
import subprocess
1112
# -- Project information -----------------------------------------------------
1213
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
1314

@@ -182,6 +183,35 @@
182183
}
183184

184185

186+
def generate_api_docs() -> None:
    """Run the API reference generator script located next to this file.

    ``generate_api_toc.py`` is executed with the current Python interpreter,
    using the script's own directory as the working directory. On success the
    captured stdout is echoed line by line. If the script exits with a
    non-zero status, the captured stdout/stderr are printed and the error is
    swallowed so the calling build is not aborted. If the script is missing,
    only a warning is printed.
    """
    generator = Path(__file__).parent / "generate_api_toc.py"

    # Nothing to run: warn and let the build carry on.
    if not generator.exists():
        print(f"⚠️ API documentation generator not found at {generator}")
        return

    print("🔄 Generating API documentation...")
    command = [sys.executable, str(generator)]
    try:
        completed = subprocess.run(
            command,
            cwd=generator.parent,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as error:
        # Deliberately best-effort: report the failure but do not re-raise.
        print(f"❌ Failed to generate API documentation: {error}")
        if error.stdout:
            print(f"stdout: {error.stdout}")
        if error.stderr:
            print(f"stderr: {error.stderr}")
        return

    print("✅ API documentation generated successfully")
    # Echo the generator's own output for visibility in the build log.
    if completed.stdout:
        for output_line in completed.stdout.strip().split('\n'):
            print(f" {output_line}")
213+
214+
185215
def setup_to_main(
186216
app: Sphinx, pagename: str, templatename: str, context, doctree
187217
) -> None:
@@ -211,6 +241,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
211241
Returns:
212242
the 2 parallel parameters set to ``True``.
213243
"""
244+
# Generate API documentation before building
245+
app.connect("builder-inited", lambda app: generate_api_docs())
246+
214247
app.connect("html-page-context", setup_to_main)
215248

216249
# Adding here so it is inline and not in a separate file.
Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to automatically generate the API reference table of contents for AutoGen.
4+
5+
This script scans all packages and their modules to generate the toctree entries
6+
for the API documentation index.md file.
7+
"""
8+
9+
import os
10+
from pathlib import Path
11+
from typing import List, Dict, Set
12+
import re
13+
14+
15+
# Top-level packages whose modules are included in the generated reference.
DOCUMENTED_PACKAGES = ["autogen_core", "autogen_agentchat", "autogen_ext"]

# Package name -> caption of the toctree section its modules are listed under.
PACKAGE_SECTIONS = dict(
    autogen_agentchat="AutoGen AgentChat",
    autogen_core="AutoGen Core",
    autogen_ext="AutoGen Extensions",
)

# Submodules that are re-exported by a parent module. Each entry is a
# (regex matching the submodule name, parent module name) pair; the submodule
# is left out of the docs when the parent module is itself documented.
_TCM_MODULE = "autogen_ext.experimental.task_centric_memory"
_TCM_REGEX = r"^autogen_ext\.experimental\.task_centric_memory"

EXCLUSION_PATTERNS = [
    # task_centric_memory re-exports from memory_controller
    (_TCM_REGEX + r"\.memory_controller$", _TCM_MODULE),
] + [
    # the utils package re-exports from these utils submodules
    (_TCM_REGEX + rf"\.utils\.{submodule}$", _TCM_MODULE + ".utils")
    for submodule in (
        "apprentice",
        "chat_completion_client_recorder",
        "grader",
        "page_logger",
        "teachability",
    )
]
41+
42+
43+
def is_private_module(module_parts: List[str]) -> bool:
    """Return True when at least one component of the module path is private.

    A component counts as private when it starts with an underscore; the
    literal name ``__init__`` is the single exception.
    """
    for component in module_parts:
        if component == '__init__':
            continue
        if component.startswith('_'):
            return True
    return False
46+
47+
48+
def find_python_packages() -> List[Path]:
    """Return the ``src`` directories of the documented workspace packages.

    The ``packages`` directory is located five levels above this file. A
    sub-directory qualifies when its name, with dashes turned into
    underscores, is listed in ``DOCUMENTED_PACKAGES`` and it contains a
    ``src`` directory.
    """
    # Climb five levels from this file to reach the directory holding "packages".
    workspace_root = Path(__file__)
    for _ in range(5):
        workspace_root = workspace_root.parent
    packages_dir = workspace_root / "packages"

    return [
        candidate / "src"
        for candidate in packages_dir.iterdir()
        if candidate.is_dir()
        and candidate.name.replace("-", "_") in DOCUMENTED_PACKAGES
        and (candidate / "src").exists()
    ]
63+
64+
65+
def _record_module(modules: Dict[str, Set[str]], module_parts: List[str]) -> None:
    """Add the dotted module name to ``modules`` if it is public and documented."""
    if not module_parts or is_private_module(module_parts):
        return

    package_name = module_parts[0]
    # Only include modules from documented packages
    if package_name not in DOCUMENTED_PACKAGES:
        return

    modules.setdefault(package_name, set()).add('.'.join(module_parts))


def get_module_hierarchy(package_root: Path) -> Dict[str, Set[str]]:
    """Collect the public modules found below ``package_root``.

    Args:
        package_root: Directory whose immediate children are top-level
            packages (a ``src`` directory).

    Returns:
        A mapping from top-level package name to the set of dotted module
        names discovered for it. A module is recorded for every ``.py`` file
        whose name does not start with an underscore and for every directory
        that contains an ``__init__.py``, provided no path component is
        private and the top-level package is in ``DOCUMENTED_PACKAGES``.
    """
    modules: Dict[str, Set[str]] = {}

    for root, dirs, files in os.walk(package_root):
        # Skip __pycache__ and hidden directories; assigning to dirs[:] prunes
        # them from the remainder of the walk.
        dirs[:] = [d for d in dirs if not d.startswith('__pycache__') and not d.startswith('.')]

        root_path = Path(root)
        parent_parts = list(root_path.relative_to(package_root).parts)

        # Plain modules: public .py files. The underscore check also rules out
        # __init__.py, which is represented by its directory instead.
        for file in files:
            if file.endswith('.py') and not file.startswith('_'):
                _record_module(modules, parent_parts + [Path(file).stem])

        # Packages: public directories that contain an __init__.py.
        for dir_name in dirs:
            if dir_name.startswith('_'):
                continue
            if (root_path / dir_name / '__init__.py').exists():
                _record_module(modules, parent_parts + [dir_name])

    return modules
123+
124+
125+
def should_exclude_submodule(module_name: str, all_modules: Set[str]) -> bool:
    """Tell whether ``module_name`` is already covered by a documented parent.

    Returns True when the name matches one of the ``EXCLUSION_PATTERNS``
    regexes and the parent module paired with that pattern is present in
    ``all_modules``; otherwise returns False.
    """
    return any(
        parent_module in all_modules and re.match(pattern, module_name)
        for pattern, parent_module in EXCLUSION_PATTERNS
    )
132+
133+
134+
def clean_rst_files(reference_dir: Path) -> None:
    """Delete every ``*.rst`` file directly inside ``reference_dir / "python"``.

    Does nothing (and prints nothing) when that directory does not exist.
    """
    rst_dir = reference_dir / "python"
    if not rst_dir.exists():
        return

    print("🧹 Cleaning existing .rst files...")
    # Materialize the listing before deleting anything.
    stale_files = list(rst_dir.glob("*.rst"))
    removed = 0
    for stale in stale_files:
        stale.unlink()
        removed += 1
    print(f" Removed {removed} existing .rst files")
143+
144+
145+
def generate_rst_files(package_roots: List[Path], reference_dir: Path) -> Set[str]:
    """Write one ``.rst`` stub per discovered module.

    Args:
        package_roots: ``src`` directories to scan for modules.
        reference_dir: Reference directory; files are written to its
            ``python`` sub-directory, which is created if needed and cleared
            of existing ``.rst`` files first.

    Returns:
        The set of module names for which a ``.rst`` file was written.
        Modules flagged by ``should_exclude_submodule`` are skipped.
    """
    python_ref_dir = reference_dir / "python"
    python_ref_dir.mkdir(exist_ok=True, parents=True)

    # Clean existing RST files first
    clean_rst_files(reference_dir)

    # Walk every package root exactly once and reuse the result for both the
    # name collection and the file generation below.
    hierarchies = [get_module_hierarchy(package_root) for package_root in package_roots]

    # The exclusion check needs the complete set of module names up front.
    all_module_names: Set[str] = set()
    for hierarchy in hierarchies:
        for module_set in hierarchy.values():
            all_module_names.update(module_set)

    generated_files: Set[str] = set()
    for hierarchy in hierarchies:
        for module_set in hierarchy.values():
            # Sorted so the log output is reproducible from run to run.
            for module_name in sorted(module_set):
                # Skip modules that would cause duplicate documentation
                if should_exclude_submodule(module_name, all_module_names):
                    print(f" Skipping {module_name} (re-exported by parent)")
                    continue

                # Keep the dots of the module name in the file name.
                rst_path = python_ref_dir / (module_name + '.rst')

                # Escape underscores for RST; the underline is sized to the
                # escaped title, so it is at least as long as the rendered text.
                title = module_name.replace('_', r'\_')
                underline = '=' * len(title)

                # Directive options must be indented under the directive.
                rst_content = (
                    f"{title}\n"
                    f"{underline}\n"
                    "\n"
                    f".. automodule:: {module_name}\n"
                    "   :members:\n"
                    "   :undoc-members:\n"
                    "   :show-inheritance:\n"
                    "   :member-order: bysource\n"
                )

                # Explicit encoding keeps the output independent of the locale.
                with open(rst_path, 'w', encoding='utf-8') as f:
                    f.write(rst_content)

                generated_files.add(module_name)

    return generated_files
199+
200+
201+
def generate_toctree_from_rst_files(reference_dir: Path) -> Dict[str, List[str]]:
    """Build the toctree entries from the ``.rst`` files on disk.

    Args:
        reference_dir: Reference directory whose ``python`` sub-directory
            holds the generated ``.rst`` files.

    Returns:
        A mapping from section caption (the values of ``PACKAGE_SECTIONS``)
        to a list of ``python/<module>`` entries. Every section is present
        even when empty. Entries are ordered by module depth and then by
        name, so a parent module always precedes its children.
    """
    modules_by_section: Dict[str, List[str]] = {section: [] for section in PACKAGE_SECTIONS.values()}

    python_ref_dir = reference_dir / "python"
    if not python_ref_dir.exists():
        return modules_by_section

    # Get all .rst files and organize them by package
    for rst_file in python_ref_dir.glob("*.rst"):
        module_name = rst_file.stem  # filename without .rst extension

        for package_prefix, section_name in PACKAGE_SECTIONS.items():
            # Match on a dotted-name boundary so that a module such as
            # "autogen_core_foo" is not filed under "autogen_core".
            if module_name == package_prefix or module_name.startswith(package_prefix + '.'):
                modules_by_section[section_name].append(module_name)
                break

    # Fewer dots first, then alphabetical: parents come before children.
    return {
        section_name: [
            f"python/{module}"
            for module in sorted(modules, key=lambda name: (name.count('.'), name))
        ]
        for section_name, modules in modules_by_section.items()
    }
233+
234+
235+
def generate_index_content(toctree_sections: Dict[str, List[str]]) -> str:
    """Render the text of ``index.md``.

    The result is a fixed MyST front-matter header and an "API Reference"
    heading, followed by one ``toctree`` block per section that has at least
    one entry. Sections with no entries are omitted.
    """
    header = """---
myst:
  html_meta:
    "description lang=en": |
      AutoGen is a community-driven project. Learn how to get involved, contribute, and connect with the community.
---

# API Reference

"""

    pieces = [header]
    for caption, entries in toctree_sections.items():
        if not entries:
            # Only add a section if it has modules
            continue
        pieces.append(f"```{{toctree}}\n:caption: {caption}\n:maxdepth: 2\n\n")
        pieces.extend(f"{entry}\n" for entry in entries)
        pieces.append("```\n\n")

    return "".join(pieces)
261+
262+
263+
def main() -> None:
    """Generate the per-module ``.rst`` stubs and the API reference ``index.md``.

    Output goes to the ``reference`` directory next to this script: the stubs
    to ``reference/python`` and the index to ``reference/index.md``. Progress
    and a per-section summary are printed to stdout.
    """
    script_dir = Path(__file__).parent
    reference_dir = script_dir / "reference"
    index_file = reference_dir / "index.md"

    print("🔍 Scanning Python packages...")
    package_roots = find_python_packages()

    # Only report the roots here; generate_rst_files performs the actual scan,
    # so walking the tree at this point would be wasted work.
    for package_root in package_roots:
        print(f" 📦 Scanning {package_root}")

    print("🏗️ Generating .rst files for all discovered modules...")
    generated_files = generate_rst_files(package_roots, reference_dir)
    print(f" Generated {len(generated_files)} .rst files")

    print("📝 Generating toctree entries from .rst files...")
    toctree_sections = generate_toctree_from_rst_files(reference_dir)

    for section, modules in toctree_sections.items():
        print(f" {section}: {len(modules)} modules")

    print("✍️ Writing index.md...")
    content = generate_index_content(toctree_sections)

    # Explicit encoding keeps the output independent of the locale.
    with open(index_file, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"✅ Generated API documentation index at {index_file}")
    print("\n📖 Summary:")
    total_modules = sum(len(modules) for modules in toctree_sections.values())
    print(f" Total modules documented: {total_modules}")

    # Unlike the listing above, the summary leaves out empty sections.
    for section, modules in toctree_sections.items():
        if modules:
            print(f" {section}: {len(modules)} modules")
302+
303+
304+
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)