Skip to content

Commit f7a56e6

Browse files
authored
docs: fix typos in documentation and code comments (#1898)
Follow-up to PRs #1583 / #1611. Fixes a few typos found by running [`codespell`](https://github.com/codespell-project/codespell) on the repository. Also see related PR #1899, which adds a `codespell` pre-commit hook.
1 parent db82f8b commit f7a56e6

File tree

10 files changed

+13
-13
lines changed

10 files changed

+13
-13
lines changed

docs/torchft.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ In a real-world scenario, `torchft_lighthouse` would likely be on a different ma
5757
### Using semi-synchronous training (Example 2)
5858

5959
TorchFT provides algorithms that do not require per-step synchronization and
60-
the replica groups can sychronize weights every N steps.
60+
the replica groups can synchronize weights every N steps.
6161

6262
**Note on Batch Sizes**: For DiLoCo, there's an important distinction in batch size terminology:
6363

torchtitan/components/metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def _build_metric_logger(
319319
logger_container.add_logger(tensorboard_logger)
320320

321321
if logger_container.number_of_loggers == 0:
322-
logger.debug("No loggers enabled, returning an emtpy LoggerContainer")
322+
logger.debug("No loggers enabled, returning an empty LoggerContainer")
323323
return logger_container
324324

325325

torchtitan/components/quantization/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
class QuantizationConverter(ModelConverter):
2727
"""
28-
Base class for quantization converters, which implements generic validation re-usable across all quantization converters.
28+
Base class for quantization converters, which implements generic validation reusable across all quantization converters.
2929
"""
3030

3131
enabled: bool = False

torchtitan/config/manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def _validate_config(self) -> None:
224224
def register_tyro_rules(registry: tyro.constructors.ConstructorRegistry) -> None:
225225
@registry.primitive_rule
226226
def list_str_rule(type_info: tyro.constructors.PrimitiveTypeInfo):
227-
"""Support for comma separate string parsing"""
227+
"""Support for comma separated string parsing"""
228228
if type_info.type != list[str]:
229229
return None
230230
return tyro.constructors.PrimitiveConstructorSpec(

torchtitan/experiments/simple_fsdp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ SimpleFSDP relies on compiler backend to perform optimizations (i.e., bucketing
5353
- "aot_eager_autobucketing": perform autobucketing at aten fx-level, and perform code execution with aot_eager backend.
5454

5555

56-
users can specify the pass (e.g., "aot_eager_autobucketing") via addtional configs:
56+
users can specify the pass (e.g., "aot_eager_autobucketing") via additional configs:
5757

5858
```bash
5959
--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config --compile.model_backend_override "aot_eager_autobucketing"

torchtitan/experiments/vlm/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ This results in a very simple and general interface to train modern VLM with int
3434
### Dataloader
3535
This approach requires the dataloader to handle the following aspect:
3636
- [x] Interleave the correct precise numbers of image tokens in the inputs token based on encoder's patch size and input images' size
37-
- [x] Convert images/videos to 1D sequence of patchs:
37+
- [x] Convert images/videos to 1D sequence of patches:
3838
- `rearrange(pixels, 'n (t pt) (h ph) (w pw) c -> n (t h w) (pt p pw c)', pt=temporal_ps, ph=patch_size, pw=patch_size)`
3939
- Pad all image patches sequence to a fixed length and return `pixel_values.shape == [N, L, D]`
40-
- [x] Return a `grid_thw.shape == [N, L, 3]` to keep track of the location indicies of each patches in the images. Padding image can be tracked in the same tensors with values `-1`.
40+
- [x] Return a `grid_thw.shape == [N, L, 3]` to keep track of the location indices of each patches in the images. Padding image can be tracked in the same tensors with values `-1`.
4141
- [x] LLM Sample / Document Packing.
4242
- [x] Captioning dataset: CC12M
4343
- [x] Interleaved dataset: Obelics

torchtitan/experiments/vlm/job_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class Data:
2525
"""
2626
packing_buffer_size: int = 0
2727
""" Set to a value >0 to enable sample packing.
28-
This control the buffer uses to store training samples avaliable for packing.
28+
This control the buffer uses to store training samples available for packing.
2929
"""
3030

3131

torchtitan/models/attention.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,6 @@ def create_attention_mask(*args, **kwargs):
182182
"""Create an attention mask using compiled create_block_mask.
183183
184184
This function is cached to avoid recreating BlockMasks for the same
185-
argumens.
185+
arguments.
186186
"""
187187
return _compiled_create_block_mask(*args, **kwargs)

torchtitan/models/qwen3/model/state_dict_adapter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def from_hf(self, hf_state_dict: dict[str, Any]) -> dict[str, Any]:
141141
layer_num,
142142
value.device_mesh,
143143
)
144-
else: # keep this path to be compatibile with offline conversion
144+
else: # keep this path to be compatible with offline conversion
145145
stacked_value = self._concatenate_expert_weights(
146146
expert_weights_by_layer,
147147
titan_abstract_key,

torchtitan/models/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def _calculate_strided_shard_shard_indices(
4848
dim_size_to_split: int,
4949
) -> tuple[int, int]:
5050
"""
51-
Given a [StridedShard(dim=i), Shard(dim=i)] placement, caculate the start index
51+
Given a [StridedShard(dim=i), Shard(dim=i)] placement, calculate the start index
5252
and end index on dim-i for GPU rank (strided_shard_dim_degree, shard_dim_rank)
5353
5454
GPU Layout (strided_shard_rank, shard_rank):
@@ -68,12 +68,12 @@ def _calculate_strided_shard_shard_indices(
6868
2 │ GPU(2, 1) │
6969
└─────────────────┘
7070
71-
Calulate the start_index from inner dimesion (Shard(dim=i)) to outer demension (StridedShard(dim=i)).
71+
Calculate the start_index from inner dimension (Shard(dim=i)) to outer dimension (StridedShard(dim=i)).
7272
"""
7373

7474
block_size = dim_size_to_split // (strided_shard_dim_degree * shard_dim_degree)
7575

76-
# Error out if can not evenly divded
76+
# Error out if can not evenly divided
7777
if (
7878
block_size * (strided_shard_dim_degree * shard_dim_degree)
7979
!= dim_size_to_split

0 commit comments

Comments
 (0)