⌨️⚙️ Update torch type checking #1538

Draft · wants to merge 8 commits into master

Changes from 1 commit
Minor improvements
cthoyt committed May 12, 2025
commit f90fd935612dbc2fd47c85f7bfd7308e3a8dd6ef
2 changes: 1 addition & 1 deletion src/pykeen/contrib/lightning.py
@@ -250,7 +250,7 @@ def _dataloader(self, triples_factory: CoreTriplesFactory, shuffle: bool = False

 #: A resolver for PyTorch Lightning training modules
 lit_module_resolver: ClassResolver[LitModule] = ClassResolver.from_subclasses(
-    base=LitModule,
+    base=LitModule,  # type:ignore[type-abstract]
     default=SLCWALitModule,
     # note: since this file is executed via __main__, its module name is replaced by __name__
     # hence, the two classes' fully qualified names start with "_" and are considered private

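For context: the bulk of this PR adds scoped `type:ignore[type-abstract]` markers where abstract base classes are handed to resolvers. mypy raises `type-abstract` whenever an abstract class is passed to a parameter annotated `type[T]`, because it assumes the callee may instantiate it. A minimal sketch of the error and the scoped suppression (the class names here are illustrative, not PyKEEN's):

```python
from abc import ABC, abstractmethod


class Interaction(ABC):
    """A hypothetical abstract base, standing in for PyKEEN's resolver bases."""

    @abstractmethod
    def score(self) -> float: ...


def register(base: type[Interaction]) -> None:
    """mypy assumes any ``type[Interaction]`` argument might be instantiated."""
    print(f"registered {base.__name__}")


# mypy: error: Only concrete class can be given where "type[Interaction]"
# is expected  [type-abstract]
# A resolver only enumerates subclasses and never instantiates the base itself,
# so a scoped ignore is safe:
register(Interaction)  # type:ignore[type-abstract]
```

Scoping the ignore to `[type-abstract]` keeps mypy alert to any other error on the same line; the bare `# type:ignore` in `validate.py` below is broader.
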
2 changes: 1 addition & 1 deletion src/pykeen/experiments/validate.py
@@ -46,7 +46,7 @@
     type[Model],
     Optional[type[Model]],  # noqa:UP007
     Union[str, Callable[[FloatTensor], FloatTensor]],  # noqa:UP007
-    Hint[nn.Module],
+    Hint[nn.Module],  # type:ignore
 }
 _SKIP_EXTRANEOUS = {
     "predict_with_sigmoid",

2 changes: 1 addition & 1 deletion src/pykeen/inverse.py
@@ -71,6 +71,6 @@ def is_inverse(self, ids: LongTensor) -> BoolTensor:  # noqa: D102

 #: A resolver for relation inverter protocols
 relation_inverter_resolver: Resolver[RelationInverter] = Resolver.from_subclasses(
-    RelationInverter,
+    RelationInverter,  # type:ignore[type-abstract]
     default=DefaultRelationInverter,
 )

10 changes: 5 additions & 5 deletions src/pykeen/losses.py
@@ -1748,15 +1748,15 @@ def forward(self, x: FloatTensor, target: FloatTensor, weight: FloatTensor | Non

 #: A resolver for loss modules
 loss_resolver: ClassResolver[Loss] = ClassResolver.from_subclasses(
-    Loss,
+    Loss,  # type:ignore[type-abstract]
     default=MarginRankingLoss,
     skip={
-        PairwiseLoss,
-        PointwiseLoss,
-        SetwiseLoss,
+        PairwiseLoss,  # type:ignore[type-abstract]
+        PointwiseLoss,  # type:ignore[type-abstract]
+        SetwiseLoss,  # type:ignore[type-abstract]
         DeltaPointwiseLoss,
         MarginPairwiseLoss,
-        AdversarialLoss,
+        AdversarialLoss,  # type:ignore[type-abstract]
     },
 )
 for _name, _cls in loss_resolver.lookup_dict.items():

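For a sense of why the abstract bases end up in `type[T]` positions at all, here is a self-contained sketch of the `class-resolver` pattern used throughout; the loss hierarchy is simplified stand-ins (in PyKEEN the base is abstract, which is what triggers `type-abstract`):

```python
from class_resolver import ClassResolver


class Loss:
    """Simplified stand-in for PyKEEN's loss base class."""


class PairwiseLoss(Loss):
    """An intermediate base that should not be selectable."""


class MarginRankingLoss(PairwiseLoss):
    """A concrete, selectable loss."""


loss_resolver: ClassResolver[Loss] = ClassResolver.from_subclasses(
    Loss,  # the base lands in a type[Loss] position here
    default=MarginRankingLoss,
    skip={PairwiseLoss},  # intermediate bases are excluded from lookup
)

# Lookup is by normalized name; the "Loss" suffix is stripped.
loss = loss_resolver.make("MarginRanking")
assert isinstance(loss, MarginRankingLoss)
```
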
6 changes: 4 additions & 2 deletions src/pykeen/nn/modules.py
@@ -3440,8 +3440,8 @@ def __init__(
         num_heads: int = 8,
         dropout: float = 0.1,
         dim_feedforward: int = 2048,
-        position_initializer: HintOrType[Initializer] = xavier_normal_,
-    ):
+        position_initializer: HintOrType[Initializer] = None,
+    ) -> None:
         """
         Initialize the module.

@@ -3470,6 +3470,8 @@ def __init__(
             ),
             num_layers=num_layers,
         )
+        if position_initializer is None:
+            position_initializer = xavier_normal_
         self.position_embeddings = nn.Parameter(position_initializer(torch.empty(2, input_dim)))
         self.final = nn.Linear(input_dim, input_dim, bias=True)

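Moving the default from the signature (`= xavier_normal_`) to a `None` sentinel resolved in the body is a common pattern when the annotation is a hint type: the signature keeps a simple `None` default, and the concrete callable is chosen at call time. A minimal sketch, using `Optional[Callable]` in place of PyKEEN's `HintOrType[Initializer]` (which additionally accepts strings and classes):

```python
from collections.abc import Callable
from typing import Optional

import torch
from torch import nn

# Simplified: PyKEEN's Initializer hint covers strings, classes, and instances.
Initializer = Callable[[torch.Tensor], torch.Tensor]


def build_position_embeddings(
    input_dim: int,
    position_initializer: Optional[Initializer] = None,
) -> nn.Parameter:
    """Resolve the default lazily instead of in the signature."""
    if position_initializer is None:
        # The concrete default is chosen here, so the annotation stays a
        # simple Optional and matches the actual default value.
        position_initializer = nn.init.xavier_normal_
    return nn.Parameter(position_initializer(torch.empty(2, input_dim)))
```
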
7 changes: 4 additions & 3 deletions src/pykeen/nn/node_piece/anchor_selection.py
@@ -8,6 +8,7 @@
 import logging
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Sequence
+from typing import Any

 import numpy
 import torch
@@ -149,7 +150,7 @@ class PageRankAnchorSelection(SingleSelection):
     def __init__(
         self,
         num_anchors: int = 32,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         """Initialize the selection strategy.

@@ -201,7 +202,7 @@ def __init__(
         selections: Sequence[HintOrType[AnchorSelection]],
         ratios: None | float | Sequence[float] = None,
         selections_kwargs: OneOrSequence[OptionalKwargs] = None,
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         """Initialize the selection strategy.

@@ -257,7 +258,7 @@ def __call__(

 #: A resolver for NodePiece anchor selectors
 anchor_selection_resolver: ClassResolver[AnchorSelection] = ClassResolver.from_subclasses(
-    base=AnchorSelection,
+    base=AnchorSelection,  # type:ignore[type-abstract]
     default=DegreeAnchorSelection,
     skip={SingleSelection},
 )

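The `**kwargs: Any` additions look cosmetic but matter under stricter mypy settings: without an annotation on `**kwargs`, flags like `--disallow-untyped-defs` treat the whole function as untyped and skip checking its body. A small illustration (function name assumed for the example):

```python
from typing import Any


def init_selection(num_anchors: int = 32, **kwargs: Any) -> None:
    # ``kwargs`` itself is a dict[str, Any]; each captured value is Any.
    # Without the ``: Any`` annotation, ``mypy --disallow-untyped-defs``
    # would flag this def and skip type checking its body entirely.
    extras: dict[str, Any] = kwargs
    print(num_anchors, sorted(extras))
```
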
2 changes: 1 addition & 1 deletion src/pykeen/nn/node_piece/loader.py
@@ -93,6 +93,6 @@ def __call__(self, path: pathlib.Path) -> tuple[Mapping[int, Collection[int]], i

 #: A resolver for NodePiece precomputed tokenizer loaders
 precomputed_tokenizer_loader_resolver: ClassResolver[PrecomputedTokenizerLoader] = ClassResolver.from_subclasses(
-    base=PrecomputedTokenizerLoader,
+    base=PrecomputedTokenizerLoader,  # type:ignore[type-abstract]
     default=GalkinPrecomputedTokenizerLoader,
 )

20 changes: 11 additions & 9 deletions src/pykeen/nn/node_piece/representation.py
@@ -2,7 +2,7 @@

 import logging
 import pathlib
-from collections.abc import Callable, Iterable
+from collections.abc import Callable, Iterable, Sequence
 from typing import NamedTuple

 import torch
@@ -257,6 +257,8 @@ class NodePieceRepresentation(CombinedRepresentation):
     github: https://github.com/migalkin/NodePiece
     """

+    base: Sequence[TokenizationRepresentation]
+
     @update_docstring_with_resolver_keys(
         ResolverKey("token_representations", resolver="pykeen.nn.representation_resolver"),
         ResolverKey("tokenizers", resolver="pykeen.nn.node_piece.tokenizer_resolver"),
@@ -275,7 +277,7 @@ def __init__(
         aggregation_kwargs: OptionalKwargs = None,
         max_id: int | None = None,
         **kwargs,
-    ):
+    ) -> None:
         """
         Initialize the representation.

@@ -322,12 +324,12 @@ def __init__(
         # inverse triples are created afterwards implicitly
         mapped_triples = mapped_triples[mapped_triples[:, 1] < triples_factory.real_num_relations]

-        token_representations, token_representations_kwargs, num_tokens = broadcast_upgrade_to_sequences(
+        token_representations_, token_representations_kwargs_, num_tokens = broadcast_upgrade_to_sequences(
             token_representations, token_representations_kwargs, num_tokens
         )

         # tokenize
-        token_representations = [
+        base = [
             TokenizationRepresentation.from_tokenizer(
                 tokenizer=tokenizer_inst,
                 num_tokens=num_tokens_,
@@ -339,8 +341,8 @@ def __init__(
             )
             for tokenizer_inst, token_representation, token_representation_kwargs, num_tokens_ in zip(
                 tokenizer_resolver.make_many(queries=tokenizers, kwargs=tokenizers_kwargs),
-                token_representations,
-                token_representations_kwargs,
+                token_representations_,
+                token_representations_kwargs_,
                 num_tokens,
                 strict=False,
             )
@@ -349,18 +351,18 @@ def __init__(
         # Create an MLP for string aggregation
         if aggregation == "mlp":
             # note: the token representations' shape includes the number of tokens as leading dim
-            embedding_dim = token_representations[0].shape[1]
+            embedding_dim = base[0].shape[1]
             aggregation = ConcatMLP(
                 input_dim=embedding_dim * sum(num_tokens),
                 output_dim=embedding_dim,
             )

         super().__init__(
             max_id=triples_factory.num_entities,
-            base=token_representations,
+            base=base,
             combination=ConcatAggregationCombination,
             combination_kwargs=dict(
-                aggregation=aggregation, aggregation_kwargs=aggregation_kwargs, dim=-len(token_representations[0].shape)
+                aggregation=aggregation, aggregation_kwargs=aggregation_kwargs, dim=-len(base[0].shape)
             ),
             **kwargs,
         )

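The new class-level annotation `base: Sequence[TokenizationRepresentation]` narrows the attribute inherited from `CombinedRepresentation`, so code touching tokenization-specific attributes of `self.base` elements type-checks without casts. A sketch of the idea with hypothetical stand-in classes (the attribute names are illustrative):

```python
from collections.abc import Sequence


class Representation:
    """Stand-in for PyKEEN's representation base class."""


class TokenizationRepresentation(Representation):
    vocabulary_size: int = 0


class CombinedRepresentation(Representation):
    #: the parent may combine arbitrary representations
    base: Sequence[Representation]


class NodePieceRepresentation(CombinedRepresentation):
    #: narrowing the inherited annotation lets mypy accept
    #: tokenization-specific attribute access on elements of ``base``
    base: Sequence[TokenizationRepresentation]

    def total_vocabulary_size(self) -> int:
        return sum(b.vocabulary_size for b in self.base)
```

The renamed locals (`token_representations_`, `base`) in the same hunk serve the separate goal of not rebinding a name to a value of a different type, discussed under `tokenization.py` below.
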
11 changes: 6 additions & 5 deletions src/pykeen/nn/node_piece/tokenization.py
@@ -5,6 +5,7 @@
 from abc import abstractmethod
 from collections import defaultdict
 from collections.abc import Collection, Mapping
+from typing import Any

 import more_itertools
 import numpy
@@ -126,15 +127,15 @@ def _call(
         num_tokens: int,
         num_entities: int,
     ) -> tuple[int, LongTensor]:
-        edge_index = edge_index.numpy()
+        edge_index_np = edge_index.numpy()
         # select anchors
         logger.info(f"Selecting anchors according to {self.anchor_selection}")
-        anchors = self.anchor_selection(edge_index=edge_index)
+        anchors = self.anchor_selection(edge_index=edge_index_np)
         if len(numpy.unique(anchors)) < len(anchors):
             logger.warning(f"Only {len(numpy.unique(anchors))} out of {len(anchors)} anchors are unique")
         # find closest anchors
         logger.info(f"Searching closest anchors with {self.searcher}")
-        tokens = self.searcher(edge_index=edge_index, anchors=anchors, k=num_tokens, num_entities=num_entities)
+        tokens = self.searcher(edge_index=edge_index_np, anchors=anchors, k=num_tokens, num_entities=num_entities)
         num_empty = (tokens < 0).all(axis=1).sum()
         if num_empty > 0:
             logger.warning(
@@ -165,7 +166,7 @@ class MetisAnchorTokenizer(AnchorTokenizer):
     http://glaros.dtc.umn.edu/gkhome/metis/metis/overview
     """

-    def __init__(self, num_partitions: int = 2, device: DeviceHint = None, **kwargs):
+    def __init__(self, num_partitions: int = 2, device: DeviceHint = None, **kwargs: Any) -> None:
         """Initialize the tokenizer.

         :param num_partitions: the number of partitions obtained through Metis.
@@ -280,7 +281,7 @@ def __init__(
         pool: Mapping[int, Collection[int]] | None = None,
         randomize_selection: bool = False,
         loader: HintOrType[PrecomputedTokenizerLoader] = None,
-    ):
+    ) -> None:
         r"""Initialize the tokenizer.

         .. note::

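The `edge_index_np` rename (like `np_tensor` in `triples/utils.py` below) avoids rebinding a name to a value of a different type: mypy keeps a variable's first declared type for the whole scope. A minimal sketch of the error, hedged on the exact return type PyTorch's stubs give `.numpy()`:

```python
import numpy
import torch


def select_anchors_bad(edge_index: torch.Tensor) -> None:
    # mypy: Incompatible types in assignment (expression has type
    # "ndarray[...]", variable has type "Tensor")  [assignment]
    edge_index = edge_index.numpy()  # type: ignore[assignment]


def select_anchors_good(edge_index: torch.Tensor) -> None:
    # A fresh name gives each value its own honest type; no ignore needed.
    edge_index_np: numpy.ndarray = edge_index.numpy()
    print(edge_index_np.shape)
```
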
7 changes: 4 additions & 3 deletions src/pykeen/triples/utils.py
@@ -123,8 +123,8 @@ def tensor_to_df(
     )

     # convert to numpy
-    tensor = tensor.cpu().numpy()
-    data = dict(zip(["head_id", "relation_id", "tail_id"], tensor.T, strict=False))
+    np_tensor = tensor.cpu().numpy()
+    data = dict(zip(["head_id", "relation_id", "tail_id"], np_tensor.T, strict=False))

     # Additional columns
     for key, values in kwargs.items():
@@ -164,7 +164,8 @@ def compute_compressed_adjacency_list(

         adj_list[i] = compressed_adj_list[offsets[i]:offsets[i+1]]
     """
-    num_entities = num_entities or mapped_triples[:, [0, 2]].max().item() + 1
+    if num_entities is None:
+        num_entities = mapped_triples[:, [0, 2]].max().item() + 1
[Review comment from a Member on lines +167 to +168]
Could be replaced by pykeen.triples.utils.get_num_ids

     num_triples = mapped_triples.shape[0]
     adj_lists: list[list[tuple[int, float]]] = [[] for _ in range(num_entities)]
     for i, (s, _, o) in enumerate(mapped_triples):

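The switch from `num_entities = num_entities or ...` to an explicit `is None` check is a correctness fix as much as a typing one: `or` treats every falsy value as missing, so a legitimate `num_entities=0` would silently trigger the fallback. A small demonstration with an assumed fallback value:

```python
def resolve_with_or(num_entities: int | None, fallback: int = 42) -> int:
    # Falsy-but-valid inputs are swallowed: 0 also takes the fallback.
    return num_entities or fallback


def resolve_with_is_none(num_entities: int | None, fallback: int = 42) -> int:
    # Only a genuinely missing value takes the fallback.
    if num_entities is None:
        num_entities = fallback
    return num_entities


assert resolve_with_or(0) == 42       # surprising: 0 was a valid input
assert resolve_with_is_none(0) == 0   # preserved
```
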
10 changes: 10 additions & 0 deletions tox.ini
@@ -135,12 +135,22 @@ deps =
     types-setuptools
     types-tabulate
     types-PyYAML
+    click
+    more-click
+    pystow
+    click-default-group
+    docdata
+    types-decorator
+    requests
+    numpy
+    # start adding main deps one at a time
+    # to incrementally cover lots of issues
 extras =
     templating
     plotting
     wordcloud
     tests
 skip_install = true
 commands = mypy --ignore-missing-imports src/ docs/source/examples
 description = Run the mypy tool to check static typing on the project.