I am working with knowledge graphs in Pykeen. I made a variation of the negative sampler class provided by Pykeen. However, when I try to call my own class in the Pykeen pipeline, it results in an error saying that a keyword-only argument is missing. The difference with the given class and my class is that I want to add a specific triple to the class so that I can use it in the corrupt_batch function.

This is my own class:

class MyNegativeSampler(NegativeSampler):

def __init__(
    self,
    *,
    triple: torch.Tensor,
    corruption_scheme: Optional[Collection[Target]] = None,
    **kwargs,
) -> None:
    """Initialize the basic negative sampler with the given entities.

    :param corruption_scheme:
        What sides ('h', 'r', 't') should be corrupted. Defaults to head and tail ('h', 't').
    :param kwargs:
        Additional keyword based arguments passed to :class:`pykeen.sampling.NegativeSampler`.
    """
    
    
    super().__init__(**kwargs)
    self.corruption_scheme = corruption_scheme or (LABEL_HEAD, LABEL_TAIL)
    # Set the indices
    self._corruption_indices = [TARGET_TO_INDEX[side] for side in self.corruption_scheme]
    self.triple=triple
    
    
    

def corrupt_batch(self, positive_batch: torch.LongTensor) -> torch.LongTensor:  # noqa: D102
    batch_shape = positive_batch.shape[:-1]

    # clone positive batch for corruption (.repeat_interleave creates a copy)
    negative_batch = positive_batch.view(-1, 3).repeat_interleave(self.num_negs_per_pos, dim=0)

    # Bind the total number of negatives to sample in this batch
    total_num_negatives = negative_batch.shape[0]

    # Equally corrupt all sides
    split_idx = int(math.ceil(total_num_negatives / len(self._corruption_indices)))

    # Do not detach, as no gradients should flow into the indices.
    for index, start in zip(self._corruption_indices, range(0, total_num_negatives, split_idx)):
        stop = min(start + split_idx, total_num_negatives)
        random_replacement_(
            batch=negative_batch,
            index=index,
            selection=slice(start, stop),
            size=stop - start,
            max_index=self.num_relations if index == 1 else self.num_entities,
        )

    
    
    negative_batch_array = negative_batch.view(*batch_shape, self.num_negs_per_pos, 3).numpy()
    first_half = negative_batch_array[:int((len(negative_batch_array)/2))]
    second_half = negative_batch_array[int((len(negative_batch_array)/2)):]
    
    second_half_with_triple = []
    
    for i in range(int((len(negative_batch_array)/2))):
        second_half_with_triple.append([self.triple] )
    
    second_half_with_triple_array = np.array(second_half_with_triple)
    first_half_array = np.array(first_half)
    
    test_list1 = np.ndarray.tolist(first_half_array)
    test_list2 = np.ndarray.tolist(second_half_with_triple_array)
    
    negative_batch_with_triple_list = test_list1 + test_list2
    
    negative_batch_with_triple_array = np.array(negative_batch_with_triple_list)
    
    pt_tensor_from_list = torch.Tensor(negative_batch_with_triple_array)
    typecst = pt_tensor_from_list.type(torch.int64)
    
    #return negative_batch.view(*batch_shape, self.num_negs_per_pos, 3)
    return  typecst

Running the following code works:

triple=([8, 46, 7])
sampler = MyNegativeSampler(triple=triple, mapped_triples=mapped_triples)

print(sampler.corrupt_batch(mapped_triples))

However, when entering the sampler into the pykeen pipeline, it does not work anymore.

pipeline_result_false = pipeline(
dataset= dataset,
model= TransE,
loss= 'MarginRankingLoss',
loss_kwargs = dict(
reduction="mean"),
training_loop='sLCWA',
negative_sampler=sampler,
evaluator='RankBasedEvaluator',
training_kwargs=dict(num_epochs=128),
random_seed = 3757357109,
)

This is the error that shows:

'''
Traceback (most recent call last):
File "c:\Users\Tommy Lohn\OneDrive\Documenten\year 3\Bachelor AI Project\coding\negative sampling test.py", line 305, in <module>
pipeline_result_false = pipeline(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\pipeline\api.py", line 1237, in pipeline
losses = training_loop_instance.train(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\training\training_loop.py", line 371, in train
result = self._train(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\training\training_loop.py", line 516, in _train
sub_batch_size, slice_size = self.sub_batch_and_slice(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\training\training_loop.py", line 932, in sub_batch_and_slice
sub_batch_size, finished_search, supports_sub_batching = self._sub_batch_size_search(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\training\training_loop.py", line 1007, in _sub_batch_size_search
self._train(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\training\training_loop.py", line 576, in _train
train_data_loader = self._create_training_data_loader(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\training\slcwa.py", line 58, in _create_training_data_loader
dataset=triples_factory.create_slcwa_instances(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\triples\triples_factory.py", line 503, in create_slcwa_instances
return cls(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\pykeen\triples\instances.py", line 196, in __init__
self.negative_sampler = negative_sampler_resolver.make(
File "E:\AnacondaPython\envs\Thesis\lib\site-packages\class_resolver\api.py", line 207, in make
raise KeywordArgumentError(cls, e.args[0]) from None
class_resolver.api.KeywordArgumentError: MyNegativeSampler: __init__() missing 1 required keyword-only argument: 'triple'
'''

I think that it has something to do with the superclass, which I will link: https://github.com/pykeen/pykeen/blob/master/src/pykeen/sampling/negative_sampler.py

🔴 No definitive solution yet