Source code for ggfm.data.lm_generate_embs

import torch
import numpy as np


def generate_lm_embs(all_name, tokenizer, model, indxs, device):
    r"""Generate node embeddings for words with a language model.

    Each selected word is tokenized without special tokens, fed to ``model``
    as a single-item batch, and represented by the mean over the token axis
    of ``output[0][0]``.

    Parameters
    ----------
    all_name: list
        Words list.
    tokenizer:
        LM tokenizer. Must provide ``encode(word, add_special_tokens=False)``
        returning a sequence of token ids.
    model:
        Language model. Called with a ``(1, num_tokens)`` tensor of token ids.
        NOTE(review): ``output[0]`` is assumed to hold the per-token hidden
        states with shape ``(batch, num_tokens, dim)`` -- confirm for the
        model in use.
    indxs: list
        Index positions (into ``all_name``) of the words to embed.
    device: int or str or torch.device
        Device the token tensor is moved to before calling ``model``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(indxs), dim)`` with NumPy's default float
        dtype. Row ``i`` holds the embedding of ``all_name[indxs[i]]``.
        Rows whose word produces no tokens are left as zeros.
    """

    def embed_tokens(token_ids):
        # Wrap in a list to add the batch dimension expected by the model.
        tokens_tensor = torch.tensor([token_ids]).to(device)
        with torch.no_grad():
            output = model(tokens_tensor)
        # First output, first (only) batch item, averaged over tokens.
        return output[0][0].mean(dim=0).cpu().numpy()

    # One probe pass fixes the embedding width up front, so the result has a
    # well-defined second dimension even when ``indxs`` is empty.
    probe = embed_tokens(tokenizer.encode("hello", add_special_tokens=False))
    emb = np.zeros((len(indxs), len(probe)))

    for row, cur in enumerate(indxs):
        token_ids = tokenizer.encode(all_name[cur], add_special_tokens=False)
        # A word with no tokens (e.g. an empty name) would become an empty
        # float tensor that the model cannot embed; keep its zero row instead.
        # ``len`` is used rather than truthiness so array-like ids also work.
        if len(token_ids) == 0:
            continue
        emb[row] = embed_tokens(token_ids)

    return emb