From dced2b665271f5d3975ce9f3cc44c60b91cdd240 Mon Sep 17 00:00:00 2001 From: bruceEeZhao Date: Tue, 28 Dec 2021 22:45:09 +0800 Subject: [PATCH 1/5] fix encoding error when setup; and Parameter error due to the version of word2vec --- ge/models/deepwalk.py | 4 ++-- ge/models/node2vec.py | 4 ++-- ge/models/struc2vec.py | 4 ++-- setup.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ge/models/deepwalk.py b/ge/models/deepwalk.py index d0fadc7..31ab48f 100644 --- a/ge/models/deepwalk.py +++ b/ge/models/deepwalk.py @@ -38,12 +38,12 @@ def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs): kwargs["sentences"] = self.sentences kwargs["min_count"] = kwargs.get("min_count", 0) - kwargs["size"] = embed_size + kwargs["vector_size"] = embed_size kwargs["sg"] = 1 # skip gram kwargs["hs"] = 1 # deepwalk use Hierarchical Softmax kwargs["workers"] = workers kwargs["window"] = window_size - kwargs["iter"] = iter + kwargs["epochs"] = iter print("Learning embedding vectors...") model = Word2Vec(**kwargs) diff --git a/ge/models/node2vec.py b/ge/models/node2vec.py index 16f86cb..0c2102a 100644 --- a/ge/models/node2vec.py +++ b/ge/models/node2vec.py @@ -43,12 +43,12 @@ def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs): kwargs["sentences"] = self.sentences kwargs["min_count"] = kwargs.get("min_count", 0) - kwargs["size"] = embed_size + kwargs["vector_size"] = embed_size kwargs["sg"] = 1 kwargs["hs"] = 0 # node2vec not use Hierarchical Softmax kwargs["workers"] = workers kwargs["window"] = window_size - kwargs["iter"] = iter + kwargs["epochs"] = iter print("Learning embedding vectors...") model = Word2Vec(**kwargs) diff --git a/ge/models/struc2vec.py b/ge/models/struc2vec.py index 4040562..e637928 100644 --- a/ge/models/struc2vec.py +++ b/ge/models/struc2vec.py @@ -112,8 +112,8 @@ def train(self, embed_size=128, window_size=5, workers=3, iter=5): sentences = self.sentences print("Learning representation...") - model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers, - iter=iter) + model = Word2Vec(sentences, vector_size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers, + epochs=iter) print("Learning representation done!") self.w2v_model = model diff --git a/setup.py b/setup.py index 38a4235..1843939 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import setuptools -with open("README.md", "r") as fh: +with open("README.md", "r", encoding='utf-8') as fh: long_description = fh.read() From c3195075411b48fad4854ccac2cb456fc1864952 Mon Sep 17 00:00:00 2001 From: bruceEeZhao Date: Thu, 6 Jan 2022 17:21:32 +0800 Subject: [PATCH 2/5] add bfs --- ge/models/__init__.py | 3 +- ge/models/alibaba-eges.py | 67 +++++++++++++++++++++++++++++++++++ ge/models/bfswalk.py | 64 ++++++++++++++++++++++++++++++++++ ge/models/deepwalk.py | 12 +++---- ge/models/node2vec.py | 10 +++--- ge/walker.py | 73 ++++++++++++++++++++++++++++++++------- 6 files changed, 205 insertions(+), 24 deletions(-) create mode 100644 ge/models/alibaba-eges.py create mode 100644 ge/models/bfswalk.py diff --git a/ge/models/__init__.py b/ge/models/__init__.py index d2375e9..cd49858 100644 --- a/ge/models/__init__.py +++ b/ge/models/__init__.py @@ -3,6 +3,7 @@ from .line import LINE from .sdne import SDNE from .struc2vec import Struc2Vec +from .bfswalk import BFSWalk -__all__ = ["DeepWalk", "Node2Vec", "LINE", "SDNE", "Struc2Vec"] +__all__ = ["DeepWalk", "Node2Vec", "LINE", "SDNE", "Struc2Vec", "BFSWalk"] diff --git 
a/ge/models/alibaba-eges.py b/ge/models/alibaba-eges.py new file mode 100644 index 0000000..b4ec89c --- /dev/null +++ b/ge/models/alibaba-eges.py @@ -0,0 +1,67 @@ +# -*- coding:utf-8 -*- + +""" + + + +Author: + + Chengliang Zhao, bruce.e.zhao@gmail.com + + + +Reference: + + [1] Jizhe Wang, Pipei Huang, Huan Zhao, Zhibo Zhang, Binqiang Zhao, and Dik Lun Lee. 2018. Billion-scale Commodity Embedding for E-commerce Recommendation in Alibaba. In Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining (KDD '18). Association for Computing Machinery, New York, NY, USA, 839–848. DOI:https://doi.org/10.1145/3219819.3219869 + + +""" +from ..walker import RandomWalker +from gensim.models import Word2Vec +import pandas as pd +import numpy as np + + +class EGES: + def __init__(self, graph, walk_length, num_walks, workers=1): + + self.graph = graph + self.w2v_model = None + self._embeddings = {} + + self.walker = RandomWalker( + graph, p=1, q=1, ) + self.sentences = self.walker.simulate_walks( + num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1) + + def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs): + + kwargs["sentences"] = self.sentences + kwargs["min_count"] = kwargs.get("min_count", 0) + kwargs["vector_size"] = embed_size + kwargs["sg"] = 1 # skip gram + kwargs["hs"] = 1 # deepwalk use Hierarchical Softmax + kwargs["workers"] = workers + kwargs["window"] = window_size + kwargs["epochs"] = iter + + # print("Learning embedding vectors...") + # model = Word2Vec(**kwargs) + # print("Learning embedding vectors done!") + + # self.w2v_model = model + # return model + + + + + def get_embeddings(self,): + if self.w2v_model is None: + print("model not train") + return {} + + self._embeddings = {} + for word in self.graph.nodes(): + self._embeddings[word] = self.w2v_model.wv[word] + + return self._embeddings diff --git a/ge/models/bfswalk.py b/ge/models/bfswalk.py new file mode 100644 index 0000000..74e16d0 --- /dev/null +++ b/ge/models/bfswalk.py @@ -0,0 +1,64 @@ +# -*- coding:utf-8 -*- + +""" + + + +Author: + + Weichen Shen,wcshen1994@163.com + + + +Reference: + + [1] Perozzi B, Al-Rfou R, Skiena S. Deepwalk: Online learning of social representations[C]//Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining. 
ACM, 2014: 701-710.(http://www.perozzi.net/publications/14_kdd_deepwalk.pdf) + + + +""" +from ..walker import RandomWalker +from gensim.models import Word2Vec +import pandas as pd + + +class BFSWalk: + def __init__(self, graph, walk_length, num_walks, workers=1, weight = False): + + self.graph = graph + self.w2v_model = None + self._embeddings = {} + + self.walker = RandomWalker( + graph, p=1, q=1, ) + self.sentences = self.walker.simulate_walks("bfs", + num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight = weight) + + def train(self, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs=1, **kwargs): + + kwargs["sentences"] = self.sentences + kwargs["min_count"] = kwargs.get("min_count", 0) + kwargs["vector_size"] = embed_size + kwargs["sg"] = sg # skip gram + kwargs["hs"] = hs # deepwalk use Hierarchical Softmax + kwargs["workers"] = workers + kwargs["window"] = window_size + kwargs["epochs"] = iter + + print("Learning embedding vectors...") + model = Word2Vec(**kwargs) + print("Learning embedding vectors done!") + + self.w2v_model = model + return model + + def get_embeddings(self,): + if self.w2v_model is None: + print("model not train") + return {} + + self._embeddings = {} + for word in self.graph.nodes(): + self._embeddings[word] = self.w2v_model.wv[word] + + return self._embeddings diff --git a/ge/models/deepwalk.py b/ge/models/deepwalk.py index 31ab48f..3a5dc15 100644 --- a/ge/models/deepwalk.py +++ b/ge/models/deepwalk.py @@ -23,7 +23,7 @@ class DeepWalk: - def __init__(self, graph, walk_length, num_walks, workers=1): + def __init__(self, graph, walk_length, num_walks, workers=1, weight = False): self.graph = graph self.w2v_model = None @@ -31,16 +31,16 @@ def __init__(self, graph, walk_length, num_walks, workers=1): self.walker = RandomWalker( graph, p=1, q=1, ) - self.sentences = self.walker.simulate_walks( - num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1) + self.sentences = self.walker.simulate_walks("deep", + num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight = weight) - def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs): + def train(self, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs=1, **kwargs): kwargs["sentences"] = self.sentences kwargs["min_count"] = kwargs.get("min_count", 0) kwargs["vector_size"] = embed_size - kwargs["sg"] = 1 # skip gram - kwargs["hs"] = 1 # deepwalk use Hierarchical Softmax + kwargs["sg"] = sg # skip gram + kwargs["hs"] = hs # deepwalk use Hierarchical Softmax kwargs["workers"] = workers kwargs["window"] = window_size kwargs["epochs"] = iter diff --git a/ge/models/node2vec.py b/ge/models/node2vec.py index 0c2102a..d5b4a72 100644 --- a/ge/models/node2vec.py +++ b/ge/models/node2vec.py @@ -36,16 +36,16 @@ def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_r print("Preprocess transition probs...") self.walker.preprocess_transition_probs() - self.sentences = self.walker.simulate_walks( - num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1) + self.sentences = self.walker.simulate_walks("node", + num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight=False) - def train(self, embed_size=128, window_size=5, workers=3, iter=5, **kwargs): + def train(self, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs = 1, **kwargs): kwargs["sentences"] = self.sentences kwargs["min_count"] = kwargs.get("min_count", 0) kwargs["vector_size"] = embed_size - 
kwargs["sg"] = 1 - kwargs["hs"] = 0 # node2vec not use Hierarchical Softmax + kwargs["sg"] = sg + kwargs["hs"] = hs # node2vec not use Hierarchical Softmax kwargs["workers"] = workers kwargs["window"] = window_size kwargs["epochs"] = iter diff --git a/ge/walker.py b/ge/walker.py index 7266585..ba96ddb 100644 --- a/ge/walker.py +++ b/ge/walker.py @@ -37,6 +37,47 @@ def deepwalk_walk(self, walk_length, start_node): break return walk + def bfs_walk(self, walk_length, start_node): + + walk = [start_node] + + while len(walk) < walk_length: + cur = walk[-1] + cur_nbrs = list(self.G.neighbors(cur)) + if len(cur_nbrs) > 0: + l = len(cur_nbrs) + ranlist = random.sample(range(0,l),l) + for i in ranlist: + walk.append(cur_nbrs[i]) + #walk.append(random.choice(cur_nbrs)) + else: + break + return walk + + def deepwalk_walk_weighted(self, walk_length, start_node): + + walk = [start_node] + + while len(walk) < walk_length: + cur = walk[-1] + cur_nbrs = list(self.G.neighbors(cur)) + if len(cur_nbrs) > 0: + p = self.chose_node_p(cur, cur_nbrs) + walk.append(random.choice(cur_nbrs, p = p)) + else: + break + return walk + + # 根据边的权重,计算每个edge被选择的概率 + def chose_node_p(self, cur, nbrs): + # 计算每一个位置被选择的概率,返回概率 + weight = [] + for i in nbrs: + weight.append(self.G[cur][i]["weight"]) + + total = sum(weight) + return [i/total for i in weight] + def node2vec_walk(self, walk_length, start_node): G = self.G @@ -116,34 +157,42 @@ def rejection_sample(inv_p, inv_q, nbrs_num): break return walk - def simulate_walks(self, num_walks, walk_length, workers=1, verbose=0): - + def simulate_walks(self, method, num_walks, walk_length, workers=1, verbose=0, weight = False): G = self.G nodes = list(G.nodes()) results = Parallel(n_jobs=workers, verbose=verbose, )( - delayed(self._simulate_walks)(nodes, num, walk_length) for num in + delayed(self._simulate_walks)(method, nodes, num, walk_length, weight) for num in partition_num(num_walks, workers)) walks = list(itertools.chain(*results)) return walks - def _simulate_walks(self, nodes, num_walks, walk_length,): + def _simulate_walks(self, method, nodes, num_walks, walk_length, weight = False): walks = [] for _ in range(num_walks): random.shuffle(nodes) for v in nodes: - if self.p == 1 and self.q == 1: - walks.append(self.deepwalk_walk( - walk_length=walk_length, start_node=v)) - elif self.use_rejection_sampling: - walks.append(self.node2vec_walk2( - walk_length=walk_length, start_node=v)) + if method == "deep": + if weight: + walks.append(self.deepwalk_walk_weighted( + walk_length=walk_length, start_node=v)) + else: + walks.append(self.deepwalk_walk( + walk_length=walk_length, start_node=v)) + elif method == "bfs": + walks.append(self.bfs_walk(walk_length=walk_length, start_node=v)) + elif method == "node": + if self.use_rejection_sampling: + walks.append(self.node2vec_walk2( + walk_length=walk_length, start_node=v)) + else: + walks.append(self.node2vec_walk( + walk_length=walk_length, start_node=v)) else: - walks.append(self.node2vec_walk( - walk_length=walk_length, start_node=v)) + pass return walks def get_alias_edge(self, t, v): From dff30c9c7b32f61f5d62bfe53d822573915d027e Mon Sep 17 00:00:00 2001 From: bruceEeZhao Date: Thu, 6 Jan 2022 17:34:08 +0800 Subject: [PATCH 3/5] fix bug in bfs --- ge/walker.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ge/walker.py b/ge/walker.py index ba96ddb..2540691 100644 --- a/ge/walker.py +++ b/ge/walker.py @@ -41,17 +41,15 @@ def bfs_walk(self, walk_length, start_node): walk = [start_node] - while 
len(walk) < walk_length: - cur = walk[-1] - cur_nbrs = list(self.G.neighbors(cur)) - if len(cur_nbrs) > 0: - l = len(cur_nbrs) - ranlist = random.sample(range(0,l),l) - for i in ranlist: - walk.append(cur_nbrs[i]) - #walk.append(random.choice(cur_nbrs)) - else: - break + # while len(walk) < walk_length: + cur = walk[-1] + cur_nbrs = list(self.G.neighbors(cur)) + if len(cur_nbrs) > 0: + l = len(cur_nbrs) + ranlist = random.sample(range(0,l),l) + for i in ranlist: + walk.append(cur_nbrs[i]) + return walk def deepwalk_walk_weighted(self, walk_length, start_node): From d433d8afea68851a62942212a6a7c301e773dc08 Mon Sep 17 00:00:00 2001 From: bruceEeZhao Date: Thu, 17 Mar 2022 19:09:38 +0800 Subject: [PATCH 4/5] fix --- ge/walker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ge/walker.py b/ge/walker.py index 2540691..d8778c0 100644 --- a/ge/walker.py +++ b/ge/walker.py @@ -159,7 +159,9 @@ def simulate_walks(self, method, num_walks, walk_length, workers=1, verbose=0, w G = self.G nodes = list(G.nodes()) - + print(len(nodes)) + nodes = [i for i in nodes if G.out_degree(i)] + print(len(nodes)) results = Parallel(n_jobs=workers, verbose=verbose, )( delayed(self._simulate_walks)(method, nodes, num, walk_length, weight) for num in partition_num(num_walks, workers)) From 5fee03232b8bce23d0e388f520988c356ce182e5 Mon Sep 17 00:00:00 2001 From: bruceEeZhao Date: Fri, 18 Mar 2022 17:26:28 +0800 Subject: [PATCH 5/5] change --- ge/models/bfswalk.py | 15 +++++++++------ ge/models/deepwalk.py | 15 +++++++++------ ge/models/line.py | 2 +- ge/models/node2vec.py | 15 +++++++++------ ge/walker.py | 35 ++++++++++++++++++++--------------- 5 files changed, 48 insertions(+), 34 deletions(-) diff --git a/ge/models/bfswalk.py b/ge/models/bfswalk.py index 74e16d0..72155a6 100644 --- a/ge/models/bfswalk.py +++ b/ge/models/bfswalk.py @@ -18,12 +18,12 @@ """ from ..walker import RandomWalker -from gensim.models import Word2Vec +from gensim.models import Word2Vec, word2vec import pandas as pd class BFSWalk: - def __init__(self, graph, walk_length, num_walks, workers=1, weight = False): + def __init__(self, graph, outlier, walk_length, num_walks, workers=1, weight = False): self.graph = graph self.w2v_model = None @@ -31,13 +31,13 @@ def __init__(self, graph, walk_length, num_walks, workers=1, weight = False): self.walker = RandomWalker( graph, p=1, q=1, ) - self.sentences = self.walker.simulate_walks("bfs", + self.sentences = self.walker.simulate_walks("bfs", outlier, num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight = weight) - def train(self, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs=1, **kwargs): + def train(self, walkfile, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs=1, **kwargs): - kwargs["sentences"] = self.sentences - kwargs["min_count"] = kwargs.get("min_count", 0) + kwargs["sentences"] = word2vec.Text8Corpus(walkfile) + kwargs["min_count"] = kwargs.get("min_count", 1) kwargs["vector_size"] = embed_size kwargs["sg"] = sg # skip gram kwargs["hs"] = hs # deepwalk use Hierarchical Softmax @@ -62,3 +62,6 @@ def get_embeddings(self,): self._embeddings[word] = self.w2v_model.wv[word] return self._embeddings + + def get_sentences(self): + return self.sentences \ No newline at end of file diff --git a/ge/models/deepwalk.py b/ge/models/deepwalk.py index 3a5dc15..0d3386a 100644 --- a/ge/models/deepwalk.py +++ b/ge/models/deepwalk.py @@ -18,12 +18,12 @@ """ from ..walker import RandomWalker -from gensim.models import Word2Vec +from 
gensim.models import Word2Vec, word2vec import pandas as pd class DeepWalk: - def __init__(self, graph, walk_length, num_walks, workers=1, weight = False): + def __init__(self, graph, outlier, walk_length, num_walks, workers=1, weight = False): self.graph = graph self.w2v_model = None @@ -31,13 +31,13 @@ def __init__(self, graph, walk_length, num_walks, workers=1, weight = False): self.walker = RandomWalker( graph, p=1, q=1, ) - self.sentences = self.walker.simulate_walks("deep", + self.sentences = self.walker.simulate_walks("deep", outlier, num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight = weight) - def train(self, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs=1, **kwargs): + def train(self, walkfile, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs=1, **kwargs): - kwargs["sentences"] = self.sentences - kwargs["min_count"] = kwargs.get("min_count", 0) + kwargs["sentences"] = word2vec.Text8Corpus(walkfile) + kwargs["min_count"] = kwargs.get("min_count", 1) kwargs["vector_size"] = embed_size kwargs["sg"] = sg # skip gram kwargs["hs"] = hs # deepwalk use Hierarchical Softmax @@ -62,3 +62,6 @@ def get_embeddings(self,): self._embeddings[word] = self.w2v_model.wv[word] return self._embeddings + + def get_sentences(self): + return self.sentences \ No newline at end of file diff --git a/ge/models/line.py b/ge/models/line.py index 04c5073..96de8b0 100644 --- a/ge/models/line.py +++ b/ge/models/line.py @@ -92,7 +92,7 @@ def __init__(self, graph, embedding_size=8, negative_ratio=5, order='second',): self.node_size = graph.number_of_nodes() self.edge_size = graph.number_of_edges() self.samples_per_epoch = self.edge_size*(1+negative_ratio) - + self._gen_sampling_table() self.reset_model() diff --git a/ge/models/node2vec.py b/ge/models/node2vec.py index d5b4a72..1e0c6a8 100644 --- a/ge/models/node2vec.py +++ b/ge/models/node2vec.py @@ -18,7 +18,7 @@ """ -from gensim.models import Word2Vec +from gensim.models import Word2Vec, word2vec import pandas as pd from ..walker import RandomWalker @@ -26,7 +26,7 @@ class Node2Vec: - def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0): + def __init__(self, graph, outlier, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_rejection_sampling=0): self.graph = graph self._embeddings = {} @@ -36,13 +36,13 @@ def __init__(self, graph, walk_length, num_walks, p=1.0, q=1.0, workers=1, use_r print("Preprocess transition probs...") self.walker.preprocess_transition_probs() - self.sentences = self.walker.simulate_walks("node", + self.sentences = self.walker.simulate_walks("node", outlier, num_walks=num_walks, walk_length=walk_length, workers=workers, verbose=1, weight=False) - def train(self, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs = 1, **kwargs): + def train(self, walkfile, embed_size=128, window_size=5, workers=3, iter=5, sg = 1, hs = 1, **kwargs): - kwargs["sentences"] = self.sentences - kwargs["min_count"] = kwargs.get("min_count", 0) + kwargs["sentences"] = word2vec.Text8Corpus(walkfile) + kwargs["min_count"] = kwargs.get("min_count", 1) kwargs["vector_size"] = embed_size kwargs["sg"] = sg kwargs["hs"] = hs # node2vec not use Hierarchical Softmax @@ -68,3 +68,6 @@ def get_embeddings(self,): self._embeddings[word] = self.w2v_model.wv[word] return self._embeddings + + def get_sentences(self): + return self.sentences \ No newline at end of file diff --git a/ge/walker.py b/ge/walker.py index d8778c0..e3408c5 100644 --- a/ge/walker.py +++ 
b/ge/walker.py
@@ -35,7 +35,8 @@ def deepwalk_walk(self, walk_length, start_node):
                 walk.append(random.choice(cur_nbrs))
             else:
                 break
-        return walk
+
+        return " ".join(walk)
 
     def bfs_walk(self, walk_length, start_node):
 
@@ -50,7 +51,7 @@ def bfs_walk(self, walk_length, start_node):
             for i in ranlist:
                 walk.append(cur_nbrs[i])
 
-        return walk
+        return " ".join(walk)
 
     def deepwalk_walk_weighted(self, walk_length, start_node):
 
@@ -64,7 +65,7 @@ def deepwalk_walk_weighted(self, walk_length, start_node):
                 walk.append(random.choices(cur_nbrs, weights=p)[0])
             else:
                 break
-        return walk
+        return " ".join(walk)
 
     # Compute the probability of selecting each edge, based on its weight
     def chose_node_p(self, cur, nbrs):
@@ -100,7 +101,7 @@ def node2vec_walk(self, walk_length, start_node):
             else:
                 break
 
-        return walk
+        return " ".join(walk)
 
     def node2vec_walk2(self, walk_length, start_node):
         """
@@ -153,28 +154,32 @@ def rejection_sample(inv_p, inv_q, nbrs_num):
                 walk.append(next_node)
             else:
                 break
-        return walk
+        return " ".join(walk)
 
-    def simulate_walks(self, method, num_walks, walk_length, workers=1, verbose=0, weight = False):
+    def simulate_walks(self, method, outlier, num_walks, walk_length, workers=1, verbose=0, weight = False):
         G = self.G
 
         nodes = list(G.nodes())
         print(len(nodes))
         nodes = [i for i in nodes if G.out_degree(i)]
+        nodes.extend(outlier)
+        random.shuffle(nodes)
         print(len(nodes))
-        results = Parallel(n_jobs=workers, verbose=verbose, )(
-            delayed(self._simulate_walks)(method, nodes, num, walk_length, weight) for num in
-            partition_num(num_walks, workers))
+        # results = Parallel(n_jobs=workers, verbose=verbose, )(
+        #     delayed(self._simulate_walks)(method, nodes, num, walk_length, weight) for num in
+        #     partition_num(num_walks, workers))
 
-        walks = list(itertools.chain(*results))
+        # walks = list(itertools.chain(*results))
+        walks = []
+        self._simulate_walks(walks, method, nodes, num_walks, walk_length, weight)
 
         return walks
 
-    def _simulate_walks(self, method, nodes, num_walks, walk_length, weight = False):
-        walks = []
-        for _ in range(num_walks):
-            random.shuffle(nodes)
-            for v in nodes:
+    def _simulate_walks(self, walks, method, nodes, num_walks, walk_length, weight = False):
+        # walks = []
+        for v in nodes:
+            for _ in range(num_walks):
+                # random.shuffle(nodes)
                 if method == "deep":
                     if weight:
                         walks.append(self.deepwalk_walk_weighted(