Skip to content

Commit

Permalink
preliminary work for open-source
Browse files Browse the repository at this point in the history
  • Loading branch information
shengzeang committed Apr 26, 2022
1 parent fc7db52 commit e318a62
Show file tree
Hide file tree
Showing 91 changed files with 219 additions and 179 deletions.
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## SGL: Scalable Graph Learning

**SGL** is a Graph Neural Network (GNN) toolkit targeting scalable graph learning, which supports deep graph learning on
extremely large datasets. SGL allows users to easily implement scalable graph neural networks and evalaute its
extremely large datasets. SGL allows users to easily implement scalable graph neural networks and evaluate its
performance on various downstream tasks like node classification, node clustering, and link prediction. Further, SGL
supports auto neural architecture search functionality based
on <a href="https://github.com/PKU-DAIR/open-box" target="_blank" rel="nofollow">OpenBox</a>. SGL is designed and
Expand All @@ -12,21 +12,29 @@ the <a href="https://cuibinpku.github.io/index.html" target="_blank" rel="nofoll

+ **High scalability**: Follow the scalable design paradigm **SGAP**
in <a href="https://arxiv.org/abs/2203.00638" target="_blank" rel="nofollow">PaSca</a>, SGL scales to graph data with
billions of nodes and edegs.
billions of nodes and edges.
+ **Auto neural architecture search**: Automatically choose decent neural architectures according to specific tasks, and
pre-defined objectives (e.g., inference time).
+ **Ease of use**: User-friendly interfaces for implementing existing scalable GNNs and executing various downstream
tasks.

## Installation (TODO)

#### Install from pip
Some datasets in SGL are constructed based
on <a href="https://github.com/pyg-team/pytorch_geometric" target="_blank" rel="nofollow">PyG</a>. Please follow the
link below to install PyG first before installing
SGL: https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html.

#### Install from source
### Install from pip

To install SGL from PyPI:

```bash
pip install sgl-dair
```

## Quick Start (TODO)

**TODO**
A quick start example is given by:

```python
Expand All @@ -39,7 +47,6 @@ model = SGC(prop_steps=3, feat_dim=dataset.num_features, num_classes=dataset.num

device = "cuda:0"
test_acc = NodeClassification(dataset, model, lr=0.1, weight_decay=5e-5, epochs=200, device=device).test_acc
print(test_acc)
```

**TODO**
Expand Down
5 changes: 0 additions & 5 deletions __init__.py

This file was deleted.

7 changes: 3 additions & 4 deletions test.py → examples/sgc_pubmed.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from dataset import Planetoid
from models.homo import SGC
from tasks import NodeClassification
from sgl.dataset import Planetoid
from sgl.models.homo import SGC
from sgl.tasks import NodeClassification

dataset = Planetoid("pubmed", "./", "official")
model = SGC(prop_steps=3, feat_dim=dataset.num_features, num_classes=dataset.num_classes)

device = "cuda:0"
test_acc = NodeClassification(dataset, model, lr=0.1, weight_decay=5e-5, epochs=200, device=device).test_acc
print(test_acc)
6 changes: 3 additions & 3 deletions test_nas.py → examples/test_nas.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import numpy as np
import torch

from dataset import Planetoid
from models.search_models import SearchModel
from search.auto_search import SearchManager
from sgl.dataset import Planetoid
from sgl.models.search_models import SearchModel
from sgl.search.auto_search import SearchManager

dataset = Planetoid("cora", "./", "official")

Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Build-system table: tells build front-ends how to build this project.
[build-system]
# Packages that must be present in the build environment before the backend runs.
requires = [
"setuptools>=42",
"wheel"
]
# Build backend entry point (setuptools' backend module).
build-backend = "setuptools.build_meta"
14 changes: 9 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
torch==1.11.0
scipy==1.5.2
ogb==1.3.3
numpy==1.22.3
torch-geometric
torch>=1.8
networkx
tqdm
numpy>=1.21
scipy
gensim
scikit_learn
ogb
openbox
26 changes: 26 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Packaging script for the ``sgl-dair`` distribution.

Reads the long description from ``README.md`` and the runtime dependencies
from ``requirements.txt``, then hands both to ``setuptools.setup``.
"""
import setuptools

# The README is reused verbatim as the long description (declared as markdown
# via ``long_description_content_type`` below).
with open("README.md", "r", encoding="utf-8") as readme:
    long_description = readme.read()

# Build ``install_requires`` from requirements.txt. Each line is stripped so
# that trailing whitespace never leaks into a requirement string, and blank,
# whitespace-only and ``#`` comment lines are skipped because they are not
# valid requirement specifiers.
with open("requirements.txt", "r", encoding="utf-8") as require:
    requirements = []
    for raw_line in require:
        requirement = raw_line.strip()
        if not requirement or requirement.startswith("#"):
            continue
        requirements.append(requirement)

setuptools.setup(
    name="sgl-dair",
    version="0.1.0",
    author="DAIR Lab @PKU",
    description="Graph Neural Network (GNN) toolkit targeting scalable graph learning",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/PKU-DAIR/SGL",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    packages=setuptools.find_packages(),
    # NOTE(review): some entries in requirements.txt may need a newer Python
    # than 3.6 (e.g. recent numpy releases) -- confirm this floor is accurate.
    python_requires='>=3.6',
    install_requires=requirements,
    # NOTE(review): presumably listed so requirements.txt ships with the
    # distribution, since this script reads it at build time -- confirm; a bare
    # file name without a target directory is installed directly under the
    # installation prefix.
    data_files=["requirements.txt"],
)
1 change: 1 addition & 0 deletions sgl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Package-level name string, exposed as ``sgl.name``.
name = "sgl"
Empty file added sgl/data/__init__.py
Empty file.
File renamed without changes.
6 changes: 3 additions & 3 deletions data/base_dataset.py → sgl/data/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import warnings
from scipy.sparse import csr_matrix

from data.base_data import Node, Edge
from data.utils import file_exist, to_undirected
from dataset.choose_edge_type import ChooseMultiSubgraphs
from sgl.data.base_data import Node, Edge
from sgl.data.utils import file_exist, to_undirected
from sgl.dataset.choose_edge_type import ChooseMultiSubgraphs


# Base class for node-level tasks
Expand Down
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions dataset/acm.py → sgl/dataset/acm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from torch_geometric.datasets import HGBDataset
from typing import Tuple

from data.base_data import HeteroGraph
from data.base_dataset import HeteroNodeDataset
from dataset.utils import pkl_read_file
from sgl.data.base_data import HeteroGraph
from sgl.data.base_dataset import HeteroNodeDataset
from sgl.dataset.utils import pkl_read_file


class Acm(HeteroNodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/actor.py → sgl/dataset/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import torch
from torch_sparse import SparseTensor, coalesce

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class Actor(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/airports.py → sgl/dataset/airports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to, random_split_dataset
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to, random_split_dataset


class Airports(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/amazon.py → sgl/dataset/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to, read_npz, random_split_dataset
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to, read_npz, random_split_dataset


class Amazon(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/amazon_product.py → sgl/dataset/amazon_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import scipy.sparse as sp
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class AmazonProduct(NodeDataset):
Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions dataset/coauthor.py → sgl/dataset/coauthor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to, read_npz, random_split_dataset
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to, read_npz, random_split_dataset


class Coauthor(NodeDataset):
Expand Down
8 changes: 4 additions & 4 deletions dataset/dblp.py → sgl/dataset/dblp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import torch
from typing import Tuple

from data.base_data import HeteroGraph
from data.base_dataset import HeteroNodeDataset
from dataset.dblp_original import DBLP
from dataset.utils import pkl_read_file
from sgl.data.base_data import HeteroGraph
from sgl.data.base_dataset import HeteroNodeDataset
from sgl.dataset.dblp_original import DBLP
from sgl.dataset.utils import pkl_read_file


class Dblp(HeteroNodeDataset):
Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions dataset/facebook.py → sgl/dataset/facebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to, random_split_dataset
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to, random_split_dataset


class Facebook(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/flickr.py → sgl/dataset/flickr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import scipy.sparse as sp
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class Flickr(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/github.py → sgl/dataset/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import download_to, pkl_read_file, random_split_dataset
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import download_to, pkl_read_file, random_split_dataset


class Github(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/karateclub.py → sgl/dataset/karateclub.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file


class KarateClub(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/linkx_dataset.py → sgl/dataset/linkx_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import torch.nn.functional as F
from scipy.io import loadmat

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


# A variety of non-homophilous graph datasets
Expand Down
6 changes: 3 additions & 3 deletions dataset/nell.py → sgl/dataset/nell.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from torch_geometric.data import extract_tar
from torch_geometric.io import read_txt_array

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class Nell(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/ogbn.py → sgl/dataset/ogbn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import torch
from ogb.nodeproppred import PygNodePropPredDataset

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, to_undirected
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, to_undirected


class Ogbn(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/ogbn_mag.py → sgl/dataset/ogbn_mag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import torch
from ogb.nodeproppred import PygNodePropPredDataset

from data.base_data import HeteroGraph
from data.base_dataset import HeteroNodeDataset
from dataset.utils import pkl_read_file, to_undirected
from sgl.data.base_data import HeteroGraph
from sgl.data.base_dataset import HeteroNodeDataset
from sgl.dataset.utils import pkl_read_file, to_undirected


class OgbnMag(HeteroNodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/planetoid.py → sgl/dataset/planetoid.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import scipy.sparse as sp
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class Planetoid(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/reddit.py → sgl/dataset/reddit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import torch
from torch_geometric.data import extract_zip

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class Reddit(NodeDataset):
Expand Down
6 changes: 3 additions & 3 deletions dataset/twitch.py → sgl/dataset/twitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import pickle as pkl
import torch

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to, random_split_dataset
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to, random_split_dataset


class Twitch(NodeDataset):
Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions dataset/webkb.py → sgl/dataset/webkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import torch
from torch_sparse import coalesce

from data.base_data import Graph
from data.base_dataset import NodeDataset
from dataset.utils import pkl_read_file, download_to
from sgl.data.base_data import Graph
from sgl.data.base_dataset import NodeDataset
from sgl.dataset.utils import pkl_read_file, download_to


class WebKB(NodeDataset):
Expand Down
Loading

0 comments on commit e318a62

Please sign in to comment.