Skip to content

Commit

Permalink
Add Jet paper benchmarking samples and submission scripts (#35)
Browse files Browse the repository at this point in the history
* Add benchmark code for Jet on CPU

* Add submission script and data formatting for Niagara

* Update naming and processing for non shared example

* Update README and descriptions

* Add m12 benchmarks

* Ensure cd into submission dir called

* m12 submission scripts for all cases

* Update m12 description

* Fix number of tasks flag

* Fix file parsing

* Add CMake build commands to readme

* Fix referencing

* Add GPU sliced and non-sliced benchmarks

* Fix json data format for m12

* Add GBS benchmark

* Fix naming in GBS build

* Ensure reduction result returned

* Relabel input for m12

* Output tensor with operator[]

* Return param sweep for cpu m10

* Ensure consistent indexing into contracted example outputs

* Update slicing for GBS

* Ensure correct data offsets with non power-2 data

* Add job submission scripts for GPU m10

* Update submission dir

* Update GPU submission scripts

* Ensure GBS outputs to correct file

* Add deletion task to GPU full BM

* Add m=12 GPU slice example

* Tidy m12 benchmarks

* Readd sliced m12

* Update m=12 examples

* Fix missing slurm param for cpu m=10

* Update GPU benchmarks

* Add multi-sliced GPU m=12 benchmark

* Update GPU benchmark required branch

* Ensure multiple slices contracted on m12

* Update Python setup env script

* Move files

* Add walrus data file

* Add GBS TN data

* Add JET GBS benchmarks

* Fix compilation and linking errors on PPC systems

* Readd walrus

* Update TBCC to TBC

* Fix Jet GBS csv generation

* Add script for plotting benchmark data

* Update readme and docs for data generation and analysis

* Add scripts for AWS use

* Run C++ formatter

* Add Ubuntu runnable jobs

* Add Cot m10 example

* Add m12 with selective number of slices

* Add full and sliced m12 for cotengra

* Add run scripts for m10 on GPU

* Add runnable script for m12 single and subset runs with Jet

* Update m12 jet example

* Add example barplot script for timing data

* Update run-scripts

* Add requirements file

* Tidy Py env setup

* Move TBCC to TBC

* Remove unneeded comments

* Remove std::chrono::

* Remove more TBCCs

* Update build-scripts

* Remove details

* Trigger Build

* Add new gbs benchmarks

* Update examples/paper_benchmarks/README.md

Co-authored-by: Mikhail Andrenkov <Mandrenkov@users.noreply.github.com>

* Update examples/paper_benchmarks/README.md

Co-authored-by: Mikhail Andrenkov <Mandrenkov@users.noreply.github.com>

* Update examples/paper_benchmarks/README.md

Co-authored-by: Mikhail Andrenkov <Mandrenkov@users.noreply.github.com>

* Update examples/paper_benchmarks/README.md

Co-authored-by: Mikhail Andrenkov <Mandrenkov@users.noreply.github.com>

* Reference 0.2.0-benchmarks tag for all BM cmake files

* Update tag for benchmarks

Co-authored-by: Mikhail Andrenkov <M.Andrenkov@gmail.com>
Co-authored-by: lee <lee@ip-172-31-13-18.ec2.internal>
Co-authored-by: Trevor Vincent <tvincent@cita.utoronto.ca>
Co-authored-by: Mikhail Andrenkov <Mandrenkov@users.noreply.github.com>
  • Loading branch information
5 people committed Jul 20, 2021
1 parent c75e51a commit d74502b
Show file tree
Hide file tree
Showing 64 changed files with 3,173 additions and 5 deletions.
12 changes: 10 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,12 @@ if(ENABLE_WARNINGS)
endif()

if(ENABLE_NATIVE)
    # GCC on POWER (ppc64le) does not accept -march=native; the equivalent
    # native-tuning flags there are -mcpu=native and -mtune=native.
    # (The unguarded -march=native line from the pre-change version is
    # removed — keeping both would pass an unsupported flag on ppc64le.)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
        target_compile_options(Jet INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-mcpu=native>)
        target_compile_options(Jet INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-mtune=native>)
    else()
        target_compile_options(Jet INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-march=native>)
    endif()
endif()
if(ENABLE_IPO)
target_compile_options(Jet INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-flto>)
Expand All @@ -220,8 +225,11 @@ if(ENABLE_HPTT)
endif()

if (ENABLE_CUTENSOR)
# Avoid DSO errors on platforms preferring static linkage
string(REPLACE "libcudart_static.a" "libcudart.so" CUDA_SHARED_RT "${CUDA_LIBRARIES}")

target_include_directories(Jet INTERFACE ${CUDA_TOOLKIT_ROOT_DIR}/include)
target_link_libraries(Jet INTERFACE ${CUTENSOR_LIB} ${CURAND_LIB} ${CUDA_LIBRARIES})
target_link_libraries(Jet INTERFACE ${CUTENSOR_LIB} ${CURAND_LIB} ${CUDA_SHARED_RT})
target_compile_options(Jet INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-DCUTENSOR>)
if(DISABLE_CUDA_SAFETY)
target_compile_options(Jet INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-DCUDATENSOR_UNSAFE>)
Expand Down
42 changes: 42 additions & 0 deletions examples/paper_benchmarks/CPU/cot_cpu_m10/full.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import quimb.tensor as qtn
import cotengra as ctg
import time
import numpy as np

from opt_einsum import contract, contract_expression, contract_path, helpers
from opt_einsum.paths import linear_to_ssa, ssa_to_linear

import pandas as pd
import numpy as np

def read_cotengra_file(filename):
    """Load a space-separated .cotengra dump into a quimb TensorNetwork.

    Each row carries four bracketed, comma-separated fields, in column
    order: tags (col 0), indices (col 1), shape (col 2), and the
    flattened complex data (col 3).
    """
    def _strip(field):
        # Remove the bracket/quote decoration left by the text dump.
        return field.replace("[", "").replace("]", "").replace("'", "")

    frame = pd.read_csv(filename, sep=' ', header=None)
    tensors = []
    for row in range(len(frame[0])):
        values = [complex(tok) for tok in _strip(frame[3][row]).split(',')]
        shape = [int(tok) for tok in _strip(frame[2][row]).split(',')]
        tags = [str(tok) for tok in _strip(frame[0][row]).split(',')]
        inds = [str(tok) for tok in _strip(frame[1][row]).split(',')]
        data = np.array(values).reshape(shape)
        tensors.append(qtn.Tensor(data, inds, tags))
    return qtn.TensorNetwork(tensors)

# Precomputed contraction path for the m=10 benchmark circuit, expressed as
# SSA (static-single-assignment) pairwise contractions — presumably the
# output of a cotengra path search saved verbatim (TODO confirm origin).
m10_ssa_path = ((81, 185), (127, 322), (82, 319), (99, 324), (323, 325), (304, 326), (261, 291), (9, 328), (249, 329), (206, 330), (327, 331), (69, 70), (113, 272), (333, 334), (37, 97), (298, 336), (335, 337), (314, 338), (332, 339), (230, 282), (5, 290), (341, 342), (13, 343), (235, 263), (344, 345), (169, 251), (213, 347), (346, 348), (162, 199), (207, 350), (349, 351), (170, 352), (161, 353), (340, 354), (154, 155), (355, 356), (98, 320), (120, 268), (358, 359), (357, 360), (114, 115), (361, 362), (128, 129), (363, 364), (121, 122), (365, 366), (44, 45), (310, 368), (367, 369), (76, 77), (370, 371), (218, 253), (240, 373), (16, 265), (374, 375), (10, 285), (376, 377), (247, 279), (259, 379), (200, 236), (380, 381), (378, 382), (208, 214), (383, 384), (372, 385), (163, 164), (386, 387), (123, 124), (388, 389), (95, 107), (271, 391), (63, 64), (32, 393), (392, 394), (311, 395), (390, 396), (148, 149), (397, 398), (108, 109), (399, 400), (38, 39), (305, 402), (401, 403), (71, 72), (404, 405), (86, 186), (132, 407), (54, 87), (100, 409), (408, 410), (24, 411), (406, 412), (175, 176), (413, 414), (18, 255), (267, 416), (221, 243), (417, 418), (14, 287), (419, 420), (237, 241), (215, 219), (422, 423), (421, 424), (1, 276), (223, 289), (426, 427), (226, 428), (190, 194), (231, 430), (429, 431), (6, 283), (432, 433), (425, 434), (201, 209), (435, 436), (415, 437), (171, 172), (438, 439), (156, 157), (440, 441), (116, 117), (442, 443), (50, 51), (315, 445), (444, 446), (89, 141), (187, 448), (101, 135), (57, 450), (449, 451), (179, 180), (452, 453), (447, 454), (26, 317), (455, 456), (55, 56), (457, 458), (25, 316), (459, 460), (22, 312), (461, 462), (46, 47), (463, 464), (83, 84), (465, 466), (52, 53), (467, 468), (20, 307), (469, 470), (88, 140), (177, 472), (134, 178), (133, 474), (473, 475), (471, 476), (299, 306), (477, 478), (33, 34), (479, 480), (130, 131), (174, 482), (139, 173), (85, 484), (483, 485), (481, 486), (295, 300), (487, 488), (118, 119), (321, 490), 
(75, 137), (160, 492), (491, 493), (96, 318), (112, 495), (68, 184), (67, 497), (496, 498), (494, 499), (42, 43), (303, 309), (501, 502), (500, 503), (73, 74), (504, 505), (258, 260), (8, 278), (507, 508), (205, 248), (509, 510), (198, 246), (511, 512), (152, 153), (513, 514), (506, 515), (110, 111), (516, 517), (106, 183), (62, 519), (31, 61), (94, 521), (520, 522), (518, 523), (21, 297), (308, 525), (524, 526), (80, 138), (125, 528), (167, 168), (126, 530), (529, 531), (48, 49), (78, 79), (533, 534), (532, 535), (527, 536), (4, 275), (281, 538), (229, 257), (539, 540), (193, 245), (541, 542), (12, 262), (250, 544), (212, 234), (545, 546), (543, 547), (197, 204), (548, 549), (537, 550), (158, 159), (551, 552), (146, 147), (553, 554), (35, 36), (302, 556), (555, 557), (65, 66), (558, 559), (23, 313), (301, 561), (560, 562), (40, 41), (19, 564), (563, 565), (58, 59), (182, 567), (28, 93), (103, 569), (568, 570), (294, 571), (566, 572), (29, 30), (296, 574), (573, 575), (136, 145), (144, 577), (60, 105), (104, 579), (578, 580), (576, 581), (27, 293), (582, 583), (92, 102), (270, 585), (584, 586), (142, 143), (587, 588), (181, 269), (90, 91), (590, 591), (292, 592), (589, 593), (189, 225), (244, 595), (256, 273), (596, 597), (2, 277), (598, 599), (228, 233), (192, 196), (601, 602), (600, 603), (7, 284), (15, 605), (239, 264), (606, 607), (217, 252), (608, 609), (604, 610), (203, 211), (611, 612), (594, 613), (165, 166), (614, 615), (150, 151), (616, 617), (489, 618), (220, 254), (242, 620), (17, 266), (621, 622), (11, 286), (623, 624), (232, 238), (210, 216), (626, 627), (625, 628), (0, 288), (222, 274), (630, 631), (188, 632), (191, 227), (224, 634), (633, 635), (3, 280), (636, 637), (629, 638), (619, 639), (195, 640), (202, 641))
# quimb/opt_einsum consume linear paths, so convert from SSA numbering.
m10_linear_path = ssa_to_linear(m10_ssa_path)

# Expects "m10.cotengra" in the working directory.
tn = read_cotengra_file("m10.cotengra")
# Benchmark in single precision.
tn.astype_('complex64')
#print(tn)

# Time one full (unsliced) CPU contraction with the fixed path.
start = time.time()
res = tn.contract(all, optimize=m10_linear_path)
end = time.time()
print("time = " + str(end-start))
#info = tn.contract(all, optimize=m10_linear_path, get='path-info', output_inds=[])
24 changes: 24 additions & 0 deletions examples/paper_benchmarks/CPU/cot_cpu_m10/niagara_cot_full.slurm
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# SLURM submission script: cotengra m=10 CPU benchmark on Niagara.
# Sweeps OpenMP thread counts 1..64 and repeats each $num_runs times,
# appending output to cotengra_omp<threads>.out.
#SBATCH --nodes=1
#SBATCH --time=0:20:00
#SBATCH --job-name=cot_m10
#SBATCH --ntasks=80

# Run from the directory the job was submitted from.
cd $SLURM_SUBMIT_DIR

module load cmake python intel
source py_benchenv/bin/activate

# Number of repeated runs per thread count.
export num_runs=10

# Fix OpenMP placement/scheduling for reproducible timings.
export OMP_PROC_BIND=false
export OMP_PLACES=cores
export OMP_DYNAMIC=false
export OMP_SCHEDULE=static

# NOTE(review): ${p} and ${sl} are never defined in this script, and
# cotengra_m10.py does not match the scripts in this directory
# (full.py / sliced.py) — confirm the intended script name and arguments.
for th in 1 2 4 8 16 32 64; do
for i in $(seq 1 $num_runs); do
echo "### RUN=${i} ###" >> cotengra_omp${th}.out;
OMP_NUM_THREADS=${th} python ./cotengra_m10.py ${p} ${sl} >> cotengra_omp${th}.out;
done
done
229 changes: 229 additions & 0 deletions examples/paper_benchmarks/CPU/cot_cpu_m10/sliced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
import argparse, io, math, os, pickle, sys, time

import cotengra as ctg
import numpy as np
import opt_einsum
import pandas as pd
import quimb as qu
import quimb.tensor as qtn
import tqdm

ALPHABET_SIZE_ = 52
ALPHABET_ = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]

def GenerateStringIndex(ind):
    """Map a non-negative integer to a compact string label.

    The first ALPHABET_SIZE_ indices map to bare letters (a..z, A..Z);
    each subsequent pass through the alphabet appends an incrementing
    numeric suffix (a0..Z0, a1..Z1, ...).
    """
    group = int(ind / ALPHABET_SIZE_)
    letter = ALPHABET_[ind % ALPHABET_SIZE_]
    if group == 0:
        return letter
    return letter + str(group - 1)

def write_kraken_file(tn,path,sliced_inds,inv_map,filew):
    # Serialise a tensor network, its contraction path, and its sliced
    # indices into the text format consumed by the Jet benchmarks.
    #
    #   tn          : quimb TensorNetwork to dump
    #   path        : linear contraction path; converted to SSA form and
    #                 printed with ()/[] swapped for {} braces
    #   sliced_inds : sliced indices, printed verbatim on the second line
    #   inv_map     : maps quimb index symbols to generated string labels
    #   filew       : open, writable file object
    #
    # Line 1: the SSA path. Line 2: the sliced indices. Then one line per
    # tensor: [tags] [relabelled indices] [shape] [(re,im);(re,im);...]
    print(str(opt_einsum.paths.linear_to_ssa(path)).replace("[","{").replace("]","}").replace(")","}").replace("(","{"),file=filew)
    print(sliced_inds,file=filew)
    for i in tn:
        # The replace chains normalise Python's set/tuple repr into the
        # bracketed list syntax the .kraken parser expects.
        print(str(i._tags).replace("{","[").replace("}","]"),end=' ',file=filew)
        print(str([inv_map[j] for j in i._inds]).replace("(","[").replace(",)",")").replace(")","]"),end=' ',file=filew)
        print(str(i._data.shape).replace("(","[").replace(")","]").replace(",]","]"),end=' ',file=filew)
        tensor_data = "["
        for j in i._data.flatten():
            # Complex entries serialised as "(real,imag);" pairs.
            tensor_data += "(" + str(j.real) + "," + str(j.imag) + ");"
        tensor_data += "]"
        # Drop the trailing separator before the closing bracket.
        tensor_data = tensor_data.replace(";]","]")
        print(tensor_data,file=filew)

def read_qasm_file(file, swap_trick=True):
    """Build a quimb Circuit from a QASM file.

    When ``swap_trick`` is true, two-qubit gates are applied with the
    swap-split-gate contraction capped at bond dimension 2; otherwise
    quimb's default gate options are used.
    """
    opts = {'contract': 'swap-split-gate', 'max_bond': 2} if swap_trick else {}
    return qtn.Circuit.from_qasm_file(file, gate_opts=opts)

def read_cotengra_file(file_name):
    # Parse a space-separated .cotengra dump back into a quimb
    # TensorNetwork. Column order per row: tags (0), indices (1),
    # shape (2), flattened complex data (3).
    # NOTE(review): duplicates read_cotengra_file in full.py — consider
    # sharing a single implementation.
    df = pd.read_csv(file_name, sep=' ', header = None)
    tensors = []
    for i in range(len(df[0])):
        # Each field is a bracketed, quoted, comma-separated list; strip
        # the decoration, then parse element-wise.
        tens_data = df[3][i].replace("[","").replace("]","").replace("'","")
        tens_data = [complex(s) for s in tens_data.split(',')]
        tens_shape = df[2][i].replace("[","").replace("]","").replace("'","")
        tens_shape = [int(s) for s in tens_shape.split(',')]
        tens_tags = df[0][i].replace("[","").replace("]","").replace("'","")
        tens_tags = [str(s) for s in tens_tags.split(',')]
        tens_inds = df[1][i].replace("[","").replace("]","").replace("'","")
        tens_inds = [str(s) for s in tens_inds.split(',')]
        data = np.array(tens_data).reshape(tens_shape)
        inds = tens_inds
        tags = tens_tags
        tensors.append(qtn.Tensor(data, inds, tags))
    return qtn.TensorNetwork(tensors)

if __name__ == "__main__":
    # --- Command-line interface ------------------------------------------
    my_parser = argparse.ArgumentParser(description='Search for benchmark contraction paths')
    my_parser.add_argument('--file_name',
                           type=str,
                           help='.cotengra file')

    my_parser.add_argument('--save_suffix',
                           type=str,
                           default="",
                           help='optional suffix for any saved files')

    my_parser.add_argument('--simplify_string',
                           type=str,
                           default="RC",
                           help='Cotengra simplify string')

    my_parser.add_argument('--search_time',
                           type=int,
                           default=30,
                           help='Cotengra search time')

    my_parser.add_argument('--job_rank',
                           type=int,
                           default=0,
                           help='Cotengra job rank')

    # NOTE(review): type=bool is an argparse footgun — any non-empty string
    # (including "False") parses as True. Consider action='store_true'.
    my_parser.add_argument('--swap_trick',
                           type=bool,
                           default=True,
                           help='swap_trick')

    args = my_parser.parse_args()
    file_name = args.file_name
    save_suffix = args.save_suffix
    job_rank = args.job_rank
    search_time = args.search_time
    simplify_string = args.simplify_string
    swap_trick = args.swap_trick

    # Echo the configuration so it lands in the benchmark logs.
    print("file_name = " + str(file_name))
    print("save_suffix = " + str(save_suffix))
    print("search_time = " + str(search_time))
    print("simplify_string = " + str(simplify_string))
    print("swap_trick = " + str(swap_trick))


    # NOTE(review): the '--file_name' help text says ".cotengra file", but
    # the file is parsed as QASM here — confirm which is intended.
    circ = read_qasm_file(file_name,swap_trick)

    import random as rd
    rd.seed(42)  # fixed seed: every run samples the same bitstring

    # Random 53-bit computational-basis state to project the circuit onto.
    bitstring = "".join(rd.choice('01') for _ in range(53))
    print(bitstring)

    # the squeeze removes all size 1 bonds
    psi_sample = qtn.MPS_computational_state(bitstring, tags='PSI_f').squeeze()
    tn = circ.psi & psi_sample

    print("num tensors = " + str(tn.num_tensors))
    print("num indices = " + str(tn.num_indices))
    # In-place network simplification controlled by the simplify string.
    tn.full_simplify_(simplify_string,output_inds=[])

    print("num tensors after simplify = " + str(tn.num_tensors))
    print("num indices after simplify = " + str(tn.num_indices))
    tn.astype_('complex64')

    # Hyper-optimised path search with slicing targeted at 2**20-element
    # intermediates; search results are cached on disk per job rank.
    opt = ctg.ReusableHyperOptimizer(
        methods=['kahypar','greedy'],
        max_repeats=1_000_000,
        max_time=search_time,
        directory="ctg_path_cache_" + str(job_rank),
        slicing_reconf_opts={
            'target_size': 2**20,
            'forested': True,
            'num_trees': 2,
            'reconf_opts': {
                'subtree_size': 12,
                'forested': True,
                'num_trees': 2,
                'parallel' : False,
            }
        }
    )

    # Retrieve path metadata and the symbol map without contracting.
    info = tn.contract(all, optimize=opt, get='path-info',output_inds=[])
    symmap = tn.contract(all, optimize=opt, get='symbol-map',output_inds=[])

    print(info)
    print("opts = ")
    print(opt)
    print(vars(opt))

    # Restore the standard streams — presumably something above redirects
    # them (TODO confirm this reset is still needed).
    sys.stdout = sys.__stdout__
    sys.stderr = sys.__stderr__

    # Dump the simplified network in .kraken format for the Jet benchmarks.
    base=os.path.basename(file_name)
    kraken_file_name = os.path.splitext(base)[0] + "_" + save_suffix + ".kraken"

    filew=open(kraken_file_name,'w')
    inv_map = {v: k for k, v in symmap.items()}

    # Relabel every index with a compact generated name (a, b, ..., a0, ...).
    counter = 0
    for k, v in inv_map.items():
        inv_map[k] = GenerateStringIndex(counter)
        counter += 1

    write_kraken_file(tn,[],"",inv_map,filew)

    # --- Timed contractions ----------------------------------------------
    # 1) Default backend.
    start = time.time()
    res = tn.contract(all,optimize=opt,output_inds=[])
    end = time.time()
    print("res =",res)
    print("no jax time = " + str(end - start))

    # 2) First jax run (includes compilation/warm-up cost).
    start = time.time()
    res = tn.contract(all,optimize=opt,output_inds=[],backend='jax')
    end = time.time()
    print("res =",res)
    print("jax 1 time = " + str(end - start))

    # 3) Second jax run (steady-state timing).
    start = time.time()
    res = tn.contract(all,optimize=opt,output_inds=[],backend='jax')
    end = time.time()
    print("res =",res)
    print("jax 2 time = " + str(end - start))


    #slice to 2**20
    # Repeated searches at decreasing temperature; the last result is kept.
    sf = ctg.SliceFinder(info, target_size=2**20)
    ix_sl, cost_sl = sf.search(temperature=1.0)
    ix_sl, cost_sl = sf.search(temperature=0.1)
    ix_sl, cost_sl = sf.search(temperature=0.01)
    print(ix_sl,cost_sl)
    arrays = [t.data for t in tn]
    sc = sf.SlicedContractor(arrays)
    c = 0
    start = time.time()
    # Accumulate every slice's contribution to recover the full result.
    for i in tqdm.tqdm(range(0, sc.nslices)):
        c = c + sc.contract_slice(i)
    end = time.time()
    print("c =",c)
    print("20 slice time = " + str(end - start))
    sliced_inds = [symmap[j] for j in ix_sl]
    inv_sliced_inds = [inv_map[j] for j in sliced_inds]
    print("sliced_inds =",sliced_inds)
    print("inv_sliced_inds =",inv_sliced_inds)

    #slice to 2**23
    sf = ctg.SliceFinder(info, target_size=2**23)
    ix_sl, cost_sl = sf.search(temperature=1.0)
    ix_sl, cost_sl = sf.search(temperature=0.1)
    ix_sl, cost_sl = sf.search(temperature=0.01)
    print(ix_sl,cost_sl)
    arrays = [t.data for t in tn]
    sc = sf.SlicedContractor(arrays)
    c = 0
    start = time.time()
    for i in tqdm.tqdm(range(0, sc.nslices)):
        c = c + sc.contract_slice(i)
    end = time.time()
    print("c =",c)
    print("23 slice time = " + str(end - start))
    sliced_inds = [symmap[j] for j in ix_sl]
    inv_sliced_inds = [inv_map[j] for j in sliced_inds]
    print("sliced_inds =",sliced_inds)
    print("inv_sliced_inds =",inv_sliced_inds)

31 changes: 31 additions & 0 deletions examples/paper_benchmarks/CPU/jet_cpu_gbs/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

#############################
## I. Set project details
#############################
cmake_minimum_required(VERSION 3.14)

project("Jet GBS CPU Benchmark"
    VERSION 0.1.0
    DESCRIPTION "Jet project CPU benchmarks"
    LANGUAGES CXX
)

#############################
## II. Fetch Jet project
#############################

# Lowercase command name per CMake convention (was: Include(FetchContent)).
include(FetchContent)

FetchContent_Declare(
    Jet
    # Use the anonymous HTTPS URL: the git@ SSH form requires every builder
    # to have GitHub SSH credentials configured.
    GIT_REPOSITORY https://github.com/XanaduAI/jet.git
    GIT_TAG 0.2.0
)
FetchContent_MakeAvailable(Jet)

#############################
## III. Create project target
#############################

add_executable(jet_gbs_full jet_gbs_full.cpp)
# PRIVATE: Jet is an implementation dependency of the benchmark binary;
# always give target_link_libraries an explicit visibility keyword.
target_link_libraries(jet_gbs_full PRIVATE Jet)
Loading

0 comments on commit d74502b

Please sign in to comment.