"""Bulk-load users, pictures and likes from CSV files into a Datamaglia subgraph.

Running this file performs the upload immediately, in this order:

1. ``data/users.csv``    -> sources       (users)
2. ``data/out_pic.csv``  -> targets       (pictures)
3. ``data/out_like.csv`` -> relationships (user -[likes]-> picture)

Every data set is POSTed in batches of ``CHUNK_SIZE`` entities.

The code deliberately sticks to constructs that behave the same on Python 2
and Python 3 (single-argument ``print(...)``, ``range``), so it runs unchanged
on either interpreter.
"""
import csv
import json
import os
import sys
from pprint import pprint

import requests

# ``Set`` is not used anywhere below; the import is kept only so the name stays
# available. The bare ``collections.Set`` alias no longer exists on
# Python 3.10+, so resolve it through ``collections.abc`` first and fall back
# to the old location on Python 2.
try:
    from collections.abc import Set  # noqa: F401
except ImportError:  # Python 2
    from collections import Set  # noqa: F401

# Subgraph ID from the management console. Can be overridden through the
# DATAMAGLIA_SUBGRAPH_ID environment variable.
SUBGRAPH_ID = os.environ.get('DATAMAGLIA_SUBGRAPH_ID',
                             "6044a63406e748bc9c1cd54c1a77f4da")

# Our API key, also from the management console, used to authenticate our
# requests. Can be overridden through the DATAMAGLIA_API_KEY environment
# variable.
# NOTE(review): a credential committed to source control should be rotated and
# supplied only through the environment.
API_KEY = os.environ.get('DATAMAGLIA_API_KEY',
                         "f88edf839cad4600b139bed5d6184efb")

# Specify Auth-Token header
HEADERS = {'Auth-Token': API_KEY, 'Content-Type': 'application/json'}

# Base URL for Datamaglia API
BASE_URL = 'https://api.datamaglia.com/v1{}'

# URL for inserting sources (users)
SRC_URL = BASE_URL.format('/subgraphs/' + SUBGRAPH_ID + '/data/sources/')

# URL for inserting targets (pictures)
# NOTE(review): unlike SRC_URL and REL_URL this path has no trailing slash.
# Left exactly as it was -- confirm against the API which form is expected.
TARGET_URL = BASE_URL.format('/subgraphs/' + SUBGRAPH_ID + '/data/targets')

# URL for inserting relationships (user -[likes]-> photo)
REL_URL = BASE_URL.format('/subgraphs/' + SUBGRAPH_ID + '/data/relationships/')

# Number of entities sent per POST request.
CHUNK_SIZE = 100

# Users before this index are NOT uploaded.
# NOTE(review): presumably a leftover from resuming an interrupted upload;
# set to 0 to upload every user. Value kept as found.
USER_START_OFFSET = 7000


def chunker(seq, size):
    """Iterate through a sequence in chunks of a specified size.

    Args:
        seq: A sliceable sequence that supports ``len()`` (e.g. a list).
        size: Maximum number of items per chunk; must be non-zero.

    Returns:
        A generator yielding consecutive slices of ``seq``. The last slice may
        be shorter than ``size``; an empty ``seq`` yields nothing.
    """
    # ``range`` instead of ``xrange`` so this also works on Python 3.
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def _read_csv(path):
    """Return all rows of the CSV file at ``path`` as a list of dicts.

    The rows come from ``csv.DictReader``, so they are keyed by the file's
    header row.
    """
    with open(path) as f:
        return list(csv.DictReader(f))


def _post_entities(url, entities):
    """POST ``entities`` to ``url`` as a JSON ``{'entities': [...]}`` payload.

    The response object is printed to stdout. If the server answers with an
    error status, the status code and the start of the response body are also
    written to stderr so that a failed batch does not go unnoticed. Nothing is
    raised for an error status, so the remaining batches are still attempted.
    """
    # This payload will be serialized to JSON and sent with our request
    payload = {'entities': entities}
    resp = requests.post(url, headers=HEADERS, data=json.dumps(payload))
    print(resp)  # the original author's notes expect a 204 status here
    if not resp.ok:
        # %r keeps the message ASCII-safe on Python 2 as well.
        sys.stderr.write('POST %s failed with status %s: %r\n'
                         % (url, resp.status_code, resp.text[:500]))


def createSources(users):
    """Upload one batch of users as source entities.

    Args:
        users: List of dicts, one per user. Each must have a ``'users'`` key,
            whose value becomes the entity id.
            NOTE(review): presumably ``users`` is the header of the username
            column in data/users.csv -- confirm against the file.
    """
    # One dict per user of the form {'id': <value of the 'users' column>}
    entities = [{'id': user['users']} for user in users]
    _post_entities(SRC_URL, entities)


def createTargets(pictures):
    """Upload one batch of pictures as target entities.

    Args:
        pictures: List of dicts, one per picture, with the keys ``'url'``
            (used as the entity id), ``'lat'``, ``'lon'``, ``'text'`` and
            ``'user'`` (sent as key/value properties).
    """
    entities = [
        {
            'id': pic['url'],
            'properties': [
                {'key': 'lat', 'value': pic['lat']},
                {'key': 'lon', 'value': pic['lon']},
                {'key': 'text', 'value': pic['text']},
                {'key': 'user', 'value': pic['user']},
            ],
        }
        for pic in pictures
    ]
    _post_entities(TARGET_URL, entities)


def createLikes(likes):
    """Upload one batch of likes as relationships.

    Args:
        likes: List of dicts, one per like, with the keys ``'user'`` (sent as
            the relationship source) and ``'pic'`` (sent as the target). Every
            relationship is sent with a weight of 0.
    """
    entities = [
        {'weight': 0, 'source': like['user'], 'target': like['pic']}
        for like in likes
    ]
    _post_entities(REL_URL, entities)


# --- Upload flow (runs on execution/import, in the original order) ---------

# Parse the users file into a list of dicts and show a small sample.
users = _read_csv('data/users.csv')
print(users[0:2])

for chunk in chunker(users[USER_START_OFFSET:], CHUNK_SIZE):
    createSources(chunk)

# Parse the pictures file into a list of dicts and show a small sample.
pictures = _read_csv('data/out_pic.csv')
pprint(pictures[0:2])

for chunk in chunker(pictures, CHUNK_SIZE):
    createTargets(chunk)

# Parse the likes file into a list of dicts.
likes = _read_csv('data/out_like.csv')

for chunk in chunker(likes, CHUNK_SIZE):
    createLikes(chunk)