#!/usr/bin/env python # coding: utf-8 # In[2]: cd /home/anshul/DecisionStats/python-codes/ # #### Import all Required Libraries # In[3]: import csv import json import pprint import sys sys.path.append("/usr/local/lib/python2.7/dist-packages") # In[4]: DATAFILE = 'BigDiamonds.csv' # #### This function will read the CSV file and convert it in Dictionary form so that can be stored in MongoDB # In[5]: def process_file(filename): data = [] with open(filename, "r") as f: reader = csv.DictReader(f) for line in reader: line[""]=int(line[""]) if line["price"]!="NA": line["price"]=float(line["price"]) line["carat"]=float(line["carat"]) if line["x"]!="NA": line["x"]=float(line["x"]) if line["y"]!="NA": line["y"]=float(line["y"]) if line["z"]!="NA": line["z"]=float(line["z"]) line["depth"]=float(line["depth"]) data.append(line) return data # #### Function to store Documents in examples database and in collection BigDiamonds in MongoDB # In[6]: def insert_data(data, db): db.BigDiamonds.insert_many(data) # #### This will store the csv data in json file and in MongoDB # In[7]: def csvToMongo(): data = process_file(DATAFILE) with open('BigDiamonds.json',"w") as f: json.dump(data,f) from pymongo import MongoClient client = MongoClient("mongodb://localhost:27017") db = client.examples insert_data(data, db) print db.BigDiamonds.find_one() # In[8]: csvToMongo() # #### To run aggregate queries # In[21]: def aggregate(db, pipeline): return [doc for doc in db.BigDiamonds.aggregate(pipeline)] # In[10]: from pymongo import MongoClient client = MongoClient('localhost:27017') db = client.examples # #### Viewing First 20 documents of the data in MongoDB # In[11]: count=0 for doc in db.BigDiamonds.find(): if count==20: break pprint.pprint(doc) count+=1 # #### Number of Documents # In[12]: db.BigDiamonds.find().count() # #### Selection Query # In[13]: for doc in db.BigDiamonds.find({"cut":"Good"})[:3]: pprint.pprint(doc) # #### Running aggregation queries # In[26]: def make_pipeline(): # complete the aggregation pipeline pipeline = [{"$match":{"price": {"$ne":"NA"} } }, {"$group":{"_id":"null", "average_price":{"$avg":"$price"}, "average_carat":{"$avg":"$carat"} } } ] return pipeline pipeline = make_pipeline() result = aggregate(db, pipeline) pprint.pprint(result) # In[30]: def make_pipeline(): # complete the aggregation pipeline pipeline = [{"$group":{"_id":"$color", "average_carat":{"$avg":"$carat"}, } }, {"$sort":{"average_carat":-1} } ] return pipeline pipeline = make_pipeline() result = aggregate(db, pipeline) pprint.pprint(result) # In[31]: def make_pipeline(): # complete the aggregation pipeline pipeline = [{"$group":{"_id":"$cut", "average_carat":{"$avg":"$carat"}, } }, {"$sort":{"average_carat":-1} } ] return pipeline pipeline = make_pipeline() result = aggregate(db, pipeline) pprint.pprint(result) # In[ ]: