# Scraping Stack Overflow question counts with HTTP + Gumbo + Cascadia,
# first sequentially, then with tasks, then across workers.

Pkg.add("Cascadia")
Pkg.add("HTTP")

using Cascadia
using HTTP
using Gumbo

# --- Exploration: fetch one listing page and poke at the parsed document ---

r = HTTP.get("https://stackoverflow.com/questions/tagged/julia-lang?sort=newest&pagesize=50")
typeof(r)

strResponse = String(take!(r))
h = parsehtml(strResponse)
typeof(h)
h.root.children[1].children[1]

# Every question on the listing page matches this CSS class selector.
s = Selector(".question-summary")
qs = matchall(s, h.root)

# Author link of the first question and its text content.
author_elem = matchall(sel".user-details a", qs[1])
author_elem[1].children[1]
text(author_elem[1].children[1])

# The pagination link to the following page carries rel="next".
s = sel"[rel=\"next\"]"
matchall(s, h.root)
matchall(s, h.root)[1]
nxt = getattr(matchall(s, h.root)[1], "href")

# The href extracted above is appended to this base to form the next request.
const baseURL = "https://stackoverflow.com"
HTTP.get("$baseURL$nxt")

# A selector that matches nothing, to see what matchall returns in that case.
matchall(sel"blah", h.root)

# --- Sequential count ---

"""
    count_julia_q()

Count the `.question-summary` elements on every page of the `julia-lang`
tag listing, following `rel="next"` links until a page has none, then
print the total.
"""
function count_julia_q()
    h = parsehtml(String(take!(HTTP.get("$baseURL/questions/tagged/julia-lang?sort=newest&pagesize=50"))))
    s1 = sel".question-summary"
    s2 = sel"[rel=\"next\"]"
    count = 0
    while true
        q = matchall(s1, h.root)
        count = count + length(q)
        elem = matchall(s2, h.root)
        if length(elem) > 0
            nxt = getattr(elem[1], "href")
            h = parsehtml(String(take!(HTTP.get("$baseURL$nxt"))))
        else
            # No "next" link: this was the last page.
            break
        end
    end
    println("Total number of Julia Questions: $count")
end

@time count_julia_q()

# --- Task-based count, driven by a channel of URLs ---

# Work queue of page URLs; `nothing` is the stop sentinel.
const queue = Channel(10)
# Running total. Explicitly zero-initialised: `Ref{Int}()` would leave the
# contents undefined and the sum would start from an arbitrary value.
const count2 = Ref{Int}(0)
# Seed the queue with the first page before the consumer starts.
put!(queue, "$baseURL/questions/tagged/julia-lang?sort=newest&pagesize=50")

"""
    count_julia_q_async()

Take page URLs from `queue`, and for each one spawn a task that fetches
the page, adds its `.question-summary` count to `count2`, and enqueues
either the `rel="next"` URL or `nothing` when there is no next page.
The loop stops when it takes `nothing`; the total is printed once all
spawned tasks have finished.

Expects `queue` to have been seeded with the first URL.
"""
function count_julia_q_async()
    s1 = sel".question-summary"
    s2 = sel"[rel=\"next\"]"
    @sync while true
        # Blocks until a task has enqueued the next URL (or the sentinel).
        u = take!(queue)
        u === nothing && break
        @async begin
            h = parsehtml(String(take!(HTTP.get(u))))
            q = matchall(s1, h.root)
            count2[] = count2[] + length(q)
            elem = matchall(s2, h.root)
            if length(elem) > 0
                nx = getattr(elem[1], "href")
                put!(queue, "$baseURL$nx")
            else
                put!(queue, nothing)
            end
        end
    end
    # Report the result the same way the sequential version does.
    println("Total number of Julia Questions: $(count2[])")
end

@time count_julia_q_async()

# --- Per-tag count, several tags concurrently ---

"""
    count_q(tag)

Count the `.question-summary` elements across all pages of the listing
for `tag`, following `rel="next"` links, and print the total. All
requests go through a single `HTTP.Client`.
"""
function count_q(tag)
    client = HTTP.Client()
    h1 = parsehtml(String(take!(HTTP.get(client, "$baseURL/questions/tagged/$tag?sort=newest&pagesize=50"))))
    s1 = sel".question-summary"
    s2 = sel"[rel=\"next\"]"
    count = 0
    while true
        q = matchall(s1, h1.root)
        count = count + length(q)
        elem = matchall(s2, h1.root)
        if length(elem) > 0
            nx = getattr(elem[1], "href")
            h1 = parsehtml(String(take!(HTTP.get(client, "$baseURL$nx"))))
        else
            break
        end
    end
    println("Total number of $tag Questions: $count")
end

@time @sync for i in ["julia-lang", "sencha-touch", "jni"]
    @async count_q(i)
end

# Spot check of a single tag page.
h = parsehtml(String(take!(HTTP.get("$baseURL/questions/tagged/sencha-touch?sort=newest&pagesize=50"))))
matchall(sel".question-summary", h.root)

2+2

"""
    count_qs(tag)

Same as `count_q(tag)`, but sleeps for one second at the start of every
loop iteration (i.e. before processing each page).
"""
function count_qs(tag)
    client = HTTP.Client()
    h1 = parsehtml(String(take!(HTTP.get(client, "$baseURL/questions/tagged/$tag?sort=newest&pagesize=50"))))
    s1 = sel".question-summary"
    s2 = sel"[rel=\"next\"]"
    count = 0
    while true
        sleep(1)
        q = matchall(s1, h1.root)
        count = count + length(q)
        elem = matchall(s2, h1.root)
        if length(elem) > 0
            nx = getattr(elem[1], "href")
            h1 = parsehtml(String(take!(HTTP.get(client, "$baseURL$nx"))))
        else
            break
        end
    end
    println("Total number of $tag Questions: $count")
end

"""
    count_qs_async(tags)

Run `count_qs` for every element of `tags` as concurrent tasks and wait
for all of them to finish.
"""
function count_qs_async(tags)
    @sync for i in tags
        @async count_qs(i)
    end
end

# Nine tags, regrouped into three vectors of three (one vector per column
# of the 3x3 reshape) so that each pmap item is a batch of tags.
alltags = ["julia-lang", "sencha-touch", "jni",
           "popen", "chromecast", "freebsd",
           "spree", "eigen", "thrift"]
alltags = reshape(alltags, 3, 3)
alltags = [alltags[:, i] for i in 1:size(alltags, 2)]

# --- Distributed run via JuliaRun ---

using JuliaRun
ctx = JuliaRun.init()
init_parallel()
nb = self()
scale!(ctx, nb, 3)
nworkers()
scale(ctx, nb)

@everywhere Pkg.add("Cascadia")
@everywhere Pkg.add("HTTP")
@everywhere using Cascadia
@everywhere using Gumbo
@everywhere using HTTP

# NOTE(review): count_qs_async, count_qs and baseURL are defined above
# without @everywhere and before the workers were added — confirm they are
# available on the workers; if not, define them with @everywhere here.
pmap(count_qs_async, alltags)