• This repository has been archived on 05/Dec/2019
  • Stars
    star
    121
  • Rank 293,924 (Top 6 %)
  • Language
    JavaScript
  • License
    MIT License
  • Created over 13 years ago
  • Updated over 8 years ago

Reviews

There are no reviews yet. Be the first to send feedback to the community and the maintainers!

Repository Details

async map-reduce functions for nodejs

Map Reduce for leveldb (via levelup)

Incremental map-reduces and real-time results.

Build Status

Waat?

An "incremental map reduce" means when you update one key, only a relevant portion of the data needs to be recalculated.

"real-time results" means that you can listen to the database, and receive change notifications on the fly! a la level-live-stream

If you just want something very simple, like mapping the date a blog post is created to the blog, then level-index may be enough.

Example

create a simple map-reduce

var LevelUp   = require('levelup')
var SubLevel  = require('level-sublevel')
var MapReduce = require('map-reduce')

var db = SubLevel(LevelUp(file))

//create a map-reduce view named 'example' on top of `db`.
//arguments, in order: the parent db, the view name, the map function,
//the reduce function, and the initial value handed to the reduce.
var mapDb = 
  MapReduce(
    db, //the parent db
    'example',  //name.
    function (key, value, emit) {
      //perform some mapping.
      //value is parsed as JSON; `group` and `lines.length` are read from it below.
      var obj = JSON.parse(value)
      //emit(key, value)
      //key may be an array of strings. 
      //value must be a string or buffer.
      //''+ turns the numeric length into a string.
      emit(['all', obj.group], ''+obj.lines.length)
    },
    function (acc, value, key) {
      //reduce little into big
      //must return a string or buffer.
      //acc and value are converted to numbers, added, then turned back into a string.
      return ''+(Number(acc) + Number(value))
    },
    //pass in the initial value for the reduce.
    //*must* be a string or buffer.
    '0'
  )

map-reduce uses level-trigger to make map reduces durable.

querying results.

  //get all the results in a specific group
  //(`group` is not defined in this snippet - substitute the group name you want)
  //start:[...] implies end:.. to be the end of that group.
  mapDb.createReadStream({range: ['all', group]}) 

  //get all the results under a group.
  mapDb.createReadStream({range: ['all', true]}) 

  //get all the top level results.
  mapDb.createReadStream({range: [true]})

complex aggregations

map-reduce with multiple levels of aggregation.

suppose we are building a database of all the street-food in the world. the data looks like this:

{
  country: USA | Germany | Cambodia, etc...
  state:   CA | NY | '', etc...
  city: Oakland | New York | Berlin | Phnom Penh, etc...
  type: taco | chili-dog | doner | noodles, etc...
}

We will aggregate to counts per-region, that look like this:

//say: under the key USA
{
  'taco': 23497,
  'chili-dog': 5643,
  etc...
}

first we'll map the raw data to ([country, state, city],type) tuples. then we'll count up all the instances of a particular type in that region!

var LevelUp   = require('levelup')
var SubLevel  = require('level-sublevel')
var MapReduce = require('map-reduce')

var db = SubLevel(LevelUp(file))
//create a map-reduce view named 'streetfood' on top of `db`.
//the map emits one ([country, state, city], type) pair per record;
//the reduce folds those pairs into {type: count} objects.
var mapDb = 
  MapReduce(
    db,
    'streetfood',
    function (key, value, emit) {
      //perform some mapping.
      var obj = JSON.parse(value)
      //emit(key, value)
      //key may be an array of strings. 
      //value must be a string or buffer.
      emit(
        //a missing state is emitted as '' so the key always has three parts.
        [obj.country, obj.state || '', obj.city],
        //notice that we are just returning a string.
        JSON.stringify(obj.type)
      )
    },
    function (acc, value) {
      //both arguments arrive as JSON strings.
      acc = JSON.parse(acc)
      value = JSON.parse(value)
      //check if this is top level data, like 'taco' or 'noodle'
      if('string' === typeof value) {
        //increment by one (remember to set as a number if it was undefined)
        //note: `(acc[value] || 0) ++` is a syntax error, so add 1 explicitly.
        acc[value] = (acc[value] || 0) + 1
        return JSON.stringify(acc)
      }
      //if we get to here, we are combining two aggregates.
      //say, all the cities in a state, or all the countries in the world.
      //value and acc will both be objects {taco: number, doner: number2, etc...}

      for(var type in value) {
        //add the counts for each type together...
        //remembering to check that it is set as a value...
        acc[type] = (acc[type] || 0) + value[type]
      }
      //stringify the object, so that it can be written to disk!
      return JSON.stringify(acc)
    },
    //initial value for the reduce: an empty aggregate, as a JSON string.
    '{}')

then query it like this:

//by analogy with the range queries shown earlier, this should stream every
//result stored under ['USA', 'CA'] - TODO confirm against the library.
//`.pipe(...)` stands for whatever destination stream you attach.
mapDb.createReadStream({range: ['USA', 'CA', true]})
  .pipe(...)

retrieve a specific result

pass db.get an array, and you can retrieve a specific value, by group.

var userMapping = require("map-reduce")(
    db,
    "userPoints",
    function(key, value, emit){
        value = JSON.parse(value);
        var date = new Date(value.created);
        emit([value.user, date.getYear(), date.getMonth()], value.amount);
    },
    function(acc, value){
        return (Number(acc) + Number(value)).toString();
    },
    0
);

/**
 * Fetch the reduced value stored for one [user, year, month] group.
 *
 * @param user  first element of the group key.
 * @param year  second element; must be the same value the map function emitted.
 * @param month third element; must be the same value the map function emitted.
 * @param cb    callback handed unchanged to userMapping.get.
 */
function getTotalPointsForUser(user, year, month, cb){
    var groupKey = [user, year, month];
    userMapping.get(groupKey, cb);
}

License

MIT

More Repositories

1

event-stream

EventStream is like functional programming meets IO
JavaScript
2,189
star
2

JSON.sh

a pipeable JSON parser written in Bash
Shell
1,996
star
3

JSONStream

rawStream.pipe(JSONStream.parse()).pipe(streamOfObjects)
JavaScript
1,913
star
4

scuttlebutt

peer-to-peer replicatable data structure
JavaScript
1,310
star
5

rc

The non-configurable configuration loader for lazy people.
JavaScript
995
star
6

crdt

Commutative Replicated Data Types for easy collaborative/distributed systems.
JavaScript
836
star
7

through

simple way to create a ReadableWritable stream that works
JavaScript
667
star
8

your-web-app-is-bloated

measuring memory usage of popular webapps
514
star
9

npmd

JavaScript
450
star
10

split

JavaScript
346
star
11

curry

simple curry module, with nothing *too clever*, and full test coverage
JavaScript
313
star
12

random-name

JavaScript
296
star
13

hashlru

JavaScript
240
star
14

wifi.sh

Shell
216
star
15

level-sublevel

no longer maintained, sorry!
JavaScript
194
star
16

mux-demux

multiplex-demultiplex multiple streams through a single text Stream
JavaScript
179
star
17

noderify

official fork: https://github.com/staltz/noderify
JavaScript
157
star
18

feedopensource

Iteratively Fund Open Source Projects With Bitcoin
JavaScript
142
star
19

excel-stream

JavaScript
137
star
20

stream-spec

executable specification for Stream (make testing streams easy)
JavaScript
125
star
21

map-stream

JavaScript
122
star
22

cyphernet

115
star
23

observable

A Mutable Value represented as a Function.
HTML
111
star
24

stream-combiner

JavaScript
103
star
25

rpc-stream

JavaScript
98
star
26

bench-lru

JavaScript
87
star
27

pull-box-stream

One way streaming encryption based on libsodium's secretbox primitive
JavaScript
84
star
28

level-live-stream

JavaScript
79
star
29

stack-expression

inspired by regular expressions but can do nested structures
JavaScript
76
star
30

hipster

JavaScript
72
star
31

snob

distributed version control system implemented in javascript.
JavaScript
71
star
32

xdiff

diff complex javascript objects
JavaScript
70
star
33

from

Easy way to create a Readable Stream
JavaScript
70
star
34

scalable-secure-scuttlebutt

HTML
68
star
35

explain-error

JavaScript
67
star
36

fsm

Finite State Machines in javascript
JavaScript
66
star
37

r-edit

JavaScript
64
star
38

readme

JavaScript
62
star
39

tiles

JavaScript
61
star
40

indexhtmlify

JavaScript
59
star
41

tacodb

JavaScript
57
star
42

adiff

diff and patch operations on arrays.
JavaScript
57
star
43

map-filter-reduce

JavaScript
57
star
44

browser-stream

open pipable streams to and from the browser, with Socket.io
JavaScript
55
star
45

reconnect

JavaScript
53
star
46

level-replicate

JavaScript
51
star
47

electro

JavaScript
51
star
48

d64

JavaScript
50
star
49

on-change-network

JavaScript
49
star
50

lock

lock asynchronous resources
JavaScript
48
star
51

crypto-bench

HTML
47
star
52

mynosql

JavaScript
44
star
53

monotonic-timestamp

JavaScript
44
star
54

pause-stream

JavaScript
43
star
55

json-select

JavaScript
43
star
56

json-buffer

JavaScript
41
star
57

coherence

JavaScript
41
star
58

bittodo

JavaScript
40
star
59

stream-punks

discussion repo for streams
39
star
60

charwise

JavaScript
39
star
61

proxy-by-url

custom logic for node-http-proxy to proxy based on incoming url
JavaScript
38
star
62

sentimental-versioning

version numbers with meaning
HTML
38
star
63

level-hooks

JavaScript
37
star
64

sodium-browserify

JavaScript
37
star
65

secret-handshake-paper

TeX
36
star
66

browselectrify

create browserify bundle that also works in electron
JavaScript
36
star
67

kv

simple kv store for streams
JavaScript
35
star
68

c2wasm

C++
35
star
69

level-trigger

triggers for levelup
JavaScript
33
star
70

deploy

scripts to setup continuous deployment with git push
Shell
33
star
71

presentations

JavaScript
32
star
72

rumours

Integration of scuttlebutt family.
JavaScript
32
star
73

web-bootloader

HTML
28
star
74

what-is-scuttlebutt

spec for defining "scuttlebutt" as a living changing protocol
28
star
75

remote-events

connect EventEmitters through Streams.
JavaScript
28
star
76

indexes-of

JavaScript
27
star
77

mpg123

JavaScript
27
star
78

level-master

JavaScript
27
star
79

h

JavaScript
26
star
80

testbed

continuous integration for nodejs
JavaScript
25
star
81

canvas-browserify

HTML
25
star
82

it-is

assertion DSL based on functional idioms.
JavaScript
25
star
83

level-merkle

JavaScript
25
star
84

semver-ftw

Simple Description of SemVer
HTML
25
star
85

level-inverted-index

JavaScript
24
star
86

computer-modern

CSS
24
star
87

hyperaudio

JavaScript
24
star
88

level-search

JavaScript
24
star
89

level-scuttlebutt

leveldb persistence for scuttlebutts (scuttlebutt/crdt/append-only and friends)
JavaScript
24
star
90

level-couch-sync

JavaScript
23
star
91

simple-xlsx

maintained fork is at https://github.com/zeke/simple-xlsx
JavaScript
23
star
92

shasum

JavaScript
23
star
93

content-addressable-store

JavaScript
23
star
94

ticket-auth

JavaScript
22
star
95

ssh-key-to-pem

JavaScript
21
star
96

private-groups-paper

21
star
97

scuttlebucket

JavaScript
21
star
98

looper

JavaScript
20
star
99

deterministic-tar

JavaScript
20
star
100

npm-browserify

JavaScript
20
star