Line Count

29 April 2014

Ákos Frohner

Google Inc.

Overview

Why Go?

Exercise

Counting the number of lines (\n) in files.

v0.go

// lineCount opens the file at path and reads it one '\n'-delimited chunk at a
// time. Every ReadString call is tallied, including the final one that fails,
// so a file that opens successfully always yields at least 1.
// A file that cannot be opened yields 0.
func lineCount(path string) int {
    file, openErr := os.Open(path)
    if openErr != nil {
        return 0
    }
    defer file.Close()

    reader := bufio.NewReader(file)
    count := 0
    for {
        // Tally the attempt first: the failing read is part of the result.
        count++
        if _, readErr := reader.ReadString('\n'); readErr != nil {
            return count
        }
    }
}


// main prints, for every path given on the command line, the value returned
// by lineCount followed by a tab and the path itself.
func main() {
    flag.Parse()
    paths := flag.Args()
    for i := range paths {
        fmt.Printf("%d\t%s\n", lineCount(paths[i]), paths[i])
    }
}

running v0

$ go run v0.go -- v0.go 10
35    v0.go
11    10

not perfect, we should test the code!

// lineCount opens the file at path and reads it one '\n'-delimited chunk at a
// time. Every ReadString call is tallied, including the final one that fails,
// so a file that opens successfully always yields at least 1.
// A file that cannot be opened yields 0.
func lineCount(path string) int {
    file, openErr := os.Open(path)
    if openErr != nil {
        return 0
    }
    defer file.Close()

    reader := bufio.NewReader(file)
    count := 0
    for {
        // Tally the attempt first: the failing read is part of the result.
        count++
        if _, readErr := reader.ReadString('\n'); readErr != nil {
            return count
        }
    }
}

v0_test.go

// TestLineCount runs lineCount over a table of paths and reports every entry
// whose result differs from the expected count. The paths are used as given,
// so they resolve against the directory the test runs in.
func TestLineCount(t *testing.T) {
    type testCase struct {
        path string
        lc   int
    }
    cases := []testCase{
        {path: "10", lc: 10},
        {path: "11", lc: 0},
    }
    for i := range cases {
        want := cases[i].lc
        got := lineCount(cases[i].path)
        if got == want {
            continue
        }
        t.Errorf("%d. lineCount(%q) = %d; want = %d", i, cases[i].path, got, want)
    }
}
$ go test v0_test.go v0.go
--- FAIL: TestLineCount (0.00 seconds)
    v0_test.go:19: 0. lineCount("10") = 11; want = 10

v1.go

// lineCount returns the number of newline-terminated lines in the file at
// path, i.e. the number of '\n' bytes it contains. Trailing text without a
// final newline is not counted.
//
// A non-nil error is returned when the file cannot be opened or when reading
// it fails part-way; the count is 0 in both cases.
func lineCount(path string) (int, error) {
    f, err := os.Open(path)
    if err != nil {
        return 0, err
    }
    defer f.Close()

    r := bufio.NewReader(f)
    var lc int
    for {
        if _, err = r.ReadString('\n'); err != nil {
            break
        }
        lc++
    }
    // Running out of input is the normal way to leave the loop. A failed read
    // is different: on an *os.File it arrives wrapped in *os.PathError, and it
    // has to reach the caller instead of passing for a short file.
    if _, failed := err.(*os.PathError); failed {
        return 0, err
    }
    return lc, nil
}

Testing at Google

75+M tests per day

Count ALL the lines!

$ find /usr/share/doc -type f -print0 | xargs -0 linecount

Walk the tree

// visit is the callback handed to filepath.Walk. An incoming error is passed
// straight back, entries that are not regular files are ignored, and for a
// regular file the line count is printed and added to globalLineCount.
// An error from lineCount is returned to the walker.
func visit(path string, fi os.FileInfo, err error) error {
    switch {
    case err != nil:
        return err
    case !fi.Mode().IsRegular():
        return nil
    }

    n, countErr := lineCount(path)
    if countErr != nil {
        return countErr
    }
    fmt.Printf("%d\t%s\n", n, path)
    globalLineCount += n
    return nil
}
    // Use the first non-flag argument as the root of the tree and let
    // filepath.Walk call visit for the entries below it.
    root := flag.Arg(0)
    err := filepath.Walk(root, visit)
$ go run v2.go -- /usr/share/doc

Latency

We read the files in sequence:

If accessing the L1 cache took 0.5 s (instead of 0.5 ns), then
- accessing the main memory is 100 seconds
- reading 1MB from SSD is 11.6 days
- reading 1MB from HDD is 7.8 months!

Latency numbers: original, humanized and timeline

We can do better!

// visit is the callback handed to filepath.Walk. An incoming error is passed
// straight back and entries that are not regular files are ignored. For a
// regular file it starts a goroutine that counts the lines, adds the count to
// globalLineCount and prints it, then returns nil immediately.
//
// NOTE: nothing here waits for the goroutine to finish, and globalLineCount is
// updated without any synchronization.
func visit(path string, fi os.FileInfo, err error) error {
    if err != nil {
        return err
    }
    if !fi.Mode().IsRegular() {
        return nil
    }
    go func() {
        lc, err := lineCount(path)
        if err != nil {
            // fmt.Errorf only builds an error value; the message has to be
            // written out for the failure to be visible.
            fmt.Fprintln(os.Stderr, err)
        }
        globalLineCount += lc
        fmt.Printf("%d\t%s\n", lc, path)
    }()
    return nil
}

... and fail faster

$ go run v3.go -- /usr/share/doc

No results ... we have to wait for the results!

// globalLineCount is the running total of all per-file line counts.
var globalLineCount int

// wg tracks the counting goroutines that have been started but have not yet
// called Done.
var wg sync.WaitGroup
    // Register the goroutine with the WaitGroup before starting it.
    wg.Add(1)
    go func() {
        // Deferred so the WaitGroup is released on every way out.
        defer wg.Done()
        lc, err := lineCount(path)
        if err != nil {
            // fmt.Errorf only builds an error value; the message has to be
            // written out for the failure to be visible.
            fmt.Fprintln(os.Stderr, err)
        }
        globalLineCount += lc
        fmt.Printf("%d\t%s\n", lc, path)
    }()
    // Block until every registered goroutine has called Done, then print the
    // grand total.
    wg.Wait()
    fmt.Printf("%d\ttotal\n", globalLineCount)

... still not perfect

$ GORACE="log_path=/tmp/v4.race_log" go run -race v4.go -- /usr/bin
WARNING: DATA RACE
  Write by goroutine 139:
    main.func·001()
      /home/szamcsi/w/go/lc/v4.go:50 +0x15b

  Previous write by goroutine 144:
    main.func·001()
      /home/szamcsi/w/go/lc/v4.go:50 +0x15b
    // Register the goroutine with the WaitGroup before starting it.
    wg.Add(1)
    go func() {
        // Deferred so the WaitGroup is released on every way out.
        defer wg.Done()
        lc, err := lineCount(path)
        if err != nil {
            // fmt.Errorf only builds an error value; the message has to be
            // written out for the failure to be visible.
            fmt.Fprintln(os.Stderr, err)
        }
        // Every goroutine started here writes this one shared variable.
        globalLineCount += lc
        fmt.Printf("%d\t%s\n", lc, path)
    }()

Channels

// Create a channel of int values; no capacity is given, so it is unbuffered.
c := make(chan int)
// Send the value 1 on the channel. On an unbuffered channel the send waits
// until a receiver is ready.
c <- 1
// Receive one value from the channel and store it in x.
x := <- c

see also Hoare, "Communicating Sequential Processes" (1978)

Sending results to a channel

// lcResult carries the outcome of counting the lines of one file.
// Fields are filled positionally where the value is built, so their order
// matters.
type lcResult struct {
    lc   int    // count returned by lineCount
    path string // path the count belongs to
    err  error  // error returned by lineCount
}

// lcC is the channel on which per-file results are delivered.
var lcC chan *lcResult

// wg tracks the counting goroutines that have been started but have not yet
// called Done.
var wg sync.WaitGroup
    // Register the goroutine, then count in the background and hand the
    // outcome, error included, to whoever reads lcC.
    wg.Add(1)
    go func() {
        defer wg.Done()
        n, countErr := lineCount(path)
        lcC <- &lcResult{lc: n, path: path, err: countErr}
    }()

Gathering all the results

// main walks the tree rooted at the first command-line argument, collects the
// per-file results from lcC, prints each count and finally the grand total.
// Errors from the walk and from individual files are written to standard
// error; if any occurred the process exits with status 1.
func main() {
    lcC = make(chan *lcResult, 100)
    flag.Parse()
    root := flag.Arg(0)
    err := filepath.Walk(root, visit)
    if err != nil {
        // fmt.Errorf only builds an error value; the message has to be
        // written out for the failure to be visible.
        fmt.Fprintln(os.Stderr, err)
    }
    // Close the channel once every counting goroutine is done, which ends the
    // range loop below.
    go func() { wg.Wait(); close(lcC) }()

    var globalLineCount int
    for lcr := range lcC {
        if lcr.err != nil {
            fmt.Fprintln(os.Stderr, lcr.err)
            // Remember the failure so the exit status reflects it.
            err = lcr.err
        }
        globalLineCount += lcr.lc
        fmt.Printf("%d\t%s\n", lcr.lc, lcr.path)
    }
    fmt.Printf("%d\ttotal\n", globalLineCount)
    if err != nil {
        os.Exit(1)
    }
}

Further Exercises

The C10K Problem

And the Go solution:

package main

import (
    "fmt"
    "log"
    "net/http"
)

// Hello is an http.Handler without any state.
type Hello struct{}

// ServeHTTP writes the fixed body "Hello!" to w; the request is not inspected.
func (Hello) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    const body = "Hello!"
    fmt.Fprint(w, body)
}

// main serves Hello on localhost:4000.
func main() {
    var h Hello
    // ListenAndServe only returns on failure (for example when the port is
    // already taken); report the error and exit non-zero instead of ending
    // silently.
    log.Fatal(http.ListenAndServe("localhost:4000", h))
}

Thank you

Ákos Frohner

Google Inc.