profile
viewpoint
Roman Khavronenko hagen1778 London

Vertamedia/chproxy 502

ClickHouse http proxy and load balancer

Vertamedia/clickhouse-grafana 432

Clickhouse datasource for grafana

hagen1778/grafana-import-export 120

shell scripts for importing and exporting Grafana's dashboards and datasources

hagen1778/fasthttploader 92

HTTP benchmark tool (similar to ab) with auto-adjustment and charts, based on the fasthttp library

Vertamedia/chtable 22

Grafana's table plugin for ClickHouse

lwolf/konsumerator 17

Kafka Consumer Operator. Kubernetes operator to manage consumers of unbalanced kafka topics with per-partition vertical autoscaling based on Prometheus metrics

mkocikowski/libkafka 14

Golang implementation of Kafka wire protocol.

mkocikowski/kafkaclient 8

Golang kafka client based on libkafka

hagen1778/tsbs 3

Time Series Benchmark Suite, a tool for comparing and evaluating databases for time series data

issue commentVictoriaMetrics/VictoriaMetrics

support passing passwords via file

There is a temporary workaround: pass passwords via environment variables. See https://victoriametrics.github.io/#environment-variables for details. Passing command-line flags via environment variables is supported by all the VictoriaMetrics components - vmagent, vmalert, vmauth, etc.

jelmd

comment created time in 2 minutes

issue closedVictoriaMetrics/VictoriaMetrics

Bad metrics / labels when using -promscrape.streamParse on vmagent

This was mostly fixed in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825#issuecomment-723430240, but garbage metrics are still present in small amounts. For example, labels from random time series appear as metric names:

machine_dc=\dc1\
machine_group=\group1\}
\nodejs_common_name\

All of these should be labels of some time series, but they became names of new time series instead.

closed time in 8 minutes

wf1nder

issue commentVictoriaMetrics/VictoriaMetrics

Bad metrics / labels when using -promscrape.streamParse on vmagent

Then closing the bug as resolved.

wf1nder

comment created time in 8 minutes

issue commentVictoriaMetrics/VictoriaMetrics

vmselect: ability to set connect / response timeout from storage nodes

But there may be a situation when all shards in a location become unavailable, so there may be a gap in the data for this location in general.

Yes. But in this case gaps remain even if vmselect fetches data from all the vmstorage nodes.

wf1nder

comment created time in 8 minutes

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}+	}+	// wait for first init.+	syncWait.Wait()+	
go wc.watchForServices(ctx)+	return &wc, nil+}++// stops all service watchers.+func (w *watchConsul) stopAll() {+	w.mu.Lock()+	for _, sw := range w.services {+		sw.cancel()+	}+	w.mu.Unlock()+}++// getServiceNames returns serviceNames and index version.+func (w *watchConsul) getServiceNames(index uint64) (map[string]struct{}, uint64, error) {+	sns := make(map[string]struct{})+	path := fmt.Sprintf("/v1/catalog/services%s", w.baseQueryArgs)+	if len(w.nodeMeta) > 0 {+		path += w.nodeMeta+	}+	data, newIndex, err := getAPIResponse(w.client, path, index)+	if err != nil {+		return nil, index, err+	}+	var m map[string][]string+	if err := json.Unmarshal(data, &m); err != nil {+		return nil, index, fmt.Errorf("cannot parse services response=%q, err=%w", data, err)+	}+	for k, tags := range m {+		if !shouldCollectServiceByName(w.shouldWatchServices, k) {+			continue+		}+		if !shouldCollectServiceByTags(w.shouldWatchTags, tags) {+			continue+		}+		sns[k] = struct{}{}+	}+	return sns, newIndex, nil+}++// listen for new services and update it.+func (w *watchConsul) watchForServices(ctx context.Context) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	var index uint64+	for {+		select {+		case <-ctx.Done():+			w.stopAll()+			return+		case <-ticker.C:+			if fasttime.UnixTimestamp()-atomic.LoadUint64(&w.lastAccessTime) > uint64(SDCheckInterval.Seconds())*2 {

There is no need to use fasttime here, since this loop is executed rarely. It would be better for readability to use functions from the standard time package here.

f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func GetHTTPClient() *http.Client {  // Client is http client, which talks to the given apiServer. type Client struct {-	hc        *fasthttp.HostClient-	ac        *promauth.Config-	apiServer string-	hostPort  string+	hc          *fasthttp.HostClient+	watchClient *fasthttp.HostClient

could you rename watchClient to blockingClient and add the following comment to it:

blockingClient is used for performing long-polling requests.
f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}+	}+	// wait for first init.+	syncWait.Wait()+	
go wc.watchForServices(ctx)+	return &wc, nil+}++// stops all service watchers.+func (w *watchConsul) stopAll() {+	w.mu.Lock()+	for _, sw := range w.services {+		sw.cancel()+	}+	w.mu.Unlock()+}++// getServiceNames returns serviceNames and index version.+func (w *watchConsul) getServiceNames(index uint64) (map[string]struct{}, uint64, error) {+	sns := make(map[string]struct{})+	path := fmt.Sprintf("/v1/catalog/services%s", w.baseQueryArgs)+	if len(w.nodeMeta) > 0 {+		path += w.nodeMeta+	}+	data, newIndex, err := getAPIResponse(w.client, path, index)+	if err != nil {+		return nil, index, err+	}+	var m map[string][]string+	if err := json.Unmarshal(data, &m); err != nil {+		return nil, index, fmt.Errorf("cannot parse services response=%q, err=%w", data, err)+	}+	for k, tags := range m {+		if !shouldCollectServiceByName(w.shouldWatchServices, k) {+			continue+		}+		if !shouldCollectServiceByTags(w.shouldWatchTags, tags) {+			continue+		}+		sns[k] = struct{}{}+	}+	return sns, newIndex, nil+}++// listen for new services and update it.+func (w *watchConsul) watchForServices(ctx context.Context) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	var index uint64+	for {+		select {+		case <-ctx.Done():+			w.stopAll()+			return+		case <-ticker.C:+			if fasttime.UnixTimestamp()-atomic.LoadUint64(&w.lastAccessTime) > uint64(SDCheckInterval.Seconds())*2 {+				// exit watch and stop all background watchers.+				w.stopAll()+				return+			}+			m, newIndex, err := w.getServiceNames(index)+			if err != nil {+				logger.Errorf("failed get serviceNames from consul api: err=%v", err)+				continue+			}+			// nothing changed.+			if index == newIndex {+				continue+			}+			w.mu.Lock()+			// start new services watchers.+			for svc := range m {+				if _, ok := w.services[svc]; !ok {+					ctx, cancel := context.WithCancel(ctx)+					go w.startWatchService(ctx, svc, nil)+					w.services[svc] = serviceWatch{cancel: cancel}+				}+			}+			// stop watch for removed services.+			for 
svc, s := range w.services {+				if _, ok := m[svc]; !ok {+					s.cancel()+					delete(w.services, svc)+				}+			}+			w.mu.Unlock()+			index = newIndex+		}+	}++}++// start watch for consul service changes.+func (w *watchConsul) startWatchService(ctx context.Context, svc string, initWait *sync.WaitGroup) {

Could you rename this function to watchForServiceUpdates?

f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}+	}+	// wait for first init.+	syncWait.Wait()+	
go wc.watchForServices(ctx)+	return &wc, nil+}++// stops all service watchers.+func (w *watchConsul) stopAll() {+	w.mu.Lock()+	for _, sw := range w.services {+		sw.cancel()+	}+	w.mu.Unlock()+}++// getServiceNames returns serviceNames and index version.+func (w *watchConsul) getServiceNames(index uint64) (map[string]struct{}, uint64, error) {+	sns := make(map[string]struct{})+	path := fmt.Sprintf("/v1/catalog/services%s", w.baseQueryArgs)+	if len(w.nodeMeta) > 0 {+		path += w.nodeMeta+	}+	data, newIndex, err := getAPIResponse(w.client, path, index)+	if err != nil {+		return nil, index, err+	}+	var m map[string][]string+	if err := json.Unmarshal(data, &m); err != nil {+		return nil, index, fmt.Errorf("cannot parse services response=%q, err=%w", data, err)+	}+	for k, tags := range m {+		if !shouldCollectServiceByName(w.shouldWatchServices, k) {+			continue+		}+		if !shouldCollectServiceByTags(w.shouldWatchTags, tags) {+			continue+		}+		sns[k] = struct{}{}+	}+	return sns, newIndex, nil+}++// listen for new services and update it.+func (w *watchConsul) watchForServices(ctx context.Context) {

Could you rename this function to watchForServiceListUpdates?

f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}+	}+	// wait for first init.+	syncWait.Wait()+	
go wc.watchForServices(ctx)+	return &wc, nil+}++// stops all service watchers.+func (w *watchConsul) stopAll() {+	w.mu.Lock()+	for _, sw := range w.services {+		sw.cancel()+	}+	w.mu.Unlock()+}++// getServiceNames returns serviceNames and index version.+func (w *watchConsul) getServiceNames(index uint64) (map[string]struct{}, uint64, error) {+	sns := make(map[string]struct{})+	path := fmt.Sprintf("/v1/catalog/services%s", w.baseQueryArgs)+	if len(w.nodeMeta) > 0 {+		path += w.nodeMeta+	}+	data, newIndex, err := getAPIResponse(w.client, path, index)+	if err != nil {+		return nil, index, err+	}+	var m map[string][]string+	if err := json.Unmarshal(data, &m); err != nil {+		return nil, index, fmt.Errorf("cannot parse services response=%q, err=%w", data, err)+	}+	for k, tags := range m {+		if !shouldCollectServiceByName(w.shouldWatchServices, k) {+			continue+		}+		if !shouldCollectServiceByTags(w.shouldWatchTags, tags) {+			continue+		}+		sns[k] = struct{}{}+	}+	return sns, newIndex, nil+}++// listen for new services and update it.+func (w *watchConsul) watchForServices(ctx context.Context) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	var index uint64+	for {+		select {+		case <-ctx.Done():+			w.stopAll()+			return+		case <-ticker.C:+			if fasttime.UnixTimestamp()-atomic.LoadUint64(&w.lastAccessTime) > uint64(SDCheckInterval.Seconds())*2 {+				// exit watch and stop all background watchers.+				w.stopAll()+				return+			}+			m, newIndex, err := w.getServiceNames(index)+			if err != nil {+				logger.Errorf("failed get serviceNames from consul api: err=%v", err)+				continue+			}+			// nothing changed.+			if index == newIndex {+				continue+			}+			w.mu.Lock()+			// start new services watchers.+			for svc := range m {+				if _, ok := w.services[svc]; !ok {+					ctx, cancel := context.WithCancel(ctx)+					go w.startWatchService(ctx, svc, nil)+					w.services[svc] = serviceWatch{cancel: cancel}+				}+			}+			// stop watch for removed services.+			for 
svc, s := range w.services {+				if _, ok := m[svc]; !ok {+					s.cancel()+					delete(w.services, svc)+				}+			}+			w.mu.Unlock()+			index = newIndex+		}+	}++}++// start watch for consul service changes.+func (w *watchConsul) startWatchService(ctx context.Context, svc string, initWait *sync.WaitGroup) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	updateServiceState := func(index uint64) uint64 {+		sns, newIndex, err := getServiceState(w.client, svc, w.baseQueryArgs, index)+		if err != nil {+			logger.Errorf("failed update service state err=%v", err)+			return index+		}+		if newIndex == index {+			return index+		}+		w.mu.Lock()+		s := w.services[svc]+		s.serviceNodes = sns+		w.services[svc] = s+		w.mu.Unlock()+		return newIndex+	}+	watchIndex := updateServiceState(0)+	// report after first sync if needed.+	if initWait != nil {+		initWait.Done()+	}+	for {+		select {+		case <-ticker.C:+			watchIndex = updateServiceState(watchIndex)+		case <-ctx.Done():

It looks like it would be better to use a bare stopCh <-chan struct{} instead of the over-engineered ctx across the PR. See how stopCh is used in other places.

f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}

I don't understand the purpose of this line. It has the following issues:

  • it may result in a data race with the started goroutine when updating wc.services
  • it overwrites wc.services[serviceName] with empty serviceWatch.serviceNodes
f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}+	}+	// wait for first init.+	syncWait.Wait()+	
go wc.watchForServices(ctx)+	return &wc, nil+}++// stops all service watchers.+func (w *watchConsul) stopAll() {+	w.mu.Lock()+	for _, sw := range w.services {+		sw.cancel()+	}+	w.mu.Unlock()+}++// getServiceNames returns serviceNames and index version.+func (w *watchConsul) getServiceNames(index uint64) (map[string]struct{}, uint64, error) {+	sns := make(map[string]struct{})+	path := fmt.Sprintf("/v1/catalog/services%s", w.baseQueryArgs)+	if len(w.nodeMeta) > 0 {+		path += w.nodeMeta+	}+	data, newIndex, err := getAPIResponse(w.client, path, index)+	if err != nil {+		return nil, index, err+	}+	var m map[string][]string+	if err := json.Unmarshal(data, &m); err != nil {+		return nil, index, fmt.Errorf("cannot parse services response=%q, err=%w", data, err)+	}+	for k, tags := range m {+		if !shouldCollectServiceByName(w.shouldWatchServices, k) {+			continue+		}+		if !shouldCollectServiceByTags(w.shouldWatchTags, tags) {+			continue+		}+		sns[k] = struct{}{}+	}+	return sns, newIndex, nil+}++// listen for new services and update it.+func (w *watchConsul) watchForServices(ctx context.Context) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	var index uint64+	for {+		select {+		case <-ctx.Done():+			w.stopAll()+			return+		case <-ticker.C:+			if fasttime.UnixTimestamp()-atomic.LoadUint64(&w.lastAccessTime) > uint64(SDCheckInterval.Seconds())*2 {+				// exit watch and stop all background watchers.+				w.stopAll()+				return+			}+			m, newIndex, err := w.getServiceNames(index)+			if err != nil {+				logger.Errorf("failed get serviceNames from consul api: err=%v", err)+				continue+			}+			// nothing changed.+			if index == newIndex {+				continue+			}+			w.mu.Lock()+			// start new services watchers.+			for svc := range m {+				if _, ok := w.services[svc]; !ok {+					ctx, cancel := context.WithCancel(ctx)+					go w.startWatchService(ctx, svc, nil)+					w.services[svc] = serviceWatch{cancel: cancel}+				}+			}+			// stop watch for removed services.+			for 
svc, s := range w.services {+				if _, ok := m[svc]; !ok {+					s.cancel()+					delete(w.services, svc)+				}+			}+			w.mu.Unlock()+			index = newIndex+		}+	}++}++// start watch for consul service changes.+func (w *watchConsul) startWatchService(ctx context.Context, svc string, initWait *sync.WaitGroup) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	updateServiceState := func(index uint64) uint64 {+		sns, newIndex, err := getServiceState(w.client, svc, w.baseQueryArgs, index)+		if err != nil {+			logger.Errorf("failed update service state err=%v", err)+			return index+		}+		if newIndex == index {+			return index+		}+		w.mu.Lock()+		s := w.services[svc]+		s.serviceNodes = sns+		w.services[svc] = s+		w.mu.Unlock()+		return newIndex+	}+	watchIndex := updateServiceState(0)+	// report after first sync if needed.+	if initWait != nil {

The code on lines 183-184 looks very fragile. Could you refactor it, so this function doesn't accept initWait? Try applying the following pattern during initialization:

var wg sync.WaitGroup
for _, serviceName := range serviceNames {
  wg.Add(1)
  go func(serviceName string) {
      defer wg.Done()
      w.watchForServiceUpdates(serviceName)
  }(serviceName)
}
wg.Wait()
f41gh7

comment created time in an hour

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.

bachground discovery -> background service discovery for Consul

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.

It looks like this comment is irrelevant here.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex

could you move mu field closer to services field and rename it to servicesLock? This should improve code readability a bit.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func getDatacenter(client *discoveryutils.Client, dc string) (string, error) { 	return a.Config.Datacenter, nil } -func getAPIResponse(cfg *apiConfig, path string) ([]byte, error) {-	separator := "?"-	if strings.Contains(path, "?") {-		separator = "&"-	}-	path += fmt.Sprintf("%sdc=%s", separator, url.QueryEscape(cfg.datacenter))-	if cfg.allowStale {-		// See https://www.consul.io/api/features/consistency-		path += "&stale"-	}-	if len(cfg.nodeMeta) > 0 {-		for k, v := range cfg.nodeMeta {-			path += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+// returns ServiceNodesState and version index.+func getServiceState(client *discoveryutils.Client, svc, baseArgs string, index uint64) ([]ServiceNode, uint64, error) {+	path := fmt.Sprintf("/v1/health/service/%s%s", svc, baseArgs)+	// The /v1/health/service/:service endpoint supports background refresh caching,+	// which guarantees fresh results obtained from local Consul agent.+	// See https://www.consul.io/api-docs/health#list-nodes-for-service+	// and https://www.consul.io/api/features/caching for details.+	// Query cached results in order to reduce load on Consul cluster.+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/574 .+	path += "&cached"++	data, newIndex, err := getAPIResponse(client, path, index)+	if err != nil {+		return nil, index, err+	}+	sns, err := parseServiceNodes(data)+	if err != nil {+		return nil, index, err+	}+	return sns, newIndex, nil+}++// returns consul api response with new index version of object.+func getAPIResponse(client *discoveryutils.Client, path string, index uint64) ([]byte, uint64, error) {

Could you rename this function to getBlockingAPIResponse for improved readability?

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {

Could you rename dc to datacenter? This should improve code readability a bit.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

+package consul++import (+	"context"+	"encoding/json"+	"fmt"+	"net/url"+	"sync"+	"sync/atomic"+	"time"++	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils"+)++type serviceWatch struct {+	cancel       context.CancelFunc+	serviceNodes []ServiceNode+}++// watcher for consul api, updates targets in background with long-polling.+type watchConsul struct {+	baseQueryArgs  string+	cancel         context.CancelFunc+	client         *discoveryutils.Client+	lastAccessTime uint64+	// guards services+	mu                  sync.Mutex+	nodeMeta            string+	shouldWatchServices []string+	shouldWatchTags     []string+	services            map[string]serviceWatch+}++// init new watcher and start bachground discovery.+func newWatchConsul(client *discoveryutils.Client, sdc *SDConfig, dc string) (*watchConsul, error) {+	// wait time must be less, then fasthttp client deadline - its 1 minute.+	baseQueryArgs := fmt.Sprintf("?sdc=%s", url.QueryEscape(dc))+	var nodeMeta string+	if len(sdc.NodeMeta) > 0 {+		for k, v := range sdc.NodeMeta {+			nodeMeta += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+		}+	}+	if sdc.AllowStale {+		baseQueryArgs += "&stale"+	}+	wc := watchConsul{+		client:              client,+		baseQueryArgs:       baseQueryArgs,+		shouldWatchServices: sdc.Services,+		shouldWatchTags:     sdc.Tags,+		services:            make(map[string]serviceWatch),+	}++	watchServiceNames, _, err := wc.getServiceNames(0)+	if err != nil {+		return nil, err+	}+	// global context+	ctx, cancel := context.WithCancel(context.Background())+	wc.cancel = cancel+	var syncWait sync.WaitGroup+	for serviceName := range watchServiceNames {+		ctx, cancel := context.WithCancel(ctx)+		syncWait.Add(1)+		go wc.startWatchService(ctx, serviceName, &syncWait)+		wc.services[serviceName] = serviceWatch{cancel: cancel}+	}+	// wait for first init.+	syncWait.Wait()+	
go wc.watchForServices(ctx)+	return &wc, nil+}++// stops all service watchers.+func (w *watchConsul) stopAll() {+	w.mu.Lock()+	for _, sw := range w.services {+		sw.cancel()+	}+	w.mu.Unlock()+}++// getServiceNames returns serviceNames and index version.+func (w *watchConsul) getServiceNames(index uint64) (map[string]struct{}, uint64, error) {+	sns := make(map[string]struct{})+	path := fmt.Sprintf("/v1/catalog/services%s", w.baseQueryArgs)+	if len(w.nodeMeta) > 0 {+		path += w.nodeMeta+	}+	data, newIndex, err := getAPIResponse(w.client, path, index)+	if err != nil {+		return nil, index, err+	}+	var m map[string][]string+	if err := json.Unmarshal(data, &m); err != nil {+		return nil, index, fmt.Errorf("cannot parse services response=%q, err=%w", data, err)+	}+	for k, tags := range m {+		if !shouldCollectServiceByName(w.shouldWatchServices, k) {+			continue+		}+		if !shouldCollectServiceByTags(w.shouldWatchTags, tags) {+			continue+		}+		sns[k] = struct{}{}+	}+	return sns, newIndex, nil+}++// listen for new services and update it.+func (w *watchConsul) watchForServices(ctx context.Context) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	var index uint64+	for {+		select {+		case <-ctx.Done():+			w.stopAll()+			return+		case <-ticker.C:+			if fasttime.UnixTimestamp()-atomic.LoadUint64(&w.lastAccessTime) > uint64(SDCheckInterval.Seconds())*2 {+				// exit watch and stop all background watchers.+				w.stopAll()+				return+			}+			m, newIndex, err := w.getServiceNames(index)+			if err != nil {+				logger.Errorf("failed get serviceNames from consul api: err=%v", err)+				continue+			}+			// nothing changed.+			if index == newIndex {+				continue+			}+			w.mu.Lock()+			// start new services watchers.+			for svc := range m {+				if _, ok := w.services[svc]; !ok {+					ctx, cancel := context.WithCancel(ctx)+					go w.startWatchService(ctx, svc, nil)+					w.services[svc] = serviceWatch{cancel: cancel}+				}+			}+			// stop watch for removed services.+			for 
svc, s := range w.services {+				if _, ok := m[svc]; !ok {+					s.cancel()+					delete(w.services, svc)+				}+			}+			w.mu.Unlock()+			index = newIndex+		}+	}++}++// start watch for consul service changes.+func (w *watchConsul) startWatchService(ctx context.Context, svc string, initWait *sync.WaitGroup) {+	ticker := time.NewTicker(*SDCheckInterval)+	defer ticker.Stop()+	updateServiceState := func(index uint64) uint64 {+		sns, newIndex, err := getServiceState(w.client, svc, w.baseQueryArgs, index)+		if err != nil {+			logger.Errorf("failed update service state err=%v", err)+			return index+		}+		if newIndex == index {+			return index+		}+		w.mu.Lock()+		s := w.services[svc]+		s.serviceNodes = sns+		w.services[svc] = s+		w.mu.Unlock()+		return newIndex+	}+	watchIndex := updateServiceState(0)+	// report after first sync if needed.+	if initWait != nil {+		initWait.Done()+	}+	for {+		select {+		case <-ticker.C:+			watchIndex = updateServiceState(watchIndex)+		case <-ctx.Done():+			return+		}+	}+}++// returns combined ServiceNodes.+func (w *watchConsul) getSNS() []ServiceNode {

Could you rename this function to getServiceNodes? This should improve readability at call site a bit.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func GetLabels(sdc *SDConfig, baseDir string) ([]map[string]string, error) { 	if err != nil { 		return nil, fmt.Errorf("cannot get API config: %w", err) 	}-	ms, err := getServiceNodesLabels(cfg)+	sns := cfg.consulWatcher.getSNS()

It looks like this line can be moved into addServiceNodesLabels. This should reduce the resulting diff for the PR.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func getDatacenter(client *discoveryutils.Client, dc string) (string, error) { 	return a.Config.Datacenter, nil } -func getAPIResponse(cfg *apiConfig, path string) ([]byte, error) {-	separator := "?"-	if strings.Contains(path, "?") {-		separator = "&"-	}-	path += fmt.Sprintf("%sdc=%s", separator, url.QueryEscape(cfg.datacenter))-	if cfg.allowStale {-		// See https://www.consul.io/api/features/consistency-		path += "&stale"-	}-	if len(cfg.nodeMeta) > 0 {-		for k, v := range cfg.nodeMeta {-			path += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+// returns ServiceNodesState and version index.+func getServiceState(client *discoveryutils.Client, svc, baseArgs string, index uint64) ([]ServiceNode, uint64, error) {+	path := fmt.Sprintf("/v1/health/service/%s%s", svc, baseArgs)+	// The /v1/health/service/:service endpoint supports background refresh caching,+	// which guarantees fresh results obtained from local Consul agent.+	// See https://www.consul.io/api-docs/health#list-nodes-for-service+	// and https://www.consul.io/api/features/caching for details.+	// Query cached results in order to reduce load on Consul cluster.+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/574 .+	path += "&cached"++	data, newIndex, err := getAPIResponse(client, path, index)+	if err != nil {+		return nil, index, err+	}+	sns, err := parseServiceNodes(data)+	if err != nil {+		return nil, index, err+	}+	return sns, newIndex, nil+}++// returns consul api response with new index version of object.+func getAPIResponse(client *discoveryutils.Client, path string, index uint64) ([]byte, uint64, error) {+	if index > 0 {+		path = path + "&index=" + strconv.Itoa(int(index))+	}+	path = path + fmt.Sprintf("&wait=%s", watchTime)+	getMeta := func(resp *fasthttp.Response) {+		if ind := resp.Header.Peek("X-Consul-Index"); len(ind) > 0 {+			newIndex, err := strconv.ParseUint(string(ind), 10, 64)+			if err != nil {+				logger.Errorf("failed to parse consul index: %v", err)+				return+	
		}+			index = newIndex

The returned index should be validated before use according to https://www.consul.io/api-docs/features/blocking#implementation-details . I.e. if the returned index is smaller than the previous index or is smaller than 1, then the returned index must be reset to 1.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 package consul import ( 	"encoding/json" 	"fmt"-	"net/url" 	"strconv" 	"strings"  	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" ) -// getServiceNodesLabels returns labels for Consul service nodes obtained from the given cfg-func getServiceNodesLabels(cfg *apiConfig) ([]map[string]string, error) {-	sns, err := getAllServiceNodes(cfg)-	if err != nil {-		return nil, err-	}+// addServiceNodesLabels returns labels for Consul service nodes obtained from the given cfg+func addServiceNodesLabels(sns []ServiceNode, tagSeparator string) ([]map[string]string, error) {

Could you revert the function name to getServiceNodesLabels and update the comment on this function, since it becomes stale after this PR is merged?

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func concurrencyLimitChInit() { 	concurrencyLimitCh = make(chan struct{}, *maxConcurrency) } +// APIRequestParams modifies api request with given params.+type APIRequestParams struct {

Let's just pass FetchFromResponse and SetToRequest callbacks directly to GetBlockingAPIResponse. This should improve code readability a bit. Moreover, it looks like SetToRequest isn't used at the moment, so it should be enough to pass a single callback to GetBlockingAPIResponse:

func (c *Client) GetBlockingAPIResponse(path string, inspectResponse func(resp *fasthttp.Response)) ([]byte, error)
f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func concurrencyLimitChInit() { 	concurrencyLimitCh = make(chan struct{}, *maxConcurrency) } +// APIRequestParams modifies api request with given params.+type APIRequestParams struct {+	FetchFromResponse func(resp *fasthttp.Response)+	SetToRequest      func(req *fasthttp.Request)+}+ // GetAPIResponse returns response for the given absolute path. func (c *Client) GetAPIResponse(path string) ([]byte, error) {+	return c.GetAPIResponseWithParamsAndPossibleWatch(path, nil, false)+}++// GetAPIResponseWithParamsAndPossibleWatch returns response for given absolute path with modifying request and response params+// and possible long-polling watch request.+func (c *Client) GetAPIResponseWithParamsAndPossibleWatch(path string, params *APIRequestParams, useWatch bool) ([]byte, error) {

It looks like the useWatch arg is redundant here, since it is set to true only if params != nil.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func getDatacenter(client *discoveryutils.Client, dc string) (string, error) { 	return a.Config.Datacenter, nil } -func getAPIResponse(cfg *apiConfig, path string) ([]byte, error) {-	separator := "?"-	if strings.Contains(path, "?") {-		separator = "&"-	}-	path += fmt.Sprintf("%sdc=%s", separator, url.QueryEscape(cfg.datacenter))-	if cfg.allowStale {-		// See https://www.consul.io/api/features/consistency-		path += "&stale"-	}-	if len(cfg.nodeMeta) > 0 {-		for k, v := range cfg.nodeMeta {-			path += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+// returns ServiceNodesState and version index.+func getServiceState(client *discoveryutils.Client, svc, baseArgs string, index uint64) ([]ServiceNode, uint64, error) {+	path := fmt.Sprintf("/v1/health/service/%s%s", svc, baseArgs)+	// The /v1/health/service/:service endpoint supports background refresh caching,+	// which guarantees fresh results obtained from local Consul agent.+	// See https://www.consul.io/api-docs/health#list-nodes-for-service+	// and https://www.consul.io/api/features/caching for details.+	// Query cached results in order to reduce load on Consul cluster.+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/574 .+	path += "&cached"++	data, newIndex, err := getAPIResponse(client, path, index)+	if err != nil {+		return nil, index, err+	}+	sns, err := parseServiceNodes(data)+	if err != nil {+		return nil, index, err+	}+	return sns, newIndex, nil+}++// returns consul api response with new index version of object.+func getAPIResponse(client *discoveryutils.Client, path string, index uint64) ([]byte, uint64, error) {+	if index > 0 {+		path = path + "&index=" + strconv.Itoa(int(index))

It is better to write path += ... instead of path = path + ...

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func getDatacenter(client *discoveryutils.Client, dc string) (string, error) { 	return a.Config.Datacenter, nil } -func getAPIResponse(cfg *apiConfig, path string) ([]byte, error) {-	separator := "?"-	if strings.Contains(path, "?") {-		separator = "&"-	}-	path += fmt.Sprintf("%sdc=%s", separator, url.QueryEscape(cfg.datacenter))-	if cfg.allowStale {-		// See https://www.consul.io/api/features/consistency-		path += "&stale"-	}-	if len(cfg.nodeMeta) > 0 {-		for k, v := range cfg.nodeMeta {-			path += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+// returns ServiceNodesState and version index.+func getServiceState(client *discoveryutils.Client, svc, baseArgs string, index uint64) ([]ServiceNode, uint64, error) {+	path := fmt.Sprintf("/v1/health/service/%s%s", svc, baseArgs)+	// The /v1/health/service/:service endpoint supports background refresh caching,+	// which guarantees fresh results obtained from local Consul agent.+	// See https://www.consul.io/api-docs/health#list-nodes-for-service+	// and https://www.consul.io/api/features/caching for details.+	// Query cached results in order to reduce load on Consul cluster.+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/574 .+	path += "&cached"++	data, newIndex, err := getAPIResponse(client, path, index)+	if err != nil {+		return nil, index, err+	}+	sns, err := parseServiceNodes(data)+	if err != nil {+		return nil, index, err+	}+	return sns, newIndex, nil+}++// returns consul api response with new index version of object.

Could you add the URL https://www.consul.io/api-docs/features/blocking to this comment? This URL explains blocking queries in Consul, so it may be useful for future readers of this code.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func concurrencyLimitChInit() { 	concurrencyLimitCh = make(chan struct{}, *maxConcurrency) } +// APIRequestParams modifies api request with given params.+type APIRequestParams struct {+	FetchFromResponse func(resp *fasthttp.Response)+	SetToRequest      func(req *fasthttp.Request)+}+ // GetAPIResponse returns response for the given absolute path. func (c *Client) GetAPIResponse(path string) ([]byte, error) {+	return c.GetAPIResponseWithParamsAndPossibleWatch(path, nil, false)+}++// GetAPIResponseWithParamsAndPossibleWatch returns response for given absolute path with modifying request and response params+// and possible long-polling watch request.+func (c *Client) GetAPIResponseWithParamsAndPossibleWatch(path string, params *APIRequestParams, useWatch bool) ([]byte, error) {

Could you rename this function to GetBlockingAPIResponse?

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func getDatacenter(client *discoveryutils.Client, dc string) (string, error) { 	return a.Config.Datacenter, nil } -func getAPIResponse(cfg *apiConfig, path string) ([]byte, error) {-	separator := "?"-	if strings.Contains(path, "?") {-		separator = "&"-	}-	path += fmt.Sprintf("%sdc=%s", separator, url.QueryEscape(cfg.datacenter))-	if cfg.allowStale {-		// See https://www.consul.io/api/features/consistency-		path += "&stale"-	}-	if len(cfg.nodeMeta) > 0 {-		for k, v := range cfg.nodeMeta {-			path += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+// returns ServiceNodesState and version index.+func getServiceState(client *discoveryutils.Client, svc, baseArgs string, index uint64) ([]ServiceNode, uint64, error) {+	path := fmt.Sprintf("/v1/health/service/%s%s", svc, baseArgs)+	// The /v1/health/service/:service endpoint supports background refresh caching,+	// which guarantees fresh results obtained from local Consul agent.+	// See https://www.consul.io/api-docs/health#list-nodes-for-service+	// and https://www.consul.io/api/features/caching for details.+	// Query cached results in order to reduce load on Consul cluster.+	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/574 .+	path += "&cached"++	data, newIndex, err := getAPIResponse(client, path, index)+	if err != nil {+		return nil, index, err+	}+	sns, err := parseServiceNodes(data)+	if err != nil {+		return nil, index, err+	}+	return sns, newIndex, nil+}++// returns consul api response with new index version of object.+func getAPIResponse(client *discoveryutils.Client, path string, index uint64) ([]byte, uint64, error) {+	if index > 0 {

It is safe to set the initial index to 1 according to https://www.consul.io/api-docs/features/blocking#implementation-details :

It is always safe to use an index of 1 to wait for updates when the data being requested doesn't exist yet, so clients should sanity check that their index is at least 1 after each blocking response is handled to be sure they actually block on the next request.

f41gh7

comment created time in 2 hours

Pull request review commentVictoriaMetrics/VictoriaMetrics

Changes consul discovery api

 func getDatacenter(client *discoveryutils.Client, dc string) (string, error) { 	return a.Config.Datacenter, nil } -func getAPIResponse(cfg *apiConfig, path string) ([]byte, error) {-	separator := "?"-	if strings.Contains(path, "?") {-		separator = "&"-	}-	path += fmt.Sprintf("%sdc=%s", separator, url.QueryEscape(cfg.datacenter))-	if cfg.allowStale {-		// See https://www.consul.io/api/features/consistency-		path += "&stale"-	}-	if len(cfg.nodeMeta) > 0 {-		for k, v := range cfg.nodeMeta {-			path += fmt.Sprintf("&node-meta=%s", url.QueryEscape(k+":"+v))+// returns ServiceNodesState and version index.+func getServiceState(client *discoveryutils.Client, svc, baseArgs string, index uint64) ([]ServiceNode, uint64, error) {+	path := fmt.Sprintf("/v1/health/service/%s%s", svc, baseArgs)+	// The /v1/health/service/:service endpoint supports background refresh caching,

I think the cached query arg isn't needed for long polling.

f41gh7

comment created time in 2 hours

issue commentprometheus/prometheus

Setup packagecloud and distribute official debs

I think the way to go is to run our own prometheus repository (similar to what Grafana is doing) in order to have up-to-date versions on Debian.

What is the latest status for this btw?

gouthamve

comment created time in 2 hours

push eventVictoriaMetrics/VictoriaMetrics

Nikolay

commit sha e4e33cb7573b2cfa21f18b5169354616305fb6dc

fixes checksum calculation (#928) * fixes checksum calculation, 'for' rule param wasnt marshal properly during checksum calculation * fixes error

view details

push time in 2 hours

pull request commentVictoriaMetrics/VictoriaMetrics

fixes checksum calculation

Thanks!

f41gh7

comment created time in 2 hours

more