Error: Endpoint read failed at /user_code/node_modules/@google-cloud/logging/node_modules/grpc/src/node/src/client.js:569:15 #2438

deremer · 2017-07-06T02:46:28Z

We are using Stack Driver to log errors for Firebase functions. We unpredictably get the error in the title. Typically it happens the first to we try to log an error after a function re-deploys. Subsequent error log writes will go through to StackDriver without issue, but occasionally we'll get the error again.

We're using "@google-cloud/logging": "^1.0.2", and deployed via Firebase Functions.

Below is our module that implements the logging...

Anybody have any idea what is causing this?

const Logging = require('@google-cloud/logging');

// To keep on top of errors, we should raise a verbose error report with Stackdriver rather
// than simply relying on console.error. This will calculate users affected + send you email
// alerts, if you've opted into receiving them.
const logging = Logging();

// This is the name of the StackDriver log stream that will receive the log
// entry. This name can be any valid log stream name, but must contain "err"
// in order for the error to be picked up by StackDriver Error Reporting.
const logName:string = 'errors-fb-func';

// Enum of StackDriver severities
enum Severities {
  ERROR = 500, // ERROR	(500) Error events are likely to cause problems.
  CRITICAL = 600, // CRITICAL	(600) Critical events cause more severe problems or outages.
  ALERT = 700, // ALERT	(700) A person must take an action immediately.
  EMERGENCY = 800 // EMERGENCY	(800) One or more systems are unusable.
}

// Provide an error object and and optional context object
export function log(err:Error, logLevel:number=Severities.ERROR, user?:string): Promise<any> {
  // https://cloud.google.com/functions/docs/monitoring/error-reporting#advanced_error_reporting

  const FUNCTION_NAME = process.env.FUNCTION_NAME;
  const log = logging.log(logName);

  const metadata = {
    // MonitoredResource
    // See https://cloud.google.com/logging/docs/api/ref_v2beta1/rest/v2beta1/MonitoredResource
    resource: {
      // MonitoredResource.type
      type: 'cloud_function',
      // MonitoredResource.labels
      labels: {
       function_name: FUNCTION_NAME
      }
    },
    severity: logLevel
  };

  const context:any = {};
  if (user && typeof user === 'string') {
    // ErrorEvent.context.user
    context.user = user;
  }

  // ErrorEvent
  // See https://cloud.google.com/error-reporting/reference/rest/v1beta1/ErrorEvent
  let structPayload:any = {
    // ErrorEvent.serviceContext
    serviceContext: {
      // ErrorEvent.serviceContext.service
      service: `cloud_function:${FUNCTION_NAME}`,
      // ErrorEvent.serviceContext.version
      resourceType: 'cloud_function'
    },

  };

  if (context) {
    // ErrorEvent.context
    structPayload.context = context;
  }

  structPayload.message = getMsgForError(err);

  return writeLog(log, metadata, structPayload);
}

function getMsgForError(error:Error): string {
  // https://cloud.google.com/functions/docs/monitoring/error-reporting#advanced_error_reporting
  // ErrorEvent.message
  if (error instanceof Error && typeof error.stack === 'string') {
    return error.stack;
  } else if (typeof error === 'string') {
    return error;
  } else if (typeof error.message === 'string') {
    return error.message;
  } else {
    logFatalError(error, "Error message type not supported");
    return "";
  }
}

function writeLog(log:any, metadata:any, structPayload:any): Promise<any> {
  console.log(metadata);
  console.log(structPayload);
  // Write the error log entry
  return new Promise((resolve, reject) => {
    try {
      log.write(log.entry(metadata, structPayload), (error:any) => {
        if (error) {
          logFatalError(error);
          reject(error);
        }
        resolve();
      });
    } catch(error) {
      reject(error);
    }
  });
}

// Utility function to log error if Logger fails
function logFatalError(error:Error, msg?:string): void {
  console.error(error, msg);
  throw error;
}

// error, crtical, alert, emergency, accept an Error object
// And then set error.stack as the message
export function error(error:Error, user?:string): Promise<any> {
  return log(error, Severities.ERROR, user);
}

export function critical(error:Error, user?:string): Promise<any> {
  return log(error, Severities.CRITICAL, user);
}

export function alert(error:Error, user?:string): Promise<any> {
  return log(error, Severities.ALERT, user);
}

export function emergency(error:Error, user?:string): Promise<any> {
  return log(error, Severities.EMERGENCY, user);
}

The text was updated successfully, but these errors were encountered:

stephenplusplus · 2017-07-08T18:08:37Z

@ofrobots ever run into anything like this before?

deremer · 2017-07-10T14:14:20Z

As an update, it seems to be more of an issue the first time a call is made to Stackdriver after a Firebase function is updated and restarted. Subsequent requests seem to go through ok. Although it does re-emerge sporadically. We really haven't been able to get to the bottom of it.

Unfortunately, this causes the functions to timeout and throw an "Unhandled Exception", which is not going to work for us in production. So unless we can resolve this issue, we will have to replace Stackdriver with another logger.

lukesneeringer · 2017-07-10T15:10:31Z

I too am hoping @ofrobots will have an idea. :-)

@deremer Can you provide the actual stack trace?

deremer · 2017-07-10T15:28:10Z

Here is one log entry where we see this...

{
 insertId:  "000000-6ca1de21-3818-4c66-a620-a8a02227e1a7"   
 labels: {
  execution_id:  "134720017564250"    
 }
 logName:  "projects/REDACTED/logs/cloudfunctions.googleapis.com%2Fcloud-functions"   
 receiveTimestamp:  "2017-07-10T00:12:20.477133425Z"   
 resource: {
  labels: {
   function_name:  "LotteryActions"     
   project_id:  "REDACTED"     
   region:  "us-central1"     
  }
  type:  "cloud_function"    
 }
 severity:  "ERROR"   
 textPayload:  "{ Error: Endpoint read failed
    at /user_code/node_modules/@google-cloud/logging/node_modules/grpc/src/node/src/client.js:569:15 code: 14, metadata: Metadata { _internal_repr: {} } } undefined"   
 timestamp:  "2017-07-10T00:12:14.178Z"   
}

deremer · 2017-07-10T15:30:10Z

I've been wondering if we're setting metadata incorrectly or something because of the metadata: Metadata { _internal_repr: {} } } undefined

But here's another one where it doesn't have that...

{
 insertId:  "000001-901cb19e-a4cb-4c78-a339-360408edb7d8"   
 labels: {
  execution_id:  "134720017564250"    
 }
 logName:  "projects/REDACTED/logs/cloudfunctions.googleapis.com%2Fcloud-functions"   
 receiveTimestamp:  "2017-07-10T00:12:20.477133425Z"   
 resource: {
  labels: {
   function_name:  "LotteryActions"     
   project_id:  "REDACTED"     
   region:  "us-central1"     
  }
  type:  "cloud_function"    
 }
 severity:  "ERROR"   
 textPayload:  "Error: Endpoint read failed
    at /user_code/node_modules/@google-cloud/logging/node_modules/grpc/src/node/src/client.js:569:15"   
 timestamp:  "2017-07-10T00:12:14.179Z"   
}

ofrobots · 2017-07-10T21:47:09Z

/cc @GoogleCloudPlatform/node-team @murgatroid99

ofrobots · 2017-07-10T21:50:36Z

The error indicates a TCP read error – which suggests that sporadically there are some network issues in the environment?

ofrobots · 2017-07-10T21:54:04Z

@swalkowski @jasonpolites: any ideas if this could be an issue on Functions?

deremer · 2017-07-10T21:54:34Z

@ofrobots Just so future readers are 100% clear on this... This code is being run as Firebase Functions, so it's 100% within the Google Cloud environment.

murgatroid99 · 2017-07-10T21:58:14Z

Note: the part with the metadata is probably irrelevant. That is trailing response metadata, which is often empty.

murgatroid99 · 2017-07-10T23:01:39Z

It seems like the gRPC client successfully establishes a TCP connection (if it didn't, the error would say "Connect failed"), then the first attempt to read from that TCP connection fails for some reason, and the request it was a part of failed.

It might help to see more details of the TCP error, but that doesn't propagate far enough up the stack. It is possible to see it (along with a torrent of other information) by setting the environment variables GRPC_TRACE=tcp GRPC_VERBOSITY=DEBUG. The trace information from that gets dumped to stderr, so if you have access to that, that could help (though it could also expose too much information).

deremer · 2017-07-10T23:04:57Z

@murgatroid99 any idea how to set an env var like that with Firebase Functions...or through the Cloud Function console?

Put it in runtime.config.json?

murgatroid99 · 2017-07-10T23:57:36Z

I am not familiar with Cloud Functions, but you should just be able to do process.env.GRPC_TRACE='tcp'; and process.env.GRPC_VERBOSITY=DEBUG'; in the JS file before loading the logging module.

deremer · 2017-07-11T21:00:50Z

We were experimenting with a few things and one thing we try is moving const logging = Logging(); inside the log() function. Since we've done that we haven't seen the error.

If ok, can we leave this open for a couple of days so that I can report back on if that fixed it or not?

If that is in fact the issue, then Firebase may want to update their docs: https://firebase.google.com/docs/functions/reporting-errors#importing_dependencies

deremer · 2017-07-14T15:57:31Z

Update: since we moved const logging = Logging(); inside the log() function, we have not seen this error.

So I believe we have enough sample data to declare that a fix. Now whether or not that TCP error should in fact happen, is up to you guys to determine :-)

I'm good to close this, but I'll let you guys at Google decide if it should be closed, since you probably want to get the Firebase guys to update their docs (https://firebase.google.com/docs/functions/reporting-errors#importing_dependencies) and their sample (https://github.com/firebase/functions-samples/blob/master/stripe/functions/index.js)

I also logged an issue pointing to this issue for firebase: firebase/functions-samples#194

jasonpolites · 2017-07-15T02:21:26Z

There are some related issues with other GCP node modules related to auth tokens expiring, which fits with the "fix" of moving initialization to function-scope, but does not fit with the "we only see if after re-deploy". On the other hand, network issues experienced at startup only might be related to slower-than-expected spool up of network resources, but that doesn't fit with the behavior of the "fix". More investigation on our (Google) end needed methinks.

ghost · 2017-07-18T02:17:19Z

For me 'Error: "Endpoint read failed"' is often reported in Stackdriver when I call the Spanner API from a GCP Function which has been erroneously deployed with local node_modules from my computer (instead of allowing GCP to create the dependencies).

deremer · 2017-07-18T14:38:33Z

We are deploying via Firebase and accoriding to this link, "If you are deploying through the Firebase CLI, the local node_modules folder will be ignored, and you must specify your dependencies in package.json"

ghost · 2017-07-18T19:24:59Z

I get the error occasionally even with the correct deps. Yesterday was especially bad. This morning the error has been replaced with '"Function execution took 242 ms, finished with status: 'connection error'"' and the last log before that is the call to database.runTransaction. I do some more investigation and post the results here

ghost · 2017-07-23T23:04:32Z

I have issues using GCP Functions and Spanner with JS client 0.7.1. I sometimes receive errors invoking GCP Functions through Postman or using the testing screen from within the GCP Functions web console. The same Function works fine if I invoke it locally during periods when errors are occurring with the remote code. This happens around 5-10% of the time but it is not consistent, sometimes there are periods of no errors. I am initializing the JS client using these instructions "https://cloud.google.com/spanner/docs/getting-started/nodejs/". At the moment the error seems to happen when the Function is cold i.e. after running it a few times with no errors, wait for 10min or so, run it again and see the errors, run it a few more times and see no errors.

The errors are:
database.run
Error: Endpoint read failed","error_code":14,"query":"SELECT
Error: Session not found: projects/REDACTED/instances/lc-data/databases/lc-data-uat/sessions/AFZejKDAPJhREDACTEDYnFRwi7iMjT0_","error_code":5,"query":"SELECT

database.runTransaction
Error: Endpoint read failed","error_code":14
connection error'"

stephenplusplus · 2017-08-08T13:14:22Z

Thanks to all for explaining their situations in great detail. Please correct any mistakes I might make in my assumptions:

These problems can be considered the same, in that they are all various forms of a connection error in GCF environments
The best fixes that the user can do:
- Move the @google-cloud/* module instantiation code to within the function
- Do not deploy your local node_modules directory

@8357753 -- you elaborated on your situation further in #2489, and a few others are involved in a discussion, so we'll pursue the specifics there.

@jasonpolites -- do you still think there needs to be an investigation? Without much experience with GCF, my best interpretation of the situation now is that GCF is doing some type of caching of either old code or stale gRPC connections. If that's the case, I don't think we can solve much from GCN. If that's not the case, any help getting me up to speed on how I might be looking at this wrong would be appreciated. Thanks!

JustinBeckwith · 2019-02-23T00:20:51Z

Yeah, there's an internal bug on cloud functions here. They're doing some things to increase a timeout, but this issue absolutely still exists.

rhodgkins · 2019-02-27T12:10:26Z

@JustinBeckwith does that also apply to GAE NodeJS8 Standard Environment as well?

JustinBeckwith · 2019-02-27T14:35:34Z

Possibly. Are you seeing a similar callstack in an app engine standard app? cc @steren

rhodgkins · 2019-02-27T19:52:23Z

I can't remember/find the stack trace I'm afraid (I starred this issue after this comment
#2438 (comment)) :(

I originally came here because of googleapis/nodejs-logging-winston#190, and just reverted back to a previous version of @google-cloud/logging-winston.

kyle-mccarthy · 2019-03-06T22:51:09Z

@JustinBeckwith fwiw I am having this issue in GAE Standard with node 10. Do you know if there is an open issue within google/or their issue tracker? I am using the nodejs-logging-winston and see the TCP read failed and at the same time my redis connection to a GCE instance also has problems and gets a ECONNRESET.

- 2019-03-06 14:38:04.758 CST    [ioredis] Error: read ECONNRESET ... at TCP.onStreamRead (internal/stream_base_commons.js:111:27)
- 2019-03-06 14:43:27.776 CST    Winston encountered an error - Error: 14 UNAVAILABLE: TCP Read failed

Is it an issue in how internal communication is handled?

deremer · 2019-06-11T13:29:45Z

Not sure if this is unrelated, since the original issue was related to logging

Recently, we've been getting these more in our Node 8 cloud functions. We've isolated it down to a part of the code that writes to Firestore.

It may be coincidental, but there seems to be a pattern. We tend to get this error in batches (i.e., many of them around the same time and then it stops). It also tends to happen around the time our app's load spikes. I have a hypothesis that during traffic bursts there may be some issue setting up resources as instances our scaled? Just a wild guess...

Error: 14 UNAVAILABLE: TCP Read failed
at Object.exports.createStatusError (common.js:87)
at Object.onReceiveStatus (/srv/node_modules/grpc/src/client_interceptors.js:1188)
at InterceptingListener._callNext (/srv/node_modules/grpc/src/client_interceptors.js:564)
at InterceptingListener.onReceiveStatus (/srv/node_modules/grpc/src/client_interceptors.js:614)
at callback (/srv/node_modules/grpc/src/client_interceptors.js:841)

JustinBeckwith · 2019-08-23T21:42:27Z

Greetings folks! I think we've resolved this issue. With the latest and greatest version of all libraries, you should now be getting a dependency on @grpc/grpc-js instead of grpc. This module is rewritten from the ground up, and uses the native HTTP/2 support in node core. We think the combination of this new module, and some fixes to timeouts on the Cloud Functions/App Engine backend should resolve the issue.

~~If you are running into this still ... please let us know! Just make sure you're using the latest version of the module, and that you post a stack trace (as above).~~

Edit: I was wrong

juanparadox · 2019-09-11T13:00:29Z

Any work arounds for this issue?

JustinBeckwith · 2019-09-11T15:12:54Z

Not currently, but we're making progress on a fix

alexander-fenster · 2019-09-11T15:58:15Z

Hi folks,

We made some changes in @grpc/grpc-js grpc/grpc-node#1021 that might fix the problem described here. They are releases as v0.5.3. Can I bother you updating, checking if the issue is still here with v0.5.3, and sending the stack trace back if something still does not work?

Thanks!

bcoe · 2019-09-17T21:20:39Z

@deremer 👋 hey, I just wanted to check in and see if @alexander-fenster's changes in @grpc/grpc-js helped?

deremer · 2019-09-18T00:04:51Z

@deremer 👋 hey, I just wanted to check in and see if @alexander-fenster's changes in @grpc/grpc-js helped?

We haven’t had a release to deploy with updated dependencies yet. I will report back once we do so and get some data. Lots of other folks on here too so they might see results sooner.

bcoe · 2019-10-08T19:03:28Z

@deremer @juanparadox @kyle-mccarthy, can you folks confirm that the latest release of @google-cloud/logging has addressed this issue for you?

bcoe · 2019-10-09T16:52:07Z

@deremer I'm going to go ahead and close this issue, because it's been almost a month, and seems like things have been working okay for folks.

Please feel free to re-open if you are continuing to bump into these issues.

felixmpaulus · 2019-12-16T22:02:00Z

This error popped up this morning for the first time.

{ Error: 14 UNAVAILABLE: TCP Read failed
    at Object.exports.createStatusError (/srv/node_modules/grpc/src/common.js:91:15)
    at Object.onReceiveStatus (/srv/node_modules/grpc/src/client_interceptors.js:1204:28)
    at InterceptingListener._callNext (/srv/node_modules/grpc/src/client_interceptors.js:568:42)
    at InterceptingListener.onReceiveStatus (/srv/node_modules/grpc/src/client_interceptors.js:618:8)
    at callback (/srv/node_modules/grpc/src/client_interceptors.js:845:24)
  code: 14,
  metadata: Metadata { _internal_repr: {} },
  details: 'TCP Read failed' }

JustinBeckwith · 2019-12-16T22:13:29Z

Greetings! Could you open a new issue with details?

stephenplusplus added the api: logging Issues related to the Cloud Logging API. label Jul 8, 2017

deremer mentioned this issue Jul 14, 2017

logging = require('@google-cloud/logging')(); leads to issues if declared outside function firebase/functions-samples#194

Closed

stephenplusplus marked this as a duplicate of #2489 Jul 24, 2017

stephenplusplus mentioned this issue Jul 24, 2017

Spanner node.js client used with GCP Functions generates errors when GCP Function is cold #2489

Closed

landrito added type: bug Error or flaw in code with unintended results or allowing sub-optimal usage patterns. priority: p1 Important issue which blocks shipping the next release. Will be fixed prior to next release. status: acknowledged labels Aug 7, 2017

landrito assigned stephenplusplus Aug 7, 2017

yoshi-automation added the triage me I really want to be triaged. label Feb 22, 2019

JustinBeckwith self-assigned this Mar 12, 2019

yoshi-automation removed the triage me I really want to be triaged. label Mar 12, 2019

DominicKramer mentioned this issue Mar 15, 2019

Error: Attempt to write logs with no transports googleapis/nodejs-logging#427

Closed

JustinBeckwith added priority: p2 Moderately-important priority. Fix may not be included in next release. and removed priority: p1 Important issue which blocks shipping the next release. Will be fixed prior to next release. labels Jun 17, 2019

JustinBeckwith closed this as completed Aug 23, 2019

JustinBeckwith reopened this Aug 23, 2019

JustinBeckwith assigned murgatroid99 and alexander-fenster and unassigned JustinBeckwith Aug 24, 2019

bcoe closed this as completed Oct 9, 2019

bcoe removed the 🚨 This issue needs some love. label Oct 9, 2019

googleapis locked as resolved and limited conversation to collaborators Dec 16, 2019

Error: Endpoint read failed at /user_code/node_modules/@google-cloud/logging/node_modules/grpc/src/node/src/client.js:569:15 #2438

Error: Endpoint read failed at /user_code/node_modules/@google-cloud/logging/node_modules/grpc/src/node/src/client.js:569:15 #2438

Comments

deremer commented Jul 6, 2017 • edited Loading

stephenplusplus commented Jul 8, 2017

deremer commented Jul 10, 2017

lukesneeringer commented Jul 10, 2017

deremer commented Jul 10, 2017 • edited Loading

deremer commented Jul 10, 2017

ofrobots commented Jul 10, 2017 • edited Loading

ofrobots commented Jul 10, 2017

ofrobots commented Jul 10, 2017

deremer commented Jul 10, 2017

murgatroid99 commented Jul 10, 2017

murgatroid99 commented Jul 10, 2017

deremer commented Jul 10, 2017

murgatroid99 commented Jul 10, 2017

deremer commented Jul 11, 2017

deremer commented Jul 14, 2017

jasonpolites commented Jul 15, 2017

ghost commented Jul 18, 2017

deremer commented Jul 18, 2017

ghost commented Jul 18, 2017

ghost commented Jul 23, 2017 • edited by ghost Loading

stephenplusplus commented Aug 8, 2017

JustinBeckwith commented Feb 23, 2019

rhodgkins commented Feb 27, 2019

JustinBeckwith commented Feb 27, 2019

rhodgkins commented Feb 27, 2019

kyle-mccarthy commented Mar 6, 2019 • edited Loading

deremer commented Jun 11, 2019 • edited Loading

JustinBeckwith commented Aug 23, 2019 • edited Loading

juanparadox commented Sep 11, 2019

JustinBeckwith commented Sep 11, 2019

alexander-fenster commented Sep 11, 2019

bcoe commented Sep 17, 2019

deremer commented Sep 18, 2019

bcoe commented Oct 8, 2019

bcoe commented Oct 9, 2019

felixmpaulus commented Dec 16, 2019 • edited Loading

JustinBeckwith commented Dec 16, 2019

deremer commented Jul 6, 2017 •

edited

Loading

deremer commented Jul 10, 2017 •

edited

Loading

ofrobots commented Jul 10, 2017 •

edited

Loading

ghost commented Jul 23, 2017 •

edited by ghost

Loading

kyle-mccarthy commented Mar 6, 2019 •

edited

Loading

deremer commented Jun 11, 2019 •

edited

Loading

JustinBeckwith commented Aug 23, 2019 •

edited

Loading

felixmpaulus commented Dec 16, 2019 •

edited

Loading