Pub/Sub message pulling stops receiving after a few hours


#1

Hi, I run the open LoRa Server on GCP containers and am having trouble with message pulling from Pub/Sub hanging.
The deployed versions of the applications are:
docker:loraserver/lora-app-server:2.3.0
docker:loraserver/loraserver:2.3.0 - two separate instances for different frequencies
docker:loraserver/lora-geo-server:2.0.0
binary:lora-gateway-bridge_2.6.0_linux_armv7.tar.gz - installed on Tektelic gateways

The application works fine for at least 10 hours. After that, messages stop arriving (there is no information about messages in the logs). The LoRa server itself is still running (after going to the app server and opening the gateway information, new log entries appear). After a restart of the LoRa server, the old messages persisted in the subscription are delivered and consumed.

It seems that pulling from Pub/Sub is stalled (the messages remain in the Pub/Sub subscription). The LoRa server seems to be working, as it sends information about gateways to the app server (only the pulling of messages seems not to work).

What could be the cause of this?

Level 4 logs from loraserver:
// about 10 hours after start
time=“2018-11-05T18:51:58Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-05T18:51:58Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-05T18:52:02Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-05T18:52:02Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-05T18:52:28Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-05T18:52:28Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-05T18:52:33Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-05T18:52:33Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-05T18:52:58Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-05T18:52:58Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-05T18:53:03Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-05T18:53:03Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-05T18:53:28Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-05T18:53:28Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-05T18:53:33Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-05T18:53:33Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-05T18:53:58Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-05T18:53:58Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
// after 10 hours messages from pub/sub are not received
// no logs for 12 hours

// these logs appear after opening the LoRa app server
time=“2018-11-06T07:47:30Z” level=info msg=“finished unary call with code OK” grpc.code=OK grpc.method=GetGatewayStats grpc.service=ns.NetworkServerService grpc.start_time=“2018-11-06T07:47:30Z” grpc.time_ms=8.39 peer.address=“99.99.99.99:53248” span.kind=server system=grpc
time=“2018-11-06T07:47:30Z” level=info msg=“finished unary call with code OK” grpc.code=OK grpc.method=GetGatewayStats grpc.service=ns.NetworkServerService grpc.start_time=“2018-11-06T07:47:30Z” grpc.time_ms=9.017 peer.address=“99.99.99.99:53248” span.kind=server system=grpc
time=“2018-11-06T07:48:18Z” level=info msg=“finished unary call with code OK” grpc.code=OK grpc.method=GetGatewayStats grpc.service=ns.NetworkServerService grpc.start_time=“2018-11-06T07:48:18Z” grpc.time_ms=9.8 peer.address=“99.99.99.99:53248” span.kind=server system=grpc
time=“2018-11-06T07:48:18Z” level=info msg=“finished unary call with code OK” grpc.code=OK grpc.method=GetGatewayStats grpc.service=ns.NetworkServerService grpc.start_time=“2018-11-06T07:48:18Z” grpc.time_ms=10.134 peer.address=“99.99.99.99:53248” span.kind=server system=grpc
// after a restart of the LoRa server only (not the app server), the old messages are consumed

time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message published” command=down duration=155.437484ms gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“downlink-frames saved” dev_eui=cccccccccccccccc token=34913
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message published” command=down duration=122.844502ms gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“downlink-frames saved” dev_eui=cccccccccccccccc token=14759
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-06T07:55:11Z” level=info msg=“packet(s) collected” dev_eui=cccccccccccccccc gw_count=1 gw_ids=bbbbbbbbbbbbbbbb mtype=JoinRequest
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“device-queue flushed” dev_eui=cccccccccccccccc
time=“2018-11-06T07:55:11Z” level=info msg=“device-session saved” dev_addr=01989522 dev_eui=cccccccccccccccc
time=“2018-11-06T07:55:11Z” level=info msg=“device-activation created” dev_eui=cccccccccccccccc id=83
time=“2018-11-06T07:55:11Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:55:11Z” level=info msg=“gateway/gcp_pub_sub: message published” command=down duration=29.364848ms gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:11Z” level=info msg=“downlink-frames saved” dev_eui=cccccccccccccccc token=16289
time=“2018-11-06T07:55:13Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=ack
time=“2018-11-06T07:55:13Z” level=info msg=“gateway/gcp_pub_sub: message published” command=down duration=30.86427ms gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:13Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=ack
time=“2018-11-06T07:55:13Z” level=info msg=“gateway/gcp_pub_sub: message published” command=down duration=24.72833ms gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:14Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=ack
time=“2018-11-06T07:55:15Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=ack
time=“2018-11-06T07:55:15Z” level=info msg=“gateway/gcp_pub_sub: message published” command=down duration=22.674543ms gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:16Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=ack
time=“2018-11-06T07:55:17Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=ack
time=“2018-11-06T07:55:31Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-06T07:55:31Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:55:32Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-06T07:55:32Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa
time=“2018-11-06T07:56:01Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=bbbbbbbbbbbbbbbb type=stats
time=“2018-11-06T07:56:01Z” level=info msg=“gateway updated” gateway_id=bbbbbbbbbbbbbbbb
time=“2018-11-06T07:56:03Z” level=info msg=“gateway/gcp_pub_sub: message received” gateway_id=aaaaaaaaaaaaaaaa type=stats
time=“2018-11-06T07:56:03Z” level=info msg=“gateway updated” gateway_id=aaaaaaaaaaaaaaaa

// and new messages are coming too