Analyzing Cloudflare Logs with AWS Athena

Table DDL

CREATE EXTERNAL TABLE cloudflare_logs (
CacheCacheStatus string,
CacheResponseBytes int,
CacheResponseStatus int,
CacheTieredFill boolean,
ClientASN int,
ClientCountry string,
ClientDeviceType string,
ClientIP string,
ClientIPClass string,
ClientRequestBytes int,
ClientRequestHost string,
ClientRequestMethod string,
ClientRequestPath string,
ClientRequestProtocol string,
ClientRequestReferer string,
ClientRequestURI string,
ClientRequestUserAgent string,
ClientSSLCipher string,
ClientSSLProtocol string,
ClientSrcPort int,
EdgeColoID int,
EdgeEndTimestamp string,
EdgePathingOp string,
EdgePathingSrc string,
EdgePathingStatus string,
EdgeRateLimitAction string,
EdgeRateLimitID int,
EdgeRequestHost string,
EdgeResponseBytes int,
EdgeResponseCompressionRatio double,
EdgeResponseContentType string,
EdgeResponseStatus int,
EdgeServerIP string,
EdgeStartTimestamp string,
FirewallMatchesActions ARRAY < string >,
FirewallMatchesSources ARRAY < string >,
FirewallMatchesRuleIDs ARRAY < string >,
OriginIP string,
OriginResponseBytes int,
OriginResponseHTTPExpires string,
OriginResponseHTTPLastModified string,
OriginResponseStatus int,
OriginResponseTime bigint,
OriginSSLProtocol string,
ParentRayID string,
RayID string,
SecurityLevel string,
WAFAction string,
WAFFlags string,
WAFMatchedVar string,
WAFProfile string,
WAFRuleID string,
WAFRuleMessage string,
WorkerCPUTime int,
WorkerStatus string,
WorkerSubrequest boolean,
WorkerSubrequestCount int,
ZoneID bigint)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' LOCATION 's3://my-cloudflare-logs/'

Querying

SELECT count(*) as requests,
c.clientrequestprotocol
FROM "cloudflare_logs"."cloudflare_logs" c
GROUP BY c.clientrequestprotocol
ORDER BY count(*) DESC limit 10;
requests clientrequestprotocol
1 15063737 HTTP/2
2 6842951 HTTP/1.1
3 4342 HTTP/1.0
SELECT avg(clientrequestbytes),
edgecoloid
FROM "cloudflare_logs"."cloudflare_logs"
-- Assuming you are using the default date format with Logpush
WHERE date_trunc('day', from_iso8601_timestamp(edgestarttimestamp)) = current_date
GROUP BY edgecoloid
ORDER BY avg(clientrequestbytes) ASC;

--

--

--

I write about Python, Go, Node.js, cloud infrastructure, and data. https://pipefail.dev

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

Blog | Watermarking your Video or Livestream

Manage your Hubspot and get lead/contact notifications in your Slack!

Leaning pandas. Data manipulation and visualization. (COVID-19 in — Scotland statistics)

AngstromCTF

Cross-Platform Mobile App Development: 3 Things You Should Know About Ionic Framework

Story of a “REST” API

What are the differences between static and dynamic libraries?

Network Fundamentals — Part 2

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Nicholas Duffy

Nicholas Duffy

I write about Python, Go, Node.js, cloud infrastructure, and data. https://pipefail.dev

More from Medium

Configure your NodeJS Application with IBM Cloud App Configuration using nconf-appconfig

Adding Auto-Scaling To AWS Fargate Services

Automatic Semver Versioning Using Github Actions and deploying to AWS Pipeline

Enterprise implementation of Infra-as-Code using CDK