Unverified commit 6bd2e3e4, authored by Silvano Cerza, committed by GitHub

[skip changelog] Add missing partitioning query in Athena script (#1477)

parent 7d221443
...@@ -18,7 +18,6 @@ def execute(client, statement, dest_s3_output_location): ...@@ -18,7 +18,6 @@ def execute(client, statement, dest_s3_output_location):
result = client.start_query_execution( result = client.start_query_execution(
QueryString=statement, QueryString=statement,
ClientRequestToken=str(uuid.uuid4()), ClientRequestToken=str(uuid.uuid4()),
QueryExecutionContext={"Database": "etl_kpi_prod_hwfw"},
ResultConfiguration={ ResultConfiguration={
"OutputLocation": dest_s3_output_location, "OutputLocation": dest_s3_output_location,
}, },
...@@ -113,6 +112,9 @@ if __name__ == "__main__": ...@@ -113,6 +112,9 @@ if __name__ == "__main__":
session = boto3.session.Session(region_name="us-east-1") session = boto3.session.Session(region_name="us-east-1")
athena_client = session.client("athena") athena_client = session.client("athena")
# Load all partitions before querying downloads
execute(athena_client, f"MSCK REPAIR TABLE {AWS_ATHENA_SOURCE_TABLE};", DEST_S3_OUTPUT)
query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)), query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)),
'$.data.url'), 'https://downloads.arduino.cc/arduino-cli/arduino-cli_', '') '$.data.url'), 'https://downloads.arduino.cc/arduino-cli/arduino-cli_', '')
AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment