-- For every distinct name, compute the ratio of summed occurrences to summed
-- books, then store the 10 names with the highest ratio.

-- Load the input with an explicit schema. The path must be wrapped in plain
-- ASCII single quotes; typographic quotes are not valid string delimiters.
raw = LOAD 's3://xxxxxxxxx/*' AS (name:chararray, year:float, occurrences:float, books:float);

-- Each tuple of B has two fields: `group` (the name key) and `raw` (a bag
-- holding every input row that shares that name).
B = GROUP raw BY name;

-- The grouping key is exposed as `group`, and the per-row fields are reached
-- through the bag named after the grouped relation (`raw`), not through `B`.
C = FOREACH B GENERATE group AS name, SUM(raw.occurrences) / SUM(raw.books) AS average;

-- Highest ratio first, keep the top 10, and write them out.
D = ORDER C BY average DESC;
E = LIMIT D 10;
STORE E INTO 's3://xxxxxx';
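-- NOTE(review): the two lines below look like leftover web-page text rather than part of the script; safe to remove.
-- No comments:
-- Post a comment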