Skip to content

Commit

Permalink
Update for new data defines
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchphillipson committed Nov 16, 2023
1 parent b2bdae0 commit 39ac4f2
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 95 deletions.
22 changes: 8 additions & 14 deletions src/data/core/bea_gsp/bea_gsp.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("./data_defines.jl")
#include("./data_defines.jl")

function load_bea_gsp!(GU,data_dir,gsp_info)

Expand Down Expand Up @@ -172,23 +172,17 @@ function load_raw_bea_gsp(data_dir,years,gsp_data)
end

function clean_raw_bea_gsp(df)
notations = []
#notations = []

push!(notations,notation_link(gsp_states,:state,:region_fullname))
push!(notations,notation_link(gsp_industry_id,:IndustryID,:gsp_industry_id))
push!(notations,notation_link(bea_gsp_map,:ComponentName,:bea_code))
push!(notations,notation_link(bea_gsp_mapsec,:gsp_industry_id,:gdp_industry_id))
#push!(notations,notation_link(gsp_states,:state,:region_fullname))
##push!(notations,notation_link(gsp_industry_id,:IndustryID,:gsp_industry_id))
#push!(notations,notation_link(bea_gsp_mapsec,:IndustryID,:gdp_industry_id))
#push!(notations,notation_link(bea_gsp_map,:ComponentName,:bea_code))

notations = bea_gsp_notations()
df = apply_notations(df,notations)

#df = load_raw_bea_gsp(data_dir,1997:2021,info_dict)

#return df

for notation in notations
df = apply_notation!(df,notation)
end



df = df[!,[:region_abbv,:year,:gdpcat,:i,:units,:value]]

Expand Down
2 changes: 1 addition & 1 deletion src/data/core/bea_io/PartitionBEA.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include("./bea_api/bea_api.jl")
include("./calibrate.jl")
include("./data_defines.jl")
#include("./data_defines.jl")



Expand Down
47 changes: 20 additions & 27 deletions src/data/core/bea_pce/bea_pce.jl
Original file line number Diff line number Diff line change
@@ -1,46 +1,39 @@
include("./data_defines.jl")
#include("./data_defines.jl")

function load_bea_pce!(GU,data_dir,info_dict)

info = info_dict["saexp1"]

data_path = "$data_dir\\$(info["path"])"
nrows = info["nrows"]
df = DataFrame(CSV.File(data_path;limit = nrows,stringtype=String));

df = select(df, Not([:Unit,:IndustryClassification,:GeoFIPS,:Region,:TableName,:LineCode]));
df[!,:Description] = string.(strip.(df[!,:Description]));



notations = []
push!(notations,WiNDC.notation_link(pce_map,:Description,:pce_description))
push!(notations,WiNDC.notation_link(pce_states,:GeoName,:region_fullname))
push!(notations,WiNDC.notation_link(pce_map_gams,:pce,:pce))
notations = bea_pce_notations()

df = DataFrame(CSV.File(data_path;limit = nrows,stringtype=String)) |>
x -> select(x,Not([:Unit,:IndustryClassification,:GeoFIPS,:Region,:TableName,:LineCode])) |>
x -> transform(x,
:Description => (y -> string.(strip.(y))) => :Description
) |>
x -> apply_notations(x,notations) |>
x -> stack(x, Not([:r,:i]),variable_name = :year,value_name = :value)

for notation in notations
df = WiNDC.apply_notation!(df,notation)
end
df = stack(df,Not([:region_abbv,:i]),variable_name = :year,value_name = :value);
filter!(:region_abbv => x-> x!="US",df);

Y = combine(groupby(df,[:year,:i]),:value=>sum)

df = leftjoin(df,Y, on = [:year,:i])

df[!,:value] = df[!,:value]./df[!,:value_sum]
df = df |>
x -> groupby(x,[:year,:i]) |>
x -> combine(x, :value => sum) |>
x -> leftjoin(df,x, on = [:year,:i]) |>
x -> transform(x,
[:value,:value_sum] => ((v,vs) -> v./vs) => :value,
:i => (i -> Symbol.(i)) => :i,
:r => (i -> Symbol.(i)) => :r,
:year => (i -> Symbol.(i)) => :year
)

df[!,:i] = Symbol.(df[!,:i])
df[!,:region_abbv] = Symbol.(df[!,:region_abbv])
df[!,:year] = Symbol.(df[!,:year])

@create_parameters(GU,begin
:pce_shr, (:yr,:r,:i), "Regional shares of final consumption"
end)

col_set_link = Dict(:yr => :year,
:r => :region_abbv,
:r => :r,
:i => :i
)

Expand Down
57 changes: 35 additions & 22 deletions src/data/core/census_sgf/census_sgf.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("./data_defines.jl")
#include("./data_defines.jl")



Expand Down Expand Up @@ -38,30 +38,43 @@ function sgf_load_clean_year(year,data_dir,path)
f = sgf_parse_line
end

L = f.([e for e in split(s,'\n') if e!=""])
df = DataFrame(L)

notations = []

push!(notations,WiNDC.notation_link(sgf_state_codes,:government_code,:code))
push!(notations,WiNDC.notation_link(sgf_states, :state, :region_fullname))
push!(notations,WiNDC.notation_link(item_codes, :item_code,:item_code));
push!(notations,WiNDC.notation_link(sgf_map, :item_name,:sgf_category));
push!(notations,WiNDC.notation_link(sgf_gams_map, :i,:sgf_category));
notations = sgf_notations()
#notations = []

for notation in notations
df = WiNDC.apply_notation!(df,notation)
end

df[!,:amount] = parse.(Int,df[!,:amount])
#push!(notations,WiNDC.notation_link(sgf_state_codes,:government_code,:code))
#push!(notations,WiNDC.notation_link(sgf_states, :state, :region_fullname))
#push!(notations,WiNDC.notation_link(item_codes, :item_code,:item_code));
#push!(notations,WiNDC.notation_link(sgf_map, :item_name,:sgf_category));
#push!(notations,WiNDC.notation_link(sgf_gams_map, :i,:sgf_category));

df = combine(groupby(df,[:i,:region_abbv]),:amount => sum);
df[!,:year] .= year;
df[!,:value] = df[!,:amount_sum]./1_000

df[!,:year] = Symbol.(df[!,:year])
df[!,:i] = Symbol.(df[!,:i])
df[!,:region_abbv] = Symbol.(df[!,:region_abbv])
L = f.([e for e in split(s,'\n') if e!=""])
df = DataFrame(L) |>
x -> apply_notations(x,notations) |>
x -> transform(x,
:amount => (y -> parse.(Int,y)) => :amount
) |>
x -> groupby(x,[:i,:region_abbv]) |>
x -> combine(x, :amount => sum) |>
x -> transform(x,
:amount_sum => (y -> Symbol(year)) => :year,
:amount_sum => (a -> a./1_000) => :value,
:i => (i -> Symbol.(i)) => :i,
:region_abbv => (r -> Symbol.(r)) => :region_abbv
)


#df = apply_notations(df,notations)

#df[!,:amount] = parse.(Int,df[!,:amount])

#df = combine(groupby(df,[:i,:region_abbv]),:amount => sum);
#df[!,:year] .= year;
#df[!,:value] = df[!,:amount_sum]./1_000

#df[!,:year] = Symbol.(df[!,:year])
#df[!,:i] = Symbol.(df[!,:i])
#df[!,:region_abbv] = Symbol.(df[!,:region_abbv])

return df
end
Expand Down
52 changes: 27 additions & 25 deletions src/data/core/faf/faf.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("./data_defines.jl")
#include("./data_defines.jl")

function load_raw_faf_data(file_path)

Expand All @@ -14,13 +14,12 @@ function load_raw_faf_data(file_path)
Not(["dms_origst","dms_destst","dms_mode","sctg2"]),
value_name = :value,
variable_name = :year
)

df[!,:year] = df[!,:year] |>
x -> replace.(x, "value_"=>"") |>
x -> parse.(Int, x)

df = df |>
) |>
x -> transform(x,
:dms_origst => (y -> string.(y)) => :dms_origst,
:dms_destst => (y -> string.(y)) => :dms_destst,
:year => (y -> replace.(y,"value_" => "")) => :year
) |>
x -> groupby(x, [:dms_origst,:dms_destst,:sctg2,:year]) |>
x -> combine(x, :value=>sum=>:value)

Expand All @@ -36,20 +35,19 @@ function load_faf_data!(GU,data_dir,info_dict)
df_cur = load_raw_faf_data(current_file_path)
df_hist = load_raw_faf_data(history_file_path)

df = filter(:year => x-> x<=2021, vcat(df_cur,df_hist))


notations = []
#notations = []

push!(notations,WiNDC.notation_link(orig,:dms_origst,:state_fips))
push!(notations,WiNDC.notation_link(dest,:dms_destst,:state_fips))
push!(notations,WiNDC.notation_link(sctg2,:sctg2,:sctg2))
push!(notations,WiNDC.notation_link(years,:year,:faf_year))
#push!(notations,WiNDC.notation_link(orig,:dms_origst,:state_fips))
#push!(notations,WiNDC.notation_link(dest,:dms_destst,:state_fips))
#push!(notations,WiNDC.notation_link(sctg2,:sctg2,:sctg2))
#push!(notations,WiNDC.notation_link(years,:year,:faf_year))

for notation in notations
df = WiNDC.apply_notation!(df,notation)
end
notations = faf_notations()

df = df |>
df = vcat(df_cur,df_hist) |>
x -> apply_notations(x,notations) |>
x -> groupby(x,[:dms_dest,:dms_orig,:year,:i]) |>
x -> combine(x, :value => sum => :value)

Expand Down Expand Up @@ -89,19 +87,23 @@ function load_faf_data!(GU,data_dir,info_dict)
)

# Make a dataframe with all the goods not present in the FAF data
X = DataFrame([[i for i∈GU[:i] if i∉ Symbol.(unique(single_region[!,:i]))]],[:i])
X = Symbol.(unique(single_region[!,:i])) |>
x -> [i for i∈GU[:i] if i∉ x] |>
x -> DataFrame([x],[:i])

#X = DataFrame([[i for i∈GU[:i] if i∉ Symbol.(unique(single_region[!,:i]))]],[:i])

single_region = vcat(single_region,crossjoin(X,Y));

single_region[!,:value] = single_region[!,:local_supply] ./ (single_region[!,:local_supply] .+ single_region[!,:demand])

single_region = single_region |>
x -> select(x,[:r,:year,:i,:value]) |>
x -> unstack(x,:i,:value) |>
x -> transform(x,
:uti => (y -> .9) => :uti
) |>
x -> stack(x,Not(:r,:year),variable_name = :i,value_name = :value)
x -> select(x,[:r,:year,:i,:value]) |>
x -> unstack(x,:i,:value) |>
x -> transform(x,
:uti => (y -> .9) => :uti
) |>
x -> stack(x,Not(:r,:year),variable_name = :i,value_name = :value)


@create_parameters(GU,begin
Expand Down
16 changes: 10 additions & 6 deletions src/data/core/usa_trade/usa_trade.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
include("./data_defines.jl")
#include("./data_defines.jl")


function load_usa_trade!(GU,data_dir,info_dict)
Expand Down Expand Up @@ -133,10 +133,12 @@ end
function load_raw_usa_trade(data_dir, info_dict)

out = DataFrame()
notations = []

push!(notations, WiNDC.notation_link(usatrd_states,:State,:region_fullname));
push!(notations, WiNDC.notation_link(naics_map,:naics,:naics));
notations = usatrd_notations()
#notations = []

#push!(notations, WiNDC.notation_link(usatrd_states,:State,:region_fullname));
#push!(notations, WiNDC.notation_link(naics_map,:naics,:naics));

for flow in ["exports","imports"]
dict = info_dict[flow]
Expand Down Expand Up @@ -173,9 +175,11 @@ end
function load_raw_usda_trade_shares(data_dir,info_dict)
file_path = info_dict["detail"]

notations = []
#notations = []

#push!(notations, WiNDC.notation_link(usatrd_states,:State,:region_fullname));

push!(notations, WiNDC.notation_link(usatrd_states,:State,:region_fullname));
notations = usatrd_shares_notations()

X = XLSX.readdata(joinpath(data_dir,file_path),"Total exports","A3:W55")
X[1,1] = "State"
Expand Down

0 comments on commit 39ac4f2

Please sign in to comment.