diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b7b2d0c..f8b016e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,8 +18,8 @@ jobs: fail-fast: false matrix: version: - - '1.7' - - '1' + - '1.9' + # - '1' # add back when 1.10 is out - 'nightly' os: - ubuntu-latest diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 3042569..90dc100 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -30,4 +30,4 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} # Edit the following line to reflect the actual name of the GitHub Secret containing your private key ssh: ${{ secrets.DOCUMENTER_KEY }} - # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }} \ No newline at end of file + # ssh: ${{ secrets.NAME_OF_MY_SSH_PRIVATE_KEY_SECRET }} diff --git a/CondaPkg.toml b/CondaPkg.toml index 55c2d37..71f629c 100644 --- a/CondaPkg.toml +++ b/CondaPkg.toml @@ -1,8 +1,10 @@ channels = ["conda-forge"] [deps] -h5py = "" -pillow = ">=9.1, <10" +# h5py = "" +# pillow = ">=9.1, <10" +# pyarrow = "==6.0.0" +datasets = ">=2.12, <3" numpy = ">=1.20, <2" -datasets = ">=2.7, <3" -pyarrow = "==6.0.0" +pillow = "" + diff --git a/Project.toml b/Project.toml index ba91700..97f87e8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "HuggingFaceDatasets" uuid = "d94b9a45-fdf5-4270-b024-5cbb9ef7117d" authors = ["Carlo Lucibello"] -version = "0.3.0" +version = "0.3.1" [deps] CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" @@ -16,7 +16,7 @@ DLPack = "0.1" ImageCore = "0.9" MLUtils = "0.4.1" PythonCall = "0.9" -julia = "1.7" +julia = "1.9" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/HuggingFaceDatasets.jl b/src/HuggingFaceDatasets.jl index ee5c14d..3709a5f 100644 --- a/src/HuggingFaceDatasets.jl +++ b/src/HuggingFaceDatasets.jl @@ -41,6 +41,8 @@ function __init__() # https://cjdoris.github.io/PythonCall.jl/dev/pythoncall-reference/#PythonCall.pycopy! 
PythonCall.pycopy!(datasets, pyimport("datasets")) PythonCall.pycopy!(PIL, pyimport("PIL")) + pyimport("PIL.PngImagePlugin") + pyimport("PIL.JpegImagePlugin") PythonCall.pycopy!(np, pyimport("numpy")) PythonCall.pycopy!(copy, pyimport("copy")) end diff --git a/src/dataset.jl b/src/dataset.jl index ff545b4..697e4b7 100644 --- a/src/dataset.jl +++ b/src/dataset.jl @@ -107,7 +107,7 @@ version of [`with_format`](@ref). """ function set_format!(ds::Dataset, format) if format == "julia" - # ds.pyds.set_format("numpy") + ds.pyds.reset_format() # or ds.pyds.set_format("python") ds.jltransform = py2jl else ds.pyds.set_format(format) diff --git a/src/datasetdict.jl b/src/datasetdict.jl index 02f7ffb..8ac2aec 100644 --- a/src/datasetdict.jl +++ b/src/datasetdict.jl @@ -102,7 +102,7 @@ version of [`with_format`](@ref). """ function set_format!(d::DatasetDict, format) if format == "julia" - d.pyd.set_format("numpy") + d.pyd.reset_format() d.jltransform = py2jl else d.pyd.set_format(format) diff --git a/src/transforms.jl b/src/transforms.jl index 9c6fb09..c95b798 100644 --- a/src/transforms.jl +++ b/src/transforms.jl @@ -21,6 +21,7 @@ function _pyconvert(x::Py) end end +# Do nothing on a non-Py object. _pyconvert(x) = x """ @@ -30,6 +31,7 @@ Convert Python types to Julia types applying `pyconvert` recursively. """ py2jl +# py2jl recurses through pycanonicalize and converts through _pyconvert py2jl(x) = pycanonicalize(_pyconvert(x)) pycanonicalize(x) = x diff --git a/test/dataset.jl b/test/dataset.jl index c7a5749..e1493b5 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -74,7 +74,6 @@ end @test x isa Dict @test x["label"] == -1 @test x["idx"] == 0 - @show x["premise"] |> typeof @test x["premise"] isa AbstractString @test x["premise"] == "The cat sat on the mat." 
@test x["hypothesis"] isa AbstractString diff --git a/test/datasetdict.jl b/test/datasetdict.jl index e040d79..45390d4 100644 --- a/test/datasetdict.jl +++ b/test/datasetdict.jl @@ -19,12 +19,12 @@ end @testset "with_format(julia)" begin d = with_format(mnist, "julia") ds = d["test"] - @test ds.format["type"] == "numpy" + @test ds.format["type"] == nothing x = ds[1] @test x isa Dict @test x["label"] isa Int @test x["label"] == 7 - @test x["image"] isa Matrix{UInt8} + @test x["image"] isa AbstractMatrix{<:Gray} @test size(x["image"]) == (28, 28) end