# tunedCovParams.toml
#
# Hyperparameter set for a single run against the dataset at `data.path`.
# NOTE(review): the file name suggests these values came out of a tuning
# sweep, and the [model] keys (d_token, n_heads, attention_dropout, 'reglu')
# look like a Transformer-style tabular model — confirm against the consumer.

# Random seed for the run.
seed = 0

[data]
normalization = 'quantile'
# NOTE(review): relative path; presumably resolved against the working
# directory or project root by the consumer — verify.
path = 'data/covtype'

[model]
activation = 'reglu'
attention_dropout = 0.03815883962184247
# Approximately 4/3.
d_ffn_factor = 1.333333333333333
# 424 = 8 * 53, i.e. an exact multiple of n_heads below.
# NOTE(review): presumably d_token must stay divisible by n_heads — confirm
# before changing either value.
d_token = 424
ffn_dropout = 0.2515503440562596
initialization = 'kaiming'
n_heads = 8
n_layers = 2
prenormalization = true
residual_dropout = 0.0

[training]
batch_size = 1024
eval_batch_size = 8192
lr = 3.762989816330166e-05
# NOTE(review): one billion epochs is effectively "no limit"; presumably
# `patience` below is what actually ends training — confirm.
n_epochs = 1_000_000_000
optimizer = 'adamw'
patience = 16
weight_decay = 0.0001239780004929955