-
Griffin: Mixing Gated Linear Recurrences with Local Attention for Efficient Language Models
Paper • 2402.19427 • Published • 48 -
Simple linear attention language models balance the recall-throughput tradeoff
Paper • 2402.18668 • Published • 16 -
ChunkAttention: Efficient Self-Attention with Prefix-Aware KV Cache and Two-Phase Partition
Paper • 2402.15220 • Published • 18 -
Linear Transformers are Versatile In-Context Learners
Paper • 2402.14180 • Published • 5
https://huggingface.co/papers/2402.18668\n","text":"similar https://huggingface.co/papers/2402.18668\n"},"id":"2402.19427","title":"Griffin: Mixing Gated Linear Recurrences with Local Attention for\n Efficient Language Models","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.19427.png","upvotes":48,"publishedAt":"2024-02-29T18:24:46.000Z","isUpvotedByUser":false},{"_id":"65e1cb60443c7fb8455c170d","position":1,"type":"paper","id":"2402.18668","title":"Simple linear attention language models balance the recall-throughput\n tradeoff","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.18668.png","upvotes":16,"publishedAt":"2024-02-28T19:28:27.000Z","isUpvotedByUser":false},{"_id":"65e1cca27754e5da55123a3e","position":2,"type":"paper","id":"2402.15220","title":"ChunkAttention: Efficient Self-Attention with Prefix-Aware KV Cache and\n Two-Phase Partition","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.15220.png","upvotes":18,"publishedAt":"2024-02-23T09:29:19.000Z","isUpvotedByUser":false},{"_id":"65e1cde264802b4547f029e2","position":3,"type":"paper","id":"2402.14180","title":"Linear Transformers are Versatile In-Context Learners","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.14180.png","upvotes":5,"publishedAt":"2024-02-21T23:45:57.000Z","isUpvotedByUser":false}],"position":1,"theme":"pink","private":false,"shareUrl":"https://huggingface.co/collections/kiranr/papers-65b007e5a037572db142e459","upvotes":0,"isUpvotedByUser":false}],"datasets":[{"author":"kiranr","downloads":0,"gated":false,"id":"kiranr/WizardLM_evol_instruct_V2_196k","lastModified":"2024-04-23T13:16:58.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":286000,"tags":["croissant"],"libraries":["datasets","mlcroissant"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false}],"hasMoreActivities":false,"models":[{"author":"kiranr","authorData":{"avatarUrl":"/avatars/56ccef10a8426d7160ef3586a771bd63.svg","fullname":"Kiran Kamble","name":"kiranr","type":"user","isPro":false,"isHf":false},"downloads":3,"gated":false,"id":"kiranr/internlm2-chat-20b-llama","lastModified":"2024-01-19T18:30:38.000Z","likes":5,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false},{"author":"kiranr","authorData":{"avatarUrl":"/avatars/56ccef10a8426d7160ef3586a771bd63.svg","fullname":"Kiran Kamble","name":"kiranr","type":"user","isPro":false,"isHf":false},"downloads":0,"gated":false,"id":"kiranr/gpt2-tokenizer","lastModified":"2023-06-20T04:55:42.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"numberLikes":55,"papers":[{"id":"2402.17553","title":"OmniACT: A Dataset and Benchmark for Enabling Multimodal Generalist\n Autonomous Agents for Desktop and Web","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.17553.png","upvotes":21,"publishedAt":"2024-02-27T14:47:53.000Z","isUpvotedByUser":false},{"id":"2307.03692","title":"Becoming self-instruct: introducing early stopping criteria for minimal\n instruct tuning","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2307.03692.png","upvotes":23,"publishedAt":"2023-07-05T09:42:25.000Z","isUpvotedByUser":false}],"posts":[],"totalPosts":0,"spaces":[{"author":"kiranr","authorData":{"avatarUrl":"/avatars/56ccef10a8426d7160ef3586a771bd63.svg","fullname":"Kiran Kamble","name":"kiranr","type":"user","isPro":false,"isHf":false},"colorFrom":"blue","colorTo":"purple","createdAt":"2023-11-29T23:35:02.000Z","emoji":"👁","id":"kiranr/tatr-demo","lastModified":"2023-11-27T20:56:21.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"kiranr-tatr-demo.hf.space","isCustom":false,"stage":"READY"}]},"title":"Tatr Demo","isLikedByUser":false,"originSpace":{"name":"nielsr/tatr-demo","author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1608042047613-5f1158120c833276f61f1a84.jpeg","fullname":"Niels Rogge","name":"nielsr","type":"user","isPro":false,"isHf":true}}}],"u":{"avatarUrl":"/avatars/56ccef10a8426d7160ef3586a771bd63.svg","isPro":false,"fullname":"Kiran Kamble","user":"kiranr","orgs":[{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625001569797-60db8b5ad8b4797b129145d5.png","fullname":"Writer","name":"Writer","userRole":"write","type":"org","isHf":false}],"signup":{"github":"ki6an","details":"nlp,llm","homepage":"","twitter":""},"isHf":false,"type":"user"},"upvotes":11,"repoFilterModels":{"sortKey":"modified"},"repoFilterDatasets":{"sortKey":"modified"},"repoFilterSpaces":{"sortKey":"modified"},"numFollowers":2,"numFollowing":0,"isFollowing":false,"isFollower":false,"sampleFollowers":[{"user":"fibrosis","fullname":"Fatime","type":"user","isPro":false,"avatarUrl":"/avatars/d77917bce9600d6218f1fa4b76e317cf.svg"},{"user":"21world","fullname":"www.bigo.tv/user/981854937","type":"user","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64548986cd09ceba0e1709cb/muGiatjmPfzxYb3Rjcqas.jpeg"}],"isWatching":false}">
Kiran Kamble
kiranr
AI & ML interests
nlp,llm