[ { "@graph" : [ { "@id" : "https://arvix.org/abs/2207.15796", "https://sense-nets.xyz/hasZoteroItemType" : [ { "@value" : "webpage" } ] }, { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#assertion", "http://purl.org/dc/terms/creator" : [ { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJY" } ], "http://purl.org/spar/cito/discusses" : [ { "@id" : "https://arvix.org/abs/2207.15796" } ], "http://purl.org/spar/cito/linksTo" : [ { "@id" : "https://arvix.org/abs/2207.15796" } ], "http://www.w3.org/2000/01/rdf-schema#comment" : [ { "@value" : " New paper alert! 🚨 We've been exploring the impact of context on LLM performance evaluation. Turns out, evaluating models on individual examples might not tell the whole story. #MachineLearning #AI\n\n Our findings suggest that batch evaluation allows models to identify patterns and tendencies, leading to more nuanced assessments. Plus, a two-step decision process (analysis + scoring) shows promising results. Exciting times for ML eval! 📊🧠\n\n To learn more, check out the paper: https://arvix.org/abs/2207.15796\n\n" } ], "https://schema.org/keywords" : [ { "@value" : "AI" }, { "@value" : "LLM" }, { "@value" : "MachineLearning" }, { "@value" : "batch-evaluation" }, { "@value" : "performance-evaluation" }, { "@value" : "two-step-decision-process" } ], "https://sense-nets.xyz/announcesResource" : [ { "@id" : "https://arvix.org/abs/2207.15796" } ] } ], "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#assertion" }, { "@graph" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0", "@type" : [ "http://www.nanopub.org/nschema#Nanopublication" ], "http://www.nanopub.org/nschema#hasAssertion" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#assertion" } ], "http://www.nanopub.org/nschema#hasProvenance" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#provenance" } ], "http://www.nanopub.org/nschema#hasPublicationInfo" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#pubinfo" } ] } ], "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#head" }, { "@graph" : [ { "@id" : "https://sense-nets.xyz/", "@type" : [ "http://www.w3.org/ns/prov#SoftwareAgent" ], "http://www.w3.org/ns/prov#actedOnBehalfOf" : [ { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJY" } ] }, { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#activity", "@type" : [ "https://sense-nets.xyz/supervisedActivity" ], "http://www.w3.org/ns/prov#wasAssociatedWith" : [ { "@id" : "https://sense-nets.xyz/" } ] }, { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#assertion", "http://www.w3.org/ns/prov#linksTo" : [ { "@id" : "https://x.com/sensenets_demo/status/1839674524729483541" } ], "http://www.w3.org/ns/prov#wasAssociatedWith" : [ { "@id" : "https://x.com/sensenets_demo" } ], "http://www.w3.org/ns/prov#wasAttributedTo" : [ { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJY" } ], "http://www.w3.org/ns/prov#wasGeneratedBy" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#activity" } ] }, { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJY", "http://xmlns.com/foaf/0.1/account" : [ { "@id" : "https://x.com/sensenets_demo" } ] } ], "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#provenance" }, { "@graph" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0", "http://purl.org/dc/terms/created" : [ { "@type" : "http://www.w3.org/2001/XMLSchema#dateTime", "@value" : "2024-09-27T15:38:26.351Z" } ], "http://purl.org/dc/terms/creator" : [ { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJY" } ], "http://purl.org/dc/terms/license" : [ { "@id" : "https://creativecommons.org/licenses/by/4.0/" } ], "http://purl.org/nanopub/x/hasNanopubType" : [ { "@id" : "https://sense-nets.xyz/SemanticPost" } ], "http://purl.org/nanopub/x/wasCreatedAt" : [ { "@id" : "https://sense-nets.xyz/" } ], "http://www.w3.org/2000/01/rdf-schema#label" : [ { "@value" : "CoSMO Semantic Post" } ], "https://sense-nets.xyz/hasRootSigner" : [ { "@value" : "0x5b9967FC42C160f6146d5ea1f0d08E88370f370b" } ] }, { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#sig", "http://purl.org/nanopub/x/hasAlgorithm" : [ { "@value" : "RSA" } ], "http://purl.org/nanopub/x/hasPublicKey" : [ { "@value" : "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArHtI92jm8pAYVsvJabxLGfOT+7G0JyJGh2gwjB5x2pFPga6wWTd+rNBWWUZViIFnaJrBEsJpgdnoupLU9ppwn+khMiGRfxqGsDDzwHcj3Jc75CRys7d3etwXdBdoXfBgjsJiZBazwm13idr6tljRrC1TaEJBnRQAqzBw9cLDeGY77cSznzXT39feUGT168dpCSE9O6u/48DvvWVqciHGsH9cQ+LroJJVsMrorwtsdZnAK+q48wtIP6pIpw5shSJ5LnA0qeN/f4TvTFDV6ItYIXjiWWpTECc/Bxmfnyat3B5xWCu9nvz8fEs7Ns0TuzQwT3/K55iSKDEIi/E0nO97xwIDAQAB" } ], "http://purl.org/nanopub/x/hasSignature" : [ { "@value" : "UfNKMSMxjJF6FmekyAFM3JrDGaiwLMq8OK0b3TsssKDHOlONBvYOzIWsO+Q5sDE3EGKMNcf0L9RvIgAwkaOJd4jgM4DgISHMEE7mOdJJ8+ogj3qea5jjPDXjwPAaUC1v51Hzc7v40LKALWGD3uJEyorHVpAL1z8FO9DlrLbu9sYzQ9zUHxvnRl0fJKGXkzzT1Z5ODlEs5c5/oq2L8LtKlDg5NSW/o2+5ELcKUDXF9cB2qGy8mymmlFXId4D4Q2BuE52/YtOLdRCJQhiEJaC9ZEw8NBUmDILw3NmKj7kOaU9BZzhQkWfc415rNBJSbXxwj6uM5JQIxTX6zUcwTEa9Sw==" } ], "http://purl.org/nanopub/x/hasSignatureTarget" : [ { "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0" } ], "http://purl.org/nanopub/x/singedBy" : [ { "@id" : "https://sense-nets.xyz/" } ], "http://www.w3.org/ns/prov#wasAssociatedWith" : [ { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJYsigningDelegation" } ] }, { "@id" : "https://w3id.org/np/RA8InlmUPoZ6CTtHP_RkqFBHJSnasnRcjI3qz7EJ-nHJY", "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Quinn Zhang, PhD" } ] } ], "@id" : "https://w3id.org/np/RA6amI79BFW1axJuNM0PmqefGYO-SWR72EoGG5fb0KuB0#pubinfo" } ]